kern_mbuf.c revision 174292
/*-
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26129906Sbmilekic */ 27129906Sbmilekic 28129906Sbmilekic#include <sys/cdefs.h> 29129906Sbmilekic__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 174292 2007-12-05 15:29:44Z rrs $"); 30129906Sbmilekic 31129906Sbmilekic#include "opt_mac.h" 32129906Sbmilekic#include "opt_param.h" 33129906Sbmilekic 34129906Sbmilekic#include <sys/param.h> 35129906Sbmilekic#include <sys/malloc.h> 36129906Sbmilekic#include <sys/systm.h> 37129906Sbmilekic#include <sys/mbuf.h> 38129906Sbmilekic#include <sys/domain.h> 39129906Sbmilekic#include <sys/eventhandler.h> 40129906Sbmilekic#include <sys/kernel.h> 41129906Sbmilekic#include <sys/protosw.h> 42129906Sbmilekic#include <sys/smp.h> 43129906Sbmilekic#include <sys/sysctl.h> 44129906Sbmilekic 45163606Srwatson#include <security/mac/mac_framework.h> 46163606Srwatson 47129906Sbmilekic#include <vm/vm.h> 48129906Sbmilekic#include <vm/vm_page.h> 49129906Sbmilekic#include <vm/uma.h> 50147537Ssilby#include <vm/uma_int.h> 51147537Ssilby#include <vm/uma_dbg.h> 52129906Sbmilekic 53129906Sbmilekic/* 54129906Sbmilekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 55129906Sbmilekic * Zones. 56129906Sbmilekic * 57129906Sbmilekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 58129906Sbmilekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 59129906Sbmilekic * administrator so desires. 60129906Sbmilekic * 61129906Sbmilekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 62129906Sbmilekic * Zone. 63129906Sbmilekic * 64129906Sbmilekic * Additionally, FreeBSD provides a Packet Zone, which it 65129906Sbmilekic * configures as a Secondary Zone to the Mbuf Master Zone, 66129906Sbmilekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 
67129906Sbmilekic * 68129906Sbmilekic * Thus common-case allocations and locking are simplified: 69129906Sbmilekic * 70129906Sbmilekic * m_clget() m_getcl() 71129906Sbmilekic * | | 72129906Sbmilekic * | .------------>[(Packet Cache)] m_get(), m_gethdr() 73129906Sbmilekic * | | [ Packet ] | 74129906Sbmilekic * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 75129906Sbmilekic * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 76129906Sbmilekic * | \________ | 77129906Sbmilekic * [ Cluster Keg ] \ / 78156023Sglebius * | [ Mbuf Keg ] 79129906Sbmilekic * [ Cluster Slabs ] | 80129906Sbmilekic * | [ Mbuf Slabs ] 81129906Sbmilekic * \____________(VM)_________________/ 82151976Sandre * 83151976Sandre * 84156023Sglebius * Whenever an object is allocated with uma_zalloc() out of 85151976Sandre * one of the Zones its _ctor_ function is executed. The same 86156023Sglebius * for any deallocation through uma_zfree() the _dtor_ function 87151976Sandre * is executed. 88156023Sglebius * 89151976Sandre * Caches are per-CPU and are filled from the Master Zone. 90151976Sandre * 91156023Sglebius * Whenever an object is allocated from the underlying global 92151976Sandre * memory pool it gets pre-initialized with the _zinit_ functions. 93151976Sandre * When the Keg's are overfull objects get decomissioned with 94151976Sandre * _zfini_ functions and free'd back to the global memory pool. 95151976Sandre * 96129906Sbmilekic */ 97129906Sbmilekic 98151976Sandreint nmbclusters; /* limits number of mbuf clusters */ 99155780Sandreint nmbjumbop; /* limits number of page size jumbo clusters */ 100151976Sandreint nmbjumbo9; /* limits number of 9k jumbo clusters */ 101151976Sandreint nmbjumbo16; /* limits number of 16k jumbo clusters */ 102129906Sbmilekicstruct mbstat mbstat; 103129906Sbmilekic 104129906Sbmilekicstatic void 105129906Sbmilekictunable_mbinit(void *dummy) 106129906Sbmilekic{ 107129906Sbmilekic 108129906Sbmilekic /* This has to be done before VM init. 
*/ 109174292Srrs nmbclusters = 1024 + maxusers * 64; 110174292Srrs nmbjumbop = nmbclusters / 2; 111174292Srrs nmbjumbo9 = nmbjumbop / 2; 112174292Srrs nmbjumbo16 = nmbjumbo9 / 2; 113129906Sbmilekic TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 114129906Sbmilekic} 115129906SbmilekicSYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); 116129906Sbmilekic 117151976Sandre/* XXX: These should be tuneables. Can't change UMA limits on the fly. */ 118157927Spsstatic int 119157927Spssysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 120157927Sps{ 121157927Sps int error, newnmbclusters; 122157927Sps 123157927Sps newnmbclusters = nmbclusters; 124170289Sdwmalone error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 125157927Sps if (error == 0 && req->newptr) { 126157927Sps if (newnmbclusters > nmbclusters) { 127157927Sps nmbclusters = newnmbclusters; 128157927Sps uma_zone_set_max(zone_clust, nmbclusters); 129157927Sps EVENTHANDLER_INVOKE(nmbclusters_change); 130157927Sps } else 131157927Sps error = EINVAL; 132157927Sps } 133157927Sps return (error); 134157927Sps} 135157927SpsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 136157927Sps&nmbclusters, 0, sysctl_nmbclusters, "IU", 137129906Sbmilekic "Maximum number of mbuf clusters allowed"); 138174292Srrs 139174292Srrsstatic int 140174292Srrssysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 141174292Srrs{ 142174292Srrs int error, newnmbjumbop; 143174292Srrs 144174292Srrs newnmbjumbop = nmbjumbop; 145174292Srrs error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 146174292Srrs if (error == 0 && req->newptr) { 147174292Srrs if (newnmbjumbop> nmbjumbop) { 148174292Srrs nmbjumbop = newnmbjumbop; 149174292Srrs uma_zone_set_max(zone_jumbop, nmbjumbop); 150174292Srrs } else 151174292Srrs error = EINVAL; 152174292Srrs } 153174292Srrs return (error); 154174292Srrs} 155174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 156174292Srrs&nmbjumbop, 0, sysctl_nmbjumbop, "IU", 
157174292Srrs "Maximum number of mbuf page size jumbo clusters allowed"); 158174292Srrs 159174292Srrs 160174292Srrsstatic int 161174292Srrssysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 162174292Srrs{ 163174292Srrs int error, newnmbjumbo9; 164174292Srrs 165174292Srrs newnmbjumbo9 = nmbjumbo9; 166174292Srrs error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 167174292Srrs if (error == 0 && req->newptr) { 168174292Srrs if (newnmbjumbo9> nmbjumbo9) { 169174292Srrs nmbjumbo9 = newnmbjumbo9; 170174292Srrs uma_zone_set_max(zone_jumbo9, nmbjumbo9); 171174292Srrs } else 172174292Srrs error = EINVAL; 173174292Srrs } 174174292Srrs return (error); 175174292Srrs} 176174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 177174292Srrs&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 178174292Srrs "Maximum number of mbuf 9k jumbo clusters allowed"); 179174292Srrs 180174292Srrsstatic int 181174292Srrssysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 182174292Srrs{ 183174292Srrs int error, newnmbjumbo16; 184174292Srrs 185174292Srrs newnmbjumbo16 = nmbjumbo16; 186174292Srrs error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 187174292Srrs if (error == 0 && req->newptr) { 188174292Srrs if (newnmbjumbo16> nmbjumbo16) { 189174292Srrs nmbjumbo16 = newnmbjumbo16; 190174292Srrs uma_zone_set_max(zone_jumbo16, nmbjumbo16); 191174292Srrs } else 192174292Srrs error = EINVAL; 193174292Srrs } 194174292Srrs return (error); 195174292Srrs} 196174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 197174292Srrs&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 198151976Sandre "Maximum number of mbuf 16k jumbo clusters allowed"); 199174292Srrs 200174292Srrs 201174292Srrs 202129906SbmilekicSYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, 203129906Sbmilekic "Mbuf general information and statistics"); 204129906Sbmilekic 205129906Sbmilekic/* 206129906Sbmilekic * Zones from which we allocate. 
207129906Sbmilekic */ 208129906Sbmilekicuma_zone_t zone_mbuf; 209129906Sbmilekicuma_zone_t zone_clust; 210129906Sbmilekicuma_zone_t zone_pack; 211155780Sandreuma_zone_t zone_jumbop; 212151976Sandreuma_zone_t zone_jumbo9; 213151976Sandreuma_zone_t zone_jumbo16; 214151976Sandreuma_zone_t zone_ext_refcnt; 215129906Sbmilekic 216129906Sbmilekic/* 217129906Sbmilekic * Local prototypes. 218129906Sbmilekic */ 219132987Sgreenstatic int mb_ctor_mbuf(void *, int, void *, int); 220132987Sgreenstatic int mb_ctor_clust(void *, int, void *, int); 221132987Sgreenstatic int mb_ctor_pack(void *, int, void *, int); 222129906Sbmilekicstatic void mb_dtor_mbuf(void *, int, void *); 223151976Sandrestatic void mb_dtor_clust(void *, int, void *); 224151976Sandrestatic void mb_dtor_pack(void *, int, void *); 225151976Sandrestatic int mb_zinit_pack(void *, int, int); 226151976Sandrestatic void mb_zfini_pack(void *, int); 227129906Sbmilekic 228129906Sbmilekicstatic void mb_reclaim(void *); 229129906Sbmilekicstatic void mbuf_init(void *); 230174247Salcstatic void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); 231174247Salcstatic void mbuf_jumbo_free(void *, int, u_int8_t); 232129906Sbmilekic 233174247Salcstatic MALLOC_DEFINE(M_JUMBOFRAME, "jumboframes", "mbuf jumbo frame buffers"); 234174247Salc 235135510Sbrian/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ 236135510SbrianCTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 237135510Sbrian 238129906Sbmilekic/* 239129906Sbmilekic * Initialize FreeBSD Network buffer allocation. 240129906Sbmilekic */ 241129906SbmilekicSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL) 242129906Sbmilekicstatic void 243129906Sbmilekicmbuf_init(void *dummy) 244129906Sbmilekic{ 245129906Sbmilekic 246129906Sbmilekic /* 247129906Sbmilekic * Configure UMA zones for Mbufs, Clusters, and Packets. 
248129906Sbmilekic */ 249151976Sandre zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 250151976Sandre mb_ctor_mbuf, mb_dtor_mbuf, 251147537Ssilby#ifdef INVARIANTS 252151976Sandre trash_init, trash_fini, 253147537Ssilby#else 254151976Sandre NULL, NULL, 255147537Ssilby#endif 256151976Sandre MSIZE - 1, UMA_ZONE_MAXBUCKET); 257151976Sandre 258148095Srwatson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 259151976Sandre mb_ctor_clust, mb_dtor_clust, 260147537Ssilby#ifdef INVARIANTS 261151976Sandre trash_init, trash_fini, 262147537Ssilby#else 263151976Sandre NULL, NULL, 264147537Ssilby#endif 265151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 266129906Sbmilekic if (nmbclusters > 0) 267129906Sbmilekic uma_zone_set_max(zone_clust, nmbclusters); 268151976Sandre 269148095Srwatson zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 270151976Sandre mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 271129906Sbmilekic 272156023Sglebius /* Make jumbo frame zone too. Page size, 9k and 16k. 
*/ 273155780Sandre zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 274153232Sandre mb_ctor_clust, mb_dtor_clust, 275153232Sandre#ifdef INVARIANTS 276153232Sandre trash_init, trash_fini, 277153232Sandre#else 278153232Sandre NULL, NULL, 279153232Sandre#endif 280153232Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 281155780Sandre if (nmbjumbop > 0) 282155780Sandre uma_zone_set_max(zone_jumbop, nmbjumbop); 283153232Sandre 284151976Sandre zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 285151976Sandre mb_ctor_clust, mb_dtor_clust, 286151976Sandre#ifdef INVARIANTS 287151976Sandre trash_init, trash_fini, 288151976Sandre#else 289151976Sandre NULL, NULL, 290151976Sandre#endif 291151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 292151976Sandre if (nmbjumbo9 > 0) 293151976Sandre uma_zone_set_max(zone_jumbo9, nmbjumbo9); 294174247Salc uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 295174247Salc uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free); 296129906Sbmilekic 297151976Sandre zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 298151976Sandre mb_ctor_clust, mb_dtor_clust, 299151976Sandre#ifdef INVARIANTS 300151976Sandre trash_init, trash_fini, 301151976Sandre#else 302151976Sandre NULL, NULL, 303151976Sandre#endif 304151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 305151976Sandre if (nmbjumbo16 > 0) 306151976Sandre uma_zone_set_max(zone_jumbo16, nmbjumbo16); 307174247Salc uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 308174247Salc uma_zone_set_freef(zone_jumbo16, mbuf_jumbo_free); 309151976Sandre 310151976Sandre zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 311151976Sandre NULL, NULL, 312151976Sandre NULL, NULL, 313151976Sandre UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 314151976Sandre 315151976Sandre /* uma_prealloc() goes here... 
*/ 316151976Sandre 317129906Sbmilekic /* 318129906Sbmilekic * Hook event handler for low-memory situation, used to 319129906Sbmilekic * drain protocols and push data back to the caches (UMA 320129906Sbmilekic * later pushes it back to VM). 321129906Sbmilekic */ 322129906Sbmilekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 323129906Sbmilekic EVENTHANDLER_PRI_FIRST); 324129906Sbmilekic 325129906Sbmilekic /* 326129906Sbmilekic * [Re]set counters and local statistics knobs. 327129906Sbmilekic * XXX Some of these should go and be replaced, but UMA stat 328129906Sbmilekic * gathering needs to be revised. 329129906Sbmilekic */ 330129906Sbmilekic mbstat.m_mbufs = 0; 331129906Sbmilekic mbstat.m_mclusts = 0; 332129906Sbmilekic mbstat.m_drain = 0; 333129906Sbmilekic mbstat.m_msize = MSIZE; 334129906Sbmilekic mbstat.m_mclbytes = MCLBYTES; 335129906Sbmilekic mbstat.m_minclsize = MINCLSIZE; 336129906Sbmilekic mbstat.m_mlen = MLEN; 337129906Sbmilekic mbstat.m_mhlen = MHLEN; 338129906Sbmilekic mbstat.m_numtypes = MT_NTYPES; 339129906Sbmilekic 340129906Sbmilekic mbstat.m_mcfail = mbstat.m_mpfail = 0; 341129906Sbmilekic mbstat.sf_iocnt = 0; 342129906Sbmilekic mbstat.sf_allocwait = mbstat.sf_allocfail = 0; 343129906Sbmilekic} 344129906Sbmilekic 345129906Sbmilekic/* 346174247Salc * UMA backend page allocator for the jumbo frame zones. 347174247Salc * 348174247Salc * Allocates kernel virtual memory that is backed by contiguous physical 349174247Salc * pages. 350174247Salc */ 351174247Salcstatic void * 352174247Salcmbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 353174247Salc{ 354174247Salc 355174247Salc *flags = UMA_SLAB_PRIV; 356174247Salc return (contigmalloc(bytes, M_JUMBOFRAME, wait, (vm_paddr_t)0, 357174247Salc ~(vm_paddr_t)0, 1, 0)); 358174247Salc} 359174247Salc 360174247Salc/* 361174247Salc * UMA backend page deallocator for the jumbo frame zones. 
362174247Salc */ 363174247Salcstatic void 364174247Salcmbuf_jumbo_free(void *mem, int size, u_int8_t flags) 365174247Salc{ 366174247Salc 367174247Salc contigfree(mem, size, M_JUMBOFRAME); 368174247Salc} 369174247Salc 370174247Salc/* 371129906Sbmilekic * Constructor for Mbuf master zone. 372129906Sbmilekic * 373129906Sbmilekic * The 'arg' pointer points to a mb_args structure which 374129906Sbmilekic * contains call-specific information required to support the 375151976Sandre * mbuf allocation API. See mbuf.h. 376129906Sbmilekic */ 377132987Sgreenstatic int 378132987Sgreenmb_ctor_mbuf(void *mem, int size, void *arg, int how) 379129906Sbmilekic{ 380129906Sbmilekic struct mbuf *m; 381129906Sbmilekic struct mb_args *args; 382132987Sgreen#ifdef MAC 383132987Sgreen int error; 384132987Sgreen#endif 385129906Sbmilekic int flags; 386129906Sbmilekic short type; 387129906Sbmilekic 388147537Ssilby#ifdef INVARIANTS 389147537Ssilby trash_ctor(mem, size, arg, how); 390147537Ssilby#endif 391129906Sbmilekic m = (struct mbuf *)mem; 392129906Sbmilekic args = (struct mb_args *)arg; 393129906Sbmilekic flags = args->flags; 394129906Sbmilekic type = args->type; 395129906Sbmilekic 396151976Sandre /* 397151976Sandre * The mbuf is initialized later. The caller has the 398156059Sglebius * responsibility to set up any MAC labels too. 
399151976Sandre */ 400151976Sandre if (type == MT_NOINIT) 401151976Sandre return (0); 402151976Sandre 403129906Sbmilekic m->m_next = NULL; 404129906Sbmilekic m->m_nextpkt = NULL; 405151976Sandre m->m_len = 0; 406129947Sbmilekic m->m_flags = flags; 407151976Sandre m->m_type = type; 408129906Sbmilekic if (flags & M_PKTHDR) { 409129906Sbmilekic m->m_data = m->m_pktdat; 410129906Sbmilekic m->m_pkthdr.rcvif = NULL; 411151976Sandre m->m_pkthdr.len = 0; 412151976Sandre m->m_pkthdr.header = NULL; 413129906Sbmilekic m->m_pkthdr.csum_flags = 0; 414151976Sandre m->m_pkthdr.csum_data = 0; 415162377Sandre m->m_pkthdr.tso_segsz = 0; 416162377Sandre m->m_pkthdr.ether_vtag = 0; 417129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 418129906Sbmilekic#ifdef MAC 419129906Sbmilekic /* If the label init fails, fail the alloc */ 420172930Srwatson error = mac_mbuf_init(m, how); 421132987Sgreen if (error) 422132987Sgreen return (error); 423129906Sbmilekic#endif 424129947Sbmilekic } else 425129906Sbmilekic m->m_data = m->m_dat; 426132987Sgreen return (0); 427129906Sbmilekic} 428129906Sbmilekic 429129906Sbmilekic/* 430151976Sandre * The Mbuf master zone destructor. 431129906Sbmilekic */ 432129906Sbmilekicstatic void 433129906Sbmilekicmb_dtor_mbuf(void *mem, int size, void *arg) 434129906Sbmilekic{ 435129906Sbmilekic struct mbuf *m; 436172462Skmacy unsigned long flags; 437129906Sbmilekic 438129906Sbmilekic m = (struct mbuf *)mem; 439172462Skmacy flags = (unsigned long)arg; 440173029Sobrien 441172462Skmacy if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) 442129906Sbmilekic m_tag_delete_chain(m, NULL); 443151976Sandre KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); 444173029Sobrien KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 445147537Ssilby#ifdef INVARIANTS 446147537Ssilby trash_dtor(mem, size, arg); 447147537Ssilby#endif 448129906Sbmilekic} 449129906Sbmilekic 450151976Sandre/* 451151976Sandre * The Mbuf Packet zone destructor. 
452151976Sandre */ 453129906Sbmilekicstatic void 454129906Sbmilekicmb_dtor_pack(void *mem, int size, void *arg) 455129906Sbmilekic{ 456129906Sbmilekic struct mbuf *m; 457129906Sbmilekic 458129906Sbmilekic m = (struct mbuf *)mem; 459129906Sbmilekic if ((m->m_flags & M_PKTHDR) != 0) 460129906Sbmilekic m_tag_delete_chain(m, NULL); 461151976Sandre 462151976Sandre /* Make sure we've got a clean cluster back. */ 463151976Sandre KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 464151976Sandre KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 465151976Sandre KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 466151976Sandre KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__)); 467151976Sandre KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 468152130Sglebius KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 469151976Sandre KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); 470147537Ssilby#ifdef INVARIANTS 471147537Ssilby trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 472147537Ssilby#endif 473166213Smohans /* 474173029Sobrien * If there are processes blocked on zone_clust, waiting for pages 475173029Sobrien * to be freed up, * cause them to be woken up by draining the 476173029Sobrien * packet zone. We are exposed to a race here * (in the check for 477173029Sobrien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 478173029Sobrien * is deliberate. We don't want to acquire the zone lock for every 479173029Sobrien * mbuf free. 480166213Smohans */ 481173029Sobrien if (uma_zone_exhausted_nolock(zone_clust)) 482173029Sobrien zone_drain(zone_pack); 483129906Sbmilekic} 484129906Sbmilekic 485129906Sbmilekic/* 486155780Sandre * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 
487129906Sbmilekic * 488129906Sbmilekic * Here the 'arg' pointer points to the Mbuf which we 489151976Sandre * are configuring cluster storage for. If 'arg' is 490151976Sandre * empty we allocate just the cluster without setting 491151976Sandre * the mbuf to it. See mbuf.h. 492129906Sbmilekic */ 493132987Sgreenstatic int 494132987Sgreenmb_ctor_clust(void *mem, int size, void *arg, int how) 495129906Sbmilekic{ 496129906Sbmilekic struct mbuf *m; 497151976Sandre u_int *refcnt; 498168374Skmacy int type; 499168374Skmacy uma_zone_t zone; 500173029Sobrien 501147537Ssilby#ifdef INVARIANTS 502147537Ssilby trash_ctor(mem, size, arg, how); 503147537Ssilby#endif 504168374Skmacy switch (size) { 505168374Skmacy case MCLBYTES: 506168374Skmacy type = EXT_CLUSTER; 507168374Skmacy zone = zone_clust; 508168374Skmacy break; 509168374Skmacy#if MJUMPAGESIZE != MCLBYTES 510168374Skmacy case MJUMPAGESIZE: 511168374Skmacy type = EXT_JUMBOP; 512168374Skmacy zone = zone_jumbop; 513168374Skmacy break; 514168374Skmacy#endif 515168374Skmacy case MJUM9BYTES: 516168374Skmacy type = EXT_JUMBO9; 517168374Skmacy zone = zone_jumbo9; 518168374Skmacy break; 519168374Skmacy case MJUM16BYTES: 520168374Skmacy type = EXT_JUMBO16; 521168374Skmacy zone = zone_jumbo16; 522168374Skmacy break; 523168374Skmacy default: 524168374Skmacy panic("unknown cluster size"); 525168374Skmacy break; 526168374Skmacy } 527168374Skmacy 528129906Sbmilekic m = (struct mbuf *)arg; 529168374Skmacy refcnt = uma_find_refcnt(zone, mem); 530173029Sobrien *refcnt = 1; 531151976Sandre if (m != NULL) { 532151976Sandre m->m_ext.ext_buf = (caddr_t)mem; 533151976Sandre m->m_data = m->m_ext.ext_buf; 534151976Sandre m->m_flags |= M_EXT; 535151976Sandre m->m_ext.ext_free = NULL; 536151976Sandre m->m_ext.ext_args = NULL; 537151976Sandre m->m_ext.ext_size = size; 538151976Sandre m->m_ext.ext_type = type; 539168374Skmacy m->m_ext.ref_cnt = refcnt; 540151976Sandre } 541168374Skmacy 542132987Sgreen return (0); 543129906Sbmilekic} 544129906Sbmilekic 
545151976Sandre/* 546151976Sandre * The Mbuf Cluster zone destructor. 547151976Sandre */ 548129906Sbmilekicstatic void 549129906Sbmilekicmb_dtor_clust(void *mem, int size, void *arg) 550129906Sbmilekic{ 551168374Skmacy#ifdef INVARIANTS 552168374Skmacy uma_zone_t zone; 553151976Sandre 554168374Skmacy zone = m_getzone(size); 555168374Skmacy KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, 556152035Sandre ("%s: refcnt incorrect %u", __func__, 557168374Skmacy *(uma_find_refcnt(zone, mem))) ); 558168374Skmacy 559147537Ssilby trash_dtor(mem, size, arg); 560147537Ssilby#endif 561129906Sbmilekic} 562129906Sbmilekic 563129906Sbmilekic/* 564129906Sbmilekic * The Packet secondary zone's init routine, executed on the 565151976Sandre * object's transition from mbuf keg slab to zone cache. 566129906Sbmilekic */ 567132987Sgreenstatic int 568151976Sandremb_zinit_pack(void *mem, int size, int how) 569129906Sbmilekic{ 570129906Sbmilekic struct mbuf *m; 571129906Sbmilekic 572151976Sandre m = (struct mbuf *)mem; /* m is virgin. */ 573156428Sandre if (uma_zalloc_arg(zone_clust, m, how) == NULL || 574156428Sandre m->m_ext.ext_buf == NULL) 575132987Sgreen return (ENOMEM); 576152101Sandre m->m_ext.ext_type = EXT_PACKET; /* Override. */ 577147537Ssilby#ifdef INVARIANTS 578147537Ssilby trash_init(m->m_ext.ext_buf, MCLBYTES, how); 579147537Ssilby#endif 580132987Sgreen return (0); 581129906Sbmilekic} 582129906Sbmilekic 583129906Sbmilekic/* 584129906Sbmilekic * The Packet secondary zone's fini routine, executed on the 585129906Sbmilekic * object's transition from zone cache to keg slab. 
586129906Sbmilekic */ 587129906Sbmilekicstatic void 588151976Sandremb_zfini_pack(void *mem, int size) 589129906Sbmilekic{ 590129906Sbmilekic struct mbuf *m; 591129906Sbmilekic 592129906Sbmilekic m = (struct mbuf *)mem; 593147537Ssilby#ifdef INVARIANTS 594147537Ssilby trash_fini(m->m_ext.ext_buf, MCLBYTES); 595147537Ssilby#endif 596129906Sbmilekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 597147652Ssilby#ifdef INVARIANTS 598147652Ssilby trash_dtor(mem, size, NULL); 599147652Ssilby#endif 600129906Sbmilekic} 601129906Sbmilekic 602129906Sbmilekic/* 603129906Sbmilekic * The "packet" keg constructor. 604129906Sbmilekic */ 605132987Sgreenstatic int 606132987Sgreenmb_ctor_pack(void *mem, int size, void *arg, int how) 607129906Sbmilekic{ 608129906Sbmilekic struct mbuf *m; 609129906Sbmilekic struct mb_args *args; 610132987Sgreen#ifdef MAC 611132987Sgreen int error; 612132987Sgreen#endif 613132987Sgreen int flags; 614129906Sbmilekic short type; 615129906Sbmilekic 616129906Sbmilekic m = (struct mbuf *)mem; 617129906Sbmilekic args = (struct mb_args *)arg; 618129906Sbmilekic flags = args->flags; 619129906Sbmilekic type = args->type; 620129906Sbmilekic 621147537Ssilby#ifdef INVARIANTS 622147537Ssilby trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 623147537Ssilby#endif 624129906Sbmilekic m->m_next = NULL; 625129947Sbmilekic m->m_nextpkt = NULL; 626129906Sbmilekic m->m_data = m->m_ext.ext_buf; 627151976Sandre m->m_len = 0; 628151976Sandre m->m_flags = (flags | M_EXT); 629151976Sandre m->m_type = type; 630173029Sobrien 631129906Sbmilekic if (flags & M_PKTHDR) { 632129906Sbmilekic m->m_pkthdr.rcvif = NULL; 633151976Sandre m->m_pkthdr.len = 0; 634151976Sandre m->m_pkthdr.header = NULL; 635129906Sbmilekic m->m_pkthdr.csum_flags = 0; 636151976Sandre m->m_pkthdr.csum_data = 0; 637162377Sandre m->m_pkthdr.tso_segsz = 0; 638162377Sandre m->m_pkthdr.ether_vtag = 0; 639129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 640129906Sbmilekic#ifdef MAC 641129906Sbmilekic /* If the label 
init fails, fail the alloc */ 642172930Srwatson error = mac_mbuf_init(m, how); 643132987Sgreen if (error) 644132987Sgreen return (error); 645129906Sbmilekic#endif 646129906Sbmilekic } 647151976Sandre /* m_ext is already initialized. */ 648151976Sandre 649132987Sgreen return (0); 650129906Sbmilekic} 651129906Sbmilekic 652129906Sbmilekic/* 653129906Sbmilekic * This is the protocol drain routine. 654129906Sbmilekic * 655129906Sbmilekic * No locks should be held when this is called. The drain routines have to 656129906Sbmilekic * presently acquire some locks which raises the possibility of lock order 657129906Sbmilekic * reversal. 658129906Sbmilekic */ 659129906Sbmilekicstatic void 660129906Sbmilekicmb_reclaim(void *junk) 661129906Sbmilekic{ 662129906Sbmilekic struct domain *dp; 663129906Sbmilekic struct protosw *pr; 664129906Sbmilekic 665129906Sbmilekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 666129906Sbmilekic "mb_reclaim()"); 667129906Sbmilekic 668129906Sbmilekic for (dp = domains; dp != NULL; dp = dp->dom_next) 669129906Sbmilekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 670129906Sbmilekic if (pr->pr_drain != NULL) 671129906Sbmilekic (*pr->pr_drain)(); 672129906Sbmilekic} 673