kern_mbuf.c revision 129947
1129906Sbmilekic/*- 2129906Sbmilekic * Copyright (c) 2004 3129906Sbmilekic * Bosko Milekic <bmilekic@FreeBSD.org>. 4129906Sbmilekic * All rights reserved. 5129906Sbmilekic * 6129906Sbmilekic * Redistribution and use in source and binary forms, with or without 7129906Sbmilekic * modification, are permitted provided that the following conditions 8129906Sbmilekic * are met: 9129906Sbmilekic * 1. Redistributions of source code must retain the above copyright 10129906Sbmilekic * notice unmodified, this list of conditions and the following 11129906Sbmilekic * disclaimer. 12129906Sbmilekic * 2. Redistributions in binary form must reproduce the above copyright 13129906Sbmilekic * notice, this list of conditions and the following disclaimer in the 14129906Sbmilekic * documentation and/or other materials provided with the distribution. 15129906Sbmilekic * 3. Neither the name of the author nor the names of contributors may be 16129906Sbmilekic * used to endorse or promote products derived from this software 17129906Sbmilekic * without specific prior written permission. 18129906Sbmilekic * 19129906Sbmilekic * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20129906Sbmilekic * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21129906Sbmilekic * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22129906Sbmilekic * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23129906Sbmilekic * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24129906Sbmilekic * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25129906Sbmilekic * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26129906Sbmilekic * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27129906Sbmilekic * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28129906Sbmilekic * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29129906Sbmilekic * SUCH DAMAGE. 30129906Sbmilekic */ 31129906Sbmilekic 32129906Sbmilekic#include <sys/cdefs.h> 33129906Sbmilekic__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 129947 2004-06-01 16:17:10Z bmilekic $"); 34129906Sbmilekic 35129906Sbmilekic#include "opt_mac.h" 36129906Sbmilekic#include "opt_param.h" 37129906Sbmilekic 38129906Sbmilekic#include <sys/param.h> 39129906Sbmilekic#include <sys/mac.h> 40129906Sbmilekic#include <sys/malloc.h> 41129906Sbmilekic#include <sys/systm.h> 42129906Sbmilekic#include <sys/mbuf.h> 43129906Sbmilekic#include <sys/domain.h> 44129906Sbmilekic#include <sys/eventhandler.h> 45129906Sbmilekic#include <sys/kernel.h> 46129906Sbmilekic#include <sys/protosw.h> 47129906Sbmilekic#include <sys/smp.h> 48129906Sbmilekic#include <sys/sysctl.h> 49129906Sbmilekic 50129906Sbmilekic#include <vm/vm.h> 51129906Sbmilekic#include <vm/vm_page.h> 52129906Sbmilekic#include <vm/uma.h> 53129906Sbmilekic 54129906Sbmilekic/* 55129906Sbmilekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 56129906Sbmilekic * Zones. 57129906Sbmilekic * 58129906Sbmilekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 59129906Sbmilekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 60129906Sbmilekic * administrator so desires. 61129906Sbmilekic * 62129906Sbmilekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 63129906Sbmilekic * Zone. 64129906Sbmilekic * 65129906Sbmilekic * Additionally, FreeBSD provides a Packet Zone, which it 66129906Sbmilekic * configures as a Secondary Zone to the Mbuf Master Zone, 67129906Sbmilekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 68129906Sbmilekic * 69129906Sbmilekic * Thus common-case allocations and locking are simplified: 70129906Sbmilekic * 71129906Sbmilekic * m_clget() m_getcl() 72129906Sbmilekic * | | 73129906Sbmilekic * | .------------>[(Packet Cache)] m_get(), m_gethdr() 74129906Sbmilekic * | | [ Packet ] | 75129906Sbmilekic * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 76129906Sbmilekic * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 77129906Sbmilekic * | \________ | 78129906Sbmilekic * [ Cluster Keg ] \ / 79129906Sbmilekic * | [ Mbuf Keg ] 80129906Sbmilekic * [ Cluster Slabs ] | 81129906Sbmilekic * | [ Mbuf Slabs ] 82129906Sbmilekic * \____________(VM)_________________/ 83129906Sbmilekic */ 84129906Sbmilekic 85129906Sbmilekicint nmbclusters; 86129906Sbmilekicstruct mbstat mbstat; 87129906Sbmilekic 88129906Sbmilekicstatic void 89129906Sbmilekictunable_mbinit(void *dummy) 90129906Sbmilekic{ 91129906Sbmilekic 92129906Sbmilekic /* This has to be done before VM init. */ 93129906Sbmilekic nmbclusters = 1024 + maxusers * 64; 94129906Sbmilekic TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 95129906Sbmilekic} 96129906SbmilekicSYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); 97129906Sbmilekic 98129906SbmilekicSYSCTL_DECL(_kern_ipc); 99129906SbmilekicSYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0, 100129906Sbmilekic "Maximum number of mbuf clusters allowed"); 101129906SbmilekicSYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, 102129906Sbmilekic "Mbuf general information and statistics"); 103129906Sbmilekic 104129906Sbmilekic/* 105129906Sbmilekic * Zones from which we allocate. 106129906Sbmilekic */ 107129906Sbmilekicuma_zone_t zone_mbuf; 108129906Sbmilekicuma_zone_t zone_clust; 109129906Sbmilekicuma_zone_t zone_pack; 110129906Sbmilekic 111129906Sbmilekic/* 112129906Sbmilekic * Local prototypes. 113129906Sbmilekic */ 114129906Sbmilekicstatic void mb_ctor_mbuf(void *, int, void *); 115129906Sbmilekicstatic void mb_ctor_clust(void *, int, void *); 116129906Sbmilekicstatic void mb_ctor_pack(void *, int, void *); 117129906Sbmilekicstatic void mb_dtor_mbuf(void *, int, void *); 118129906Sbmilekicstatic void mb_dtor_clust(void *, int, void *); /* XXX */ 119129906Sbmilekicstatic void mb_dtor_pack(void *, int, void *); /* XXX */ 120129906Sbmilekicstatic void mb_init_pack(void *, int); 121129906Sbmilekicstatic void mb_fini_pack(void *, int); 122129906Sbmilekic 123129906Sbmilekicstatic void mb_reclaim(void *); 124129906Sbmilekicstatic void mbuf_init(void *); 125129906Sbmilekic 126129906Sbmilekic/* 127129906Sbmilekic * Initialize FreeBSD Network buffer allocation. 128129906Sbmilekic */ 129129906SbmilekicSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL) 130129906Sbmilekicstatic void 131129906Sbmilekicmbuf_init(void *dummy) 132129906Sbmilekic{ 133129906Sbmilekic 134129906Sbmilekic /* 135129906Sbmilekic * Configure UMA zones for Mbufs, Clusters, and Packets. 136129906Sbmilekic */ 137129906Sbmilekic zone_mbuf = uma_zcreate("Mbuf", MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, 138129906Sbmilekic NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); 139129906Sbmilekic zone_clust = uma_zcreate("MbufClust", MCLBYTES, mb_ctor_clust, 140129906Sbmilekic mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 141129906Sbmilekic if (nmbclusters > 0) 142129906Sbmilekic uma_zone_set_max(zone_clust, nmbclusters); 143129906Sbmilekic zone_pack = uma_zsecond_create("Packet", mb_ctor_pack, mb_dtor_pack, 144129906Sbmilekic mb_init_pack, mb_fini_pack, zone_mbuf); 145129906Sbmilekic 146129906Sbmilekic /* uma_prealloc() goes here */ 147129906Sbmilekic 148129906Sbmilekic /* 149129906Sbmilekic * Hook event handler for low-memory situation, used to 150129906Sbmilekic * drain protocols and push data back to the caches (UMA 151129906Sbmilekic * later pushes it back to VM). 152129906Sbmilekic */ 153129906Sbmilekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 154129906Sbmilekic EVENTHANDLER_PRI_FIRST); 155129906Sbmilekic 156129906Sbmilekic /* 157129906Sbmilekic * [Re]set counters and local statistics knobs. 158129906Sbmilekic * XXX Some of these should go and be replaced, but UMA stat 159129906Sbmilekic * gathering needs to be revised. 160129906Sbmilekic */ 161129906Sbmilekic mbstat.m_mbufs = 0; 162129906Sbmilekic mbstat.m_mclusts = 0; 163129906Sbmilekic mbstat.m_drain = 0; 164129906Sbmilekic mbstat.m_msize = MSIZE; 165129906Sbmilekic mbstat.m_mclbytes = MCLBYTES; 166129906Sbmilekic mbstat.m_minclsize = MINCLSIZE; 167129906Sbmilekic mbstat.m_mlen = MLEN; 168129906Sbmilekic mbstat.m_mhlen = MHLEN; 169129906Sbmilekic mbstat.m_numtypes = MT_NTYPES; 170129906Sbmilekic 171129906Sbmilekic mbstat.m_mcfail = mbstat.m_mpfail = 0; 172129906Sbmilekic mbstat.sf_iocnt = 0; 173129906Sbmilekic mbstat.sf_allocwait = mbstat.sf_allocfail = 0; 174129906Sbmilekic} 175129906Sbmilekic 176129906Sbmilekic/* 177129906Sbmilekic * Constructor for Mbuf master zone. 178129906Sbmilekic * 179129906Sbmilekic * The 'arg' pointer points to a mb_args structure which 180129906Sbmilekic * contains call-specific information required to support the 181129906Sbmilekic * mbuf allocation API. 182129906Sbmilekic */ 183129906Sbmilekicstatic void 184129906Sbmilekicmb_ctor_mbuf(void *mem, int size, void *arg) 185129906Sbmilekic{ 186129906Sbmilekic struct mbuf *m; 187129906Sbmilekic struct mb_args *args; 188129906Sbmilekic int flags; 189129906Sbmilekic int how; 190129906Sbmilekic short type; 191129906Sbmilekic 192129906Sbmilekic m = (struct mbuf *)mem; 193129906Sbmilekic args = (struct mb_args *)arg; 194129906Sbmilekic flags = args->flags; 195129906Sbmilekic how = args->how; 196129906Sbmilekic type = args->type; 197129906Sbmilekic 198129906Sbmilekic m->m_type = type; 199129906Sbmilekic m->m_next = NULL; 200129906Sbmilekic m->m_nextpkt = NULL; 201129947Sbmilekic m->m_flags = flags; 202129906Sbmilekic if (flags & M_PKTHDR) { 203129906Sbmilekic m->m_data = m->m_pktdat; 204129906Sbmilekic m->m_pkthdr.rcvif = NULL; 205129906Sbmilekic m->m_pkthdr.csum_flags = 0; 206129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 207129906Sbmilekic#ifdef MAC 208129906Sbmilekic /* If the label init fails, fail the alloc */ 209129906Sbmilekic if (mac_init_mbuf(m, how) != 0) { 210129906Sbmilekic m_free(m); 211129906Sbmilekic/* XXX*/ panic("mb_ctor_mbuf(): can't deal with failure!"); 212129906Sbmilekic/* return 0; */ 213129906Sbmilekic } 214129906Sbmilekic#endif 215129947Sbmilekic } else 216129906Sbmilekic m->m_data = m->m_dat; 217129906Sbmilekic mbstat.m_mbufs += 1; /* XXX */ 218129906Sbmilekic/* return 1; 219129906Sbmilekic*/ 220129906Sbmilekic} 221129906Sbmilekic 222129906Sbmilekic/* 223129906Sbmilekic * The Mbuf master zone and Packet secondary zone destructor. 224129906Sbmilekic */ 225129906Sbmilekicstatic void 226129906Sbmilekicmb_dtor_mbuf(void *mem, int size, void *arg) 227129906Sbmilekic{ 228129906Sbmilekic struct mbuf *m; 229129906Sbmilekic 230129906Sbmilekic m = (struct mbuf *)mem; 231129906Sbmilekic if ((m->m_flags & M_PKTHDR) != 0) 232129906Sbmilekic m_tag_delete_chain(m, NULL); 233129906Sbmilekic mbstat.m_mbufs -= 1; /* XXX */ 234129906Sbmilekic} 235129906Sbmilekic 236129906Sbmilekic/* XXX Only because of stats */ 237129906Sbmilekicstatic void 238129906Sbmilekicmb_dtor_pack(void *mem, int size, void *arg) 239129906Sbmilekic{ 240129906Sbmilekic struct mbuf *m; 241129906Sbmilekic 242129906Sbmilekic m = (struct mbuf *)mem; 243129906Sbmilekic if ((m->m_flags & M_PKTHDR) != 0) 244129906Sbmilekic m_tag_delete_chain(m, NULL); 245129906Sbmilekic mbstat.m_mbufs -= 1; /* XXX */ 246129906Sbmilekic mbstat.m_mclusts -= 1; /* XXX */ 247129906Sbmilekic} 248129906Sbmilekic 249129906Sbmilekic/* 250129906Sbmilekic * The Cluster zone constructor. 251129906Sbmilekic * 252129906Sbmilekic * Here the 'arg' pointer points to the Mbuf which we 253129906Sbmilekic * are configuring cluster storage for. 254129906Sbmilekic */ 255129906Sbmilekicstatic void 256129906Sbmilekicmb_ctor_clust(void *mem, int size, void *arg) 257129906Sbmilekic{ 258129906Sbmilekic struct mbuf *m; 259129906Sbmilekic 260129906Sbmilekic m = (struct mbuf *)arg; 261129906Sbmilekic m->m_ext.ext_buf = (caddr_t)mem; 262129906Sbmilekic m->m_data = m->m_ext.ext_buf; 263129906Sbmilekic m->m_flags |= M_EXT; 264129906Sbmilekic m->m_ext.ext_free = NULL; 265129906Sbmilekic m->m_ext.ext_args = NULL; 266129906Sbmilekic m->m_ext.ext_size = MCLBYTES; 267129906Sbmilekic m->m_ext.ext_type = EXT_CLUSTER; 268129906Sbmilekic m->m_ext.ref_cnt = (u_int *)uma_find_refcnt(zone_clust, 269129906Sbmilekic m->m_ext.ext_buf); 270129906Sbmilekic *(m->m_ext.ref_cnt) = 1; 271129906Sbmilekic mbstat.m_mclusts += 1; /* XXX */ 272129906Sbmilekic/* return 1; 273129906Sbmilekic*/ 274129906Sbmilekic} 275129906Sbmilekic 276129906Sbmilekic/* XXX */ 277129906Sbmilekicstatic void 278129906Sbmilekicmb_dtor_clust(void *mem, int size, void *arg) 279129906Sbmilekic{ 280129906Sbmilekic mbstat.m_mclusts -= 1; /* XXX */ 281129906Sbmilekic} 282129906Sbmilekic 283129906Sbmilekic/* 284129906Sbmilekic * The Packet secondary zone's init routine, executed on the 285129906Sbmilekic * object's transition from keg slab to zone cache. 286129906Sbmilekic */ 287129906Sbmilekicstatic void 288129906Sbmilekicmb_init_pack(void *mem, int size) 289129906Sbmilekic{ 290129906Sbmilekic struct mbuf *m; 291129906Sbmilekic 292129906Sbmilekic m = (struct mbuf *)mem; 293129906Sbmilekic m->m_ext.ext_buf = NULL; 294129906Sbmilekic uma_zalloc_arg(zone_clust, m, M_NOWAIT); 295129906Sbmilekic if (m->m_ext.ext_buf == NULL) /* XXX */ 296129906Sbmilekic panic("mb_init_pack(): Can't deal with failure yet."); 297129906Sbmilekic mbstat.m_mclusts -= 1; /* XXX */ 298129906Sbmilekic} 299129906Sbmilekic 300129906Sbmilekic/* 301129906Sbmilekic * The Packet secondary zone's fini routine, executed on the 302129906Sbmilekic * object's transition from zone cache to keg slab. 303129906Sbmilekic */ 304129906Sbmilekicstatic void 305129906Sbmilekicmb_fini_pack(void *mem, int size) 306129906Sbmilekic{ 307129906Sbmilekic struct mbuf *m; 308129906Sbmilekic 309129906Sbmilekic m = (struct mbuf *)mem; 310129906Sbmilekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 311129906Sbmilekic m->m_ext.ext_buf = NULL; 312129906Sbmilekic mbstat.m_mclusts += 1; /* XXX */ 313129906Sbmilekic} 314129906Sbmilekic 315129906Sbmilekic/* 316129906Sbmilekic * The "packet" keg constructor. 317129906Sbmilekic */ 318129906Sbmilekicstatic void 319129906Sbmilekicmb_ctor_pack(void *mem, int size, void *arg) 320129906Sbmilekic{ 321129906Sbmilekic struct mbuf *m; 322129906Sbmilekic struct mb_args *args; 323129906Sbmilekic int flags, how; 324129906Sbmilekic short type; 325129906Sbmilekic 326129906Sbmilekic m = (struct mbuf *)mem; 327129906Sbmilekic args = (struct mb_args *)arg; 328129906Sbmilekic flags = args->flags; 329129906Sbmilekic type = args->type; 330129906Sbmilekic how = args->how; 331129906Sbmilekic 332129906Sbmilekic m->m_type = type; 333129906Sbmilekic m->m_next = NULL; 334129947Sbmilekic m->m_nextpkt = NULL; 335129906Sbmilekic m->m_data = m->m_ext.ext_buf; 336129906Sbmilekic m->m_flags = flags|M_EXT; 337129906Sbmilekic m->m_ext.ext_free = NULL; 338129906Sbmilekic m->m_ext.ext_args = NULL; 339129906Sbmilekic m->m_ext.ext_size = MCLBYTES; 340129906Sbmilekic m->m_ext.ext_type = EXT_PACKET; 341129906Sbmilekic *(m->m_ext.ref_cnt) = 1; 342129906Sbmilekic 343129906Sbmilekic if (flags & M_PKTHDR) { 344129906Sbmilekic m->m_pkthdr.rcvif = NULL; 345129906Sbmilekic m->m_pkthdr.csum_flags = 0; 346129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 347129906Sbmilekic#ifdef MAC 348129906Sbmilekic /* If the label init fails, fail the alloc */ 349129906Sbmilekic if (mac_init_mbuf(m, how) != 0) { 350129906Sbmilekic m_free(m); 351129906Sbmilekic/* XXX*/ panic("mb_ctor_pack(): can't deal with failure!"); 352129906Sbmilekic/* return 0; */ 353129906Sbmilekic } 354129906Sbmilekic#endif 355129906Sbmilekic } 356129906Sbmilekic mbstat.m_mbufs += 1; /* XXX */ 357129906Sbmilekic mbstat.m_mclusts += 1; /* XXX */ 358129906Sbmilekic/* return 1; 359129906Sbmilekic*/ 360129906Sbmilekic} 361129906Sbmilekic 362129906Sbmilekic/* 363129906Sbmilekic * This is the protocol drain routine. 364129906Sbmilekic * 365129906Sbmilekic * No locks should be held when this is called. The drain routines have to 366129906Sbmilekic * presently acquire some locks which raises the possibility of lock order 367129906Sbmilekic * reversal. 368129906Sbmilekic */ 369129906Sbmilekicstatic void 370129906Sbmilekicmb_reclaim(void *junk) 371129906Sbmilekic{ 372129906Sbmilekic struct domain *dp; 373129906Sbmilekic struct protosw *pr; 374129906Sbmilekic 375129906Sbmilekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 376129906Sbmilekic "mb_reclaim()"); 377129906Sbmilekic 378129906Sbmilekic mbstat.m_drain++; 379129906Sbmilekic for (dp = domains; dp != NULL; dp = dp->dom_next) 380129906Sbmilekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 381129906Sbmilekic if (pr->pr_drain != NULL) 382129906Sbmilekic (*pr->pr_drain)(); 383129906Sbmilekic} 384