kern_mbuf.c revision 254814
1129906Sbmilekic/*- 2141991Sbmilekic * Copyright (c) 2004, 2005, 3243995Spjd * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 4129906Sbmilekic * 5129906Sbmilekic * Redistribution and use in source and binary forms, with or without 6129906Sbmilekic * modification, are permitted provided that the following conditions 7129906Sbmilekic * are met: 8129906Sbmilekic * 1. Redistributions of source code must retain the above copyright 9129906Sbmilekic * notice unmodified, this list of conditions and the following 10129906Sbmilekic * disclaimer. 11129906Sbmilekic * 2. Redistributions in binary form must reproduce the above copyright 12129906Sbmilekic * notice, this list of conditions and the following disclaimer in the 13129906Sbmilekic * documentation and/or other materials provided with the distribution. 14129906Sbmilekic * 15129906Sbmilekic * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16129906Sbmilekic * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17129906Sbmilekic * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18129906Sbmilekic * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19129906Sbmilekic * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20129906Sbmilekic * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21129906Sbmilekic * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22129906Sbmilekic * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23129906Sbmilekic * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24129906Sbmilekic * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25129906Sbmilekic * SUCH DAMAGE. 
26129906Sbmilekic */ 27129906Sbmilekic 28129906Sbmilekic#include <sys/cdefs.h> 29129906Sbmilekic__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 254814 2013-08-24 21:25:53Z andre $"); 30129906Sbmilekic 31129906Sbmilekic#include "opt_param.h" 32129906Sbmilekic 33129906Sbmilekic#include <sys/param.h> 34129906Sbmilekic#include <sys/malloc.h> 35129906Sbmilekic#include <sys/systm.h> 36129906Sbmilekic#include <sys/mbuf.h> 37129906Sbmilekic#include <sys/domain.h> 38129906Sbmilekic#include <sys/eventhandler.h> 39129906Sbmilekic#include <sys/kernel.h> 40129906Sbmilekic#include <sys/protosw.h> 41129906Sbmilekic#include <sys/smp.h> 42129906Sbmilekic#include <sys/sysctl.h> 43129906Sbmilekic 44163606Srwatson#include <security/mac/mac_framework.h> 45163606Srwatson 46129906Sbmilekic#include <vm/vm.h> 47194454Salc#include <vm/vm_extern.h> 48194454Salc#include <vm/vm_kern.h> 49129906Sbmilekic#include <vm/vm_page.h> 50245575Sandre#include <vm/vm_map.h> 51129906Sbmilekic#include <vm/uma.h> 52147537Ssilby#include <vm/uma_int.h> 53147537Ssilby#include <vm/uma_dbg.h> 54129906Sbmilekic 55129906Sbmilekic/* 56129906Sbmilekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 57129906Sbmilekic * Zones. 58129906Sbmilekic * 59129906Sbmilekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 60129906Sbmilekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 61129906Sbmilekic * administrator so desires. 62129906Sbmilekic * 63129906Sbmilekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 64129906Sbmilekic * Zone. 65129906Sbmilekic * 66129906Sbmilekic * Additionally, FreeBSD provides a Packet Zone, which it 67129906Sbmilekic * configures as a Secondary Zone to the Mbuf Master Zone, 68129906Sbmilekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones its _ctor_ function is executed.  Likewise,
 * for any deallocation through uma_zfree() the _dtor_ function
 * is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with
 * _zfini_ functions and free'd back to the global memory pool.
97151976Sandre * 98129906Sbmilekic */ 99129906Sbmilekic 100243631Sandreint nmbufs; /* limits number of mbufs */ 101151976Sandreint nmbclusters; /* limits number of mbuf clusters */ 102155780Sandreint nmbjumbop; /* limits number of page size jumbo clusters */ 103151976Sandreint nmbjumbo9; /* limits number of 9k jumbo clusters */ 104151976Sandreint nmbjumbo16; /* limits number of 16k jumbo clusters */ 105129906Sbmilekic 106253204Sandrestatic quad_t maxmbufmem; /* overall real memory limit for all mbufs */ 107253204Sandre 108253204SandreSYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0, 109253204Sandre "Maximum real memory allocateable to various mbuf types"); 110253204Sandre 111185893Sbz/* 112245575Sandre * tunable_mbinit() has to be run before any mbuf allocations are done. 113185893Sbz */ 114129906Sbmilekicstatic void 115129906Sbmilekictunable_mbinit(void *dummy) 116129906Sbmilekic{ 117253204Sandre quad_t realmem; 118129906Sbmilekic 119245575Sandre /* 120245575Sandre * The default limit for all mbuf related memory is 1/2 of all 121245575Sandre * available kernel memory (physical or kmem). 122245575Sandre * At most it can be 3/4 of available kernel memory. 
123245575Sandre */ 124254025Sjeff realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size); 125245575Sandre maxmbufmem = realmem / 2; 126253204Sandre TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); 127245575Sandre if (maxmbufmem > realmem / 4 * 3) 128245575Sandre maxmbufmem = realmem / 4 * 3; 129245575Sandre 130239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 131243639Sandre if (nmbclusters == 0) 132243639Sandre nmbclusters = maxmbufmem / MCLBYTES / 4; 133239624Snp 134239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); 135239624Snp if (nmbjumbop == 0) 136243639Sandre nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; 137239624Snp 138239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); 139239624Snp if (nmbjumbo9 == 0) 140243639Sandre nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; 141239624Snp 142239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); 143239624Snp if (nmbjumbo16 == 0) 144243639Sandre nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; 145243639Sandre 146243639Sandre /* 147243639Sandre * We need at least as many mbufs as we have clusters of 148243639Sandre * the various types added together. 
149243639Sandre */ 150243639Sandre TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 151243639Sandre if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 152243639Sandre nmbufs = lmax(maxmbufmem / MSIZE / 5, 153243996Spjd nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); 154129906Sbmilekic} 155245575SandreSYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 156129906Sbmilekic 157157927Spsstatic int 158157927Spssysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 159157927Sps{ 160157927Sps int error, newnmbclusters; 161157927Sps 162157927Sps newnmbclusters = nmbclusters; 163243995Spjd error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 164157927Sps if (error == 0 && req->newptr) { 165243631Sandre if (newnmbclusters > nmbclusters && 166243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 167157927Sps nmbclusters = newnmbclusters; 168253207Sandre nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 169157927Sps EVENTHANDLER_INVOKE(nmbclusters_change); 170157927Sps } else 171157927Sps error = EINVAL; 172157927Sps } 173157927Sps return (error); 174157927Sps} 175157927SpsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 176157927Sps&nmbclusters, 0, sysctl_nmbclusters, "IU", 177129906Sbmilekic "Maximum number of mbuf clusters allowed"); 178174292Srrs 179174292Srrsstatic int 180174292Srrssysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 181174292Srrs{ 182174292Srrs int error, newnmbjumbop; 183174292Srrs 184174292Srrs newnmbjumbop = nmbjumbop; 185243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 186174292Srrs if (error == 0 && req->newptr) { 187243631Sandre if (newnmbjumbop > nmbjumbop && 188243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 189174292Srrs nmbjumbop = newnmbjumbop; 190253207Sandre nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 191174292Srrs } else 192174292Srrs error = EINVAL; 193174292Srrs } 194174292Srrs return (error); 195174292Srrs} 
196174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 197174292Srrs&nmbjumbop, 0, sysctl_nmbjumbop, "IU", 198243996Spjd "Maximum number of mbuf page size jumbo clusters allowed"); 199174292Srrs 200174292Srrsstatic int 201174292Srrssysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 202174292Srrs{ 203174292Srrs int error, newnmbjumbo9; 204174292Srrs 205174292Srrs newnmbjumbo9 = nmbjumbo9; 206243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 207174292Srrs if (error == 0 && req->newptr) { 208253204Sandre if (newnmbjumbo9 > nmbjumbo9 && 209243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 210174292Srrs nmbjumbo9 = newnmbjumbo9; 211253207Sandre nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 212174292Srrs } else 213174292Srrs error = EINVAL; 214174292Srrs } 215174292Srrs return (error); 216174292Srrs} 217174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 218174292Srrs&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 219243995Spjd "Maximum number of mbuf 9k jumbo clusters allowed"); 220174292Srrs 221174292Srrsstatic int 222174292Srrssysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 223174292Srrs{ 224174292Srrs int error, newnmbjumbo16; 225174292Srrs 226174292Srrs newnmbjumbo16 = nmbjumbo16; 227243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 228174292Srrs if (error == 0 && req->newptr) { 229243631Sandre if (newnmbjumbo16 > nmbjumbo16 && 230243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 231174292Srrs nmbjumbo16 = newnmbjumbo16; 232253207Sandre nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 233174292Srrs } else 234174292Srrs error = EINVAL; 235174292Srrs } 236174292Srrs return (error); 237174292Srrs} 238174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 239174292Srrs&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 240151976Sandre "Maximum number of mbuf 16k jumbo clusters allowed"); 241174292Srrs 242243631Sandrestatic int 
243243631Sandresysctl_nmbufs(SYSCTL_HANDLER_ARGS) 244243631Sandre{ 245243631Sandre int error, newnmbufs; 246174292Srrs 247243631Sandre newnmbufs = nmbufs; 248243995Spjd error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 249243631Sandre if (error == 0 && req->newptr) { 250243631Sandre if (newnmbufs > nmbufs) { 251243631Sandre nmbufs = newnmbufs; 252253207Sandre nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 253243631Sandre EVENTHANDLER_INVOKE(nmbufs_change); 254243631Sandre } else 255243631Sandre error = EINVAL; 256243631Sandre } 257243631Sandre return (error); 258243631Sandre} 259253204SandreSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, 260243631Sandre&nmbufs, 0, sysctl_nmbufs, "IU", 261243631Sandre "Maximum number of mbufs allowed"); 262174292Srrs 263129906Sbmilekic/* 264129906Sbmilekic * Zones from which we allocate. 265129906Sbmilekic */ 266129906Sbmilekicuma_zone_t zone_mbuf; 267129906Sbmilekicuma_zone_t zone_clust; 268129906Sbmilekicuma_zone_t zone_pack; 269155780Sandreuma_zone_t zone_jumbop; 270151976Sandreuma_zone_t zone_jumbo9; 271151976Sandreuma_zone_t zone_jumbo16; 272151976Sandreuma_zone_t zone_ext_refcnt; 273129906Sbmilekic 274129906Sbmilekic/* 275129906Sbmilekic * Local prototypes. 276129906Sbmilekic */ 277132987Sgreenstatic int mb_ctor_mbuf(void *, int, void *, int); 278132987Sgreenstatic int mb_ctor_clust(void *, int, void *, int); 279132987Sgreenstatic int mb_ctor_pack(void *, int, void *, int); 280129906Sbmilekicstatic void mb_dtor_mbuf(void *, int, void *); 281151976Sandrestatic void mb_dtor_clust(void *, int, void *); 282151976Sandrestatic void mb_dtor_pack(void *, int, void *); 283151976Sandrestatic int mb_zinit_pack(void *, int, int); 284151976Sandrestatic void mb_zfini_pack(void *, int); 285129906Sbmilekic 286129906Sbmilekicstatic void mb_reclaim(void *); 287209390Sedstatic void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); 288129906Sbmilekic 289245575Sandre/* Ensure that MSIZE is a power of 2. 
*/ 290135510SbrianCTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 291135510Sbrian 292129906Sbmilekic/* 293129906Sbmilekic * Initialize FreeBSD Network buffer allocation. 294129906Sbmilekic */ 295129906Sbmilekicstatic void 296129906Sbmilekicmbuf_init(void *dummy) 297129906Sbmilekic{ 298129906Sbmilekic 299129906Sbmilekic /* 300129906Sbmilekic * Configure UMA zones for Mbufs, Clusters, and Packets. 301129906Sbmilekic */ 302151976Sandre zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 303151976Sandre mb_ctor_mbuf, mb_dtor_mbuf, 304147537Ssilby#ifdef INVARIANTS 305151976Sandre trash_init, trash_fini, 306147537Ssilby#else 307151976Sandre NULL, NULL, 308147537Ssilby#endif 309151976Sandre MSIZE - 1, UMA_ZONE_MAXBUCKET); 310243997Spjd if (nmbufs > 0) 311243997Spjd nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 312243999Spjd uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); 313151976Sandre 314148095Srwatson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 315151976Sandre mb_ctor_clust, mb_dtor_clust, 316147537Ssilby#ifdef INVARIANTS 317151976Sandre trash_init, trash_fini, 318147537Ssilby#else 319151976Sandre NULL, NULL, 320147537Ssilby#endif 321151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 322243997Spjd if (nmbclusters > 0) 323243997Spjd nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 324243999Spjd uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); 325151976Sandre 326148095Srwatson zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 327151976Sandre mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 328129906Sbmilekic 329156023Sglebius /* Make jumbo frame zone too. Page size, 9k and 16k. 
*/ 330155780Sandre zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 331153232Sandre mb_ctor_clust, mb_dtor_clust, 332153232Sandre#ifdef INVARIANTS 333153232Sandre trash_init, trash_fini, 334153232Sandre#else 335153232Sandre NULL, NULL, 336153232Sandre#endif 337153232Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 338243997Spjd if (nmbjumbop > 0) 339243997Spjd nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 340243999Spjd uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); 341153232Sandre 342151976Sandre zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 343151976Sandre mb_ctor_clust, mb_dtor_clust, 344151976Sandre#ifdef INVARIANTS 345151976Sandre trash_init, trash_fini, 346151976Sandre#else 347151976Sandre NULL, NULL, 348151976Sandre#endif 349151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 350243631Sandre uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 351243997Spjd if (nmbjumbo9 > 0) 352243997Spjd nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 353243999Spjd uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); 354129906Sbmilekic 355151976Sandre zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 356151976Sandre mb_ctor_clust, mb_dtor_clust, 357151976Sandre#ifdef INVARIANTS 358151976Sandre trash_init, trash_fini, 359151976Sandre#else 360151976Sandre NULL, NULL, 361151976Sandre#endif 362151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 363243631Sandre uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 364243997Spjd if (nmbjumbo16 > 0) 365243997Spjd nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 366243999Spjd uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); 367151976Sandre 368151976Sandre zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 369151976Sandre NULL, NULL, 370151976Sandre NULL, NULL, 371151976Sandre UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 372151976Sandre 373151976Sandre /* uma_prealloc() goes here... 
*/ 374151976Sandre 375129906Sbmilekic /* 376129906Sbmilekic * Hook event handler for low-memory situation, used to 377129906Sbmilekic * drain protocols and push data back to the caches (UMA 378129906Sbmilekic * later pushes it back to VM). 379129906Sbmilekic */ 380129906Sbmilekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 381129906Sbmilekic EVENTHANDLER_PRI_FIRST); 382129906Sbmilekic} 383245575SandreSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 384129906Sbmilekic 385129906Sbmilekic/* 386174247Salc * UMA backend page allocator for the jumbo frame zones. 387174247Salc * 388174247Salc * Allocates kernel virtual memory that is backed by contiguous physical 389174247Salc * pages. 390174247Salc */ 391174247Salcstatic void * 392209390Sedmbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) 393174247Salc{ 394174247Salc 395177921Salc /* Inform UMA that this allocator uses kernel_map/object. */ 396177921Salc *flags = UMA_SLAB_KERNEL; 397254025Sjeff return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 398195649Salc (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 399174247Salc} 400174247Salc 401174247Salc/* 402129906Sbmilekic * Constructor for Mbuf master zone. 403129906Sbmilekic * 404129906Sbmilekic * The 'arg' pointer points to a mb_args structure which 405129906Sbmilekic * contains call-specific information required to support the 406151976Sandre * mbuf allocation API. See mbuf.h. 
407129906Sbmilekic */ 408132987Sgreenstatic int 409132987Sgreenmb_ctor_mbuf(void *mem, int size, void *arg, int how) 410129906Sbmilekic{ 411129906Sbmilekic struct mbuf *m; 412129906Sbmilekic struct mb_args *args; 413132987Sgreen int error; 414129906Sbmilekic int flags; 415129906Sbmilekic short type; 416129906Sbmilekic 417147537Ssilby#ifdef INVARIANTS 418147537Ssilby trash_ctor(mem, size, arg, how); 419147537Ssilby#endif 420129906Sbmilekic args = (struct mb_args *)arg; 421129906Sbmilekic type = args->type; 422129906Sbmilekic 423151976Sandre /* 424151976Sandre * The mbuf is initialized later. The caller has the 425156059Sglebius * responsibility to set up any MAC labels too. 426151976Sandre */ 427151976Sandre if (type == MT_NOINIT) 428151976Sandre return (0); 429151976Sandre 430254779Sandre m = (struct mbuf *)mem; 431254779Sandre flags = args->flags; 432254779Sandre 433254779Sandre error = m_init(m, NULL, size, how, type, flags); 434254779Sandre 435254779Sandre return (error); 436129906Sbmilekic} 437129906Sbmilekic 438129906Sbmilekic/* 439151976Sandre * The Mbuf master zone destructor. 440129906Sbmilekic */ 441129906Sbmilekicstatic void 442129906Sbmilekicmb_dtor_mbuf(void *mem, int size, void *arg) 443129906Sbmilekic{ 444129906Sbmilekic struct mbuf *m; 445243995Spjd unsigned long flags; 446129906Sbmilekic 447129906Sbmilekic m = (struct mbuf *)mem; 448172462Skmacy flags = (unsigned long)arg; 449173029Sobrien 450254812Sandre if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) 451129906Sbmilekic m_tag_delete_chain(m, NULL); 452151976Sandre KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); 453254605Sandre KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 454147537Ssilby#ifdef INVARIANTS 455147537Ssilby trash_dtor(mem, size, arg); 456147537Ssilby#endif 457129906Sbmilekic} 458129906Sbmilekic 459151976Sandre/* 460151976Sandre * The Mbuf Packet zone destructor. 
461151976Sandre */ 462129906Sbmilekicstatic void 463129906Sbmilekicmb_dtor_pack(void *mem, int size, void *arg) 464129906Sbmilekic{ 465129906Sbmilekic struct mbuf *m; 466129906Sbmilekic 467129906Sbmilekic m = (struct mbuf *)mem; 468129906Sbmilekic if ((m->m_flags & M_PKTHDR) != 0) 469129906Sbmilekic m_tag_delete_chain(m, NULL); 470151976Sandre 471151976Sandre /* Make sure we've got a clean cluster back. */ 472151976Sandre KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 473151976Sandre KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 474151976Sandre KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 475175872Sphk KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 476175872Sphk KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 477151976Sandre KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 478152130Sglebius KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 479151976Sandre KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); 480147537Ssilby#ifdef INVARIANTS 481147537Ssilby trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 482147537Ssilby#endif 483166213Smohans /* 484173029Sobrien * If there are processes blocked on zone_clust, waiting for pages 485173029Sobrien * to be freed up, * cause them to be woken up by draining the 486173029Sobrien * packet zone. We are exposed to a race here * (in the check for 487173029Sobrien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 488173029Sobrien * is deliberate. We don't want to acquire the zone lock for every 489173029Sobrien * mbuf free. 490166213Smohans */ 491173029Sobrien if (uma_zone_exhausted_nolock(zone_clust)) 492173029Sobrien zone_drain(zone_pack); 493129906Sbmilekic} 494129906Sbmilekic 495129906Sbmilekic/* 496155780Sandre * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 
497129906Sbmilekic * 498129906Sbmilekic * Here the 'arg' pointer points to the Mbuf which we 499151976Sandre * are configuring cluster storage for. If 'arg' is 500151976Sandre * empty we allocate just the cluster without setting 501151976Sandre * the mbuf to it. See mbuf.h. 502129906Sbmilekic */ 503132987Sgreenstatic int 504132987Sgreenmb_ctor_clust(void *mem, int size, void *arg, int how) 505129906Sbmilekic{ 506129906Sbmilekic struct mbuf *m; 507151976Sandre u_int *refcnt; 508168374Skmacy int type; 509168374Skmacy uma_zone_t zone; 510173029Sobrien 511147537Ssilby#ifdef INVARIANTS 512147537Ssilby trash_ctor(mem, size, arg, how); 513147537Ssilby#endif 514168374Skmacy switch (size) { 515168374Skmacy case MCLBYTES: 516168374Skmacy type = EXT_CLUSTER; 517168374Skmacy zone = zone_clust; 518168374Skmacy break; 519168374Skmacy#if MJUMPAGESIZE != MCLBYTES 520168374Skmacy case MJUMPAGESIZE: 521168374Skmacy type = EXT_JUMBOP; 522168374Skmacy zone = zone_jumbop; 523168374Skmacy break; 524168374Skmacy#endif 525168374Skmacy case MJUM9BYTES: 526168374Skmacy type = EXT_JUMBO9; 527168374Skmacy zone = zone_jumbo9; 528168374Skmacy break; 529168374Skmacy case MJUM16BYTES: 530168374Skmacy type = EXT_JUMBO16; 531168374Skmacy zone = zone_jumbo16; 532168374Skmacy break; 533168374Skmacy default: 534168374Skmacy panic("unknown cluster size"); 535168374Skmacy break; 536168374Skmacy } 537168374Skmacy 538129906Sbmilekic m = (struct mbuf *)arg; 539168374Skmacy refcnt = uma_find_refcnt(zone, mem); 540173029Sobrien *refcnt = 1; 541151976Sandre if (m != NULL) { 542151976Sandre m->m_ext.ext_buf = (caddr_t)mem; 543151976Sandre m->m_data = m->m_ext.ext_buf; 544151976Sandre m->m_flags |= M_EXT; 545151976Sandre m->m_ext.ext_free = NULL; 546175872Sphk m->m_ext.ext_arg1 = NULL; 547175872Sphk m->m_ext.ext_arg2 = NULL; 548151976Sandre m->m_ext.ext_size = size; 549151976Sandre m->m_ext.ext_type = type; 550254780Sandre m->m_ext.ext_flags = 0; 551168374Skmacy m->m_ext.ref_cnt = refcnt; 552151976Sandre } 
553168374Skmacy 554132987Sgreen return (0); 555129906Sbmilekic} 556129906Sbmilekic 557151976Sandre/* 558151976Sandre * The Mbuf Cluster zone destructor. 559151976Sandre */ 560129906Sbmilekicstatic void 561129906Sbmilekicmb_dtor_clust(void *mem, int size, void *arg) 562129906Sbmilekic{ 563168374Skmacy#ifdef INVARIANTS 564168374Skmacy uma_zone_t zone; 565151976Sandre 566168374Skmacy zone = m_getzone(size); 567168374Skmacy KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, 568152035Sandre ("%s: refcnt incorrect %u", __func__, 569168374Skmacy *(uma_find_refcnt(zone, mem))) ); 570168374Skmacy 571147537Ssilby trash_dtor(mem, size, arg); 572147537Ssilby#endif 573129906Sbmilekic} 574129906Sbmilekic 575129906Sbmilekic/* 576129906Sbmilekic * The Packet secondary zone's init routine, executed on the 577151976Sandre * object's transition from mbuf keg slab to zone cache. 578129906Sbmilekic */ 579132987Sgreenstatic int 580151976Sandremb_zinit_pack(void *mem, int size, int how) 581129906Sbmilekic{ 582129906Sbmilekic struct mbuf *m; 583129906Sbmilekic 584151976Sandre m = (struct mbuf *)mem; /* m is virgin. */ 585156428Sandre if (uma_zalloc_arg(zone_clust, m, how) == NULL || 586156428Sandre m->m_ext.ext_buf == NULL) 587132987Sgreen return (ENOMEM); 588152101Sandre m->m_ext.ext_type = EXT_PACKET; /* Override. */ 589147537Ssilby#ifdef INVARIANTS 590147537Ssilby trash_init(m->m_ext.ext_buf, MCLBYTES, how); 591147537Ssilby#endif 592132987Sgreen return (0); 593129906Sbmilekic} 594129906Sbmilekic 595129906Sbmilekic/* 596129906Sbmilekic * The Packet secondary zone's fini routine, executed on the 597129906Sbmilekic * object's transition from zone cache to keg slab. 
598129906Sbmilekic */ 599129906Sbmilekicstatic void 600151976Sandremb_zfini_pack(void *mem, int size) 601129906Sbmilekic{ 602129906Sbmilekic struct mbuf *m; 603129906Sbmilekic 604129906Sbmilekic m = (struct mbuf *)mem; 605147537Ssilby#ifdef INVARIANTS 606147537Ssilby trash_fini(m->m_ext.ext_buf, MCLBYTES); 607147537Ssilby#endif 608129906Sbmilekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 609147652Ssilby#ifdef INVARIANTS 610147652Ssilby trash_dtor(mem, size, NULL); 611147652Ssilby#endif 612129906Sbmilekic} 613129906Sbmilekic 614129906Sbmilekic/* 615129906Sbmilekic * The "packet" keg constructor. 616129906Sbmilekic */ 617132987Sgreenstatic int 618132987Sgreenmb_ctor_pack(void *mem, int size, void *arg, int how) 619129906Sbmilekic{ 620129906Sbmilekic struct mbuf *m; 621129906Sbmilekic struct mb_args *args; 622254814Sandre int error, flags; 623129906Sbmilekic short type; 624129906Sbmilekic 625129906Sbmilekic m = (struct mbuf *)mem; 626129906Sbmilekic args = (struct mb_args *)arg; 627129906Sbmilekic flags = args->flags; 628129906Sbmilekic type = args->type; 629129906Sbmilekic 630147537Ssilby#ifdef INVARIANTS 631147537Ssilby trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 632147537Ssilby#endif 633173029Sobrien 634254779Sandre error = m_init(m, NULL, size, how, type, flags); 635254779Sandre 636151976Sandre /* m_ext is already initialized. 
*/ 637254779Sandre m->m_data = m->m_ext.ext_buf; 638254779Sandre m->m_flags = (flags | M_EXT); 639151976Sandre 640254779Sandre return (error); 641129906Sbmilekic} 642129906Sbmilekic 643194515Skmacyint 644194515Skmacym_pkthdr_init(struct mbuf *m, int how) 645194515Skmacy{ 646194515Skmacy#ifdef MAC 647194515Skmacy int error; 648194515Skmacy#endif 649194515Skmacy m->m_data = m->m_pktdat; 650254804Sandre m->m_pkthdr.rcvif = NULL; 651194515Skmacy SLIST_INIT(&m->m_pkthdr.tags); 652194515Skmacy m->m_pkthdr.len = 0; 653194515Skmacy m->m_pkthdr.flowid = 0; 654254804Sandre m->m_pkthdr.csum_flags = 0; 655250952Sjulian m->m_pkthdr.fibnum = 0; 656254804Sandre m->m_pkthdr.cosqos = 0; 657254804Sandre m->m_pkthdr.rsstype = 0; 658254804Sandre m->m_pkthdr.l2hlen = 0; 659254804Sandre m->m_pkthdr.l3hlen = 0; 660254804Sandre m->m_pkthdr.l4hlen = 0; 661254804Sandre m->m_pkthdr.l5hlen = 0; 662254804Sandre m->m_pkthdr.PH_per.sixtyfour[0] = 0; 663254804Sandre m->m_pkthdr.PH_loc.sixtyfour[0] = 0; 664194515Skmacy#ifdef MAC 665194515Skmacy /* If the label init fails, fail the alloc */ 666194515Skmacy error = mac_mbuf_init(m, how); 667194515Skmacy if (error) 668194515Skmacy return (error); 669194515Skmacy#endif 670194515Skmacy 671194515Skmacy return (0); 672194515Skmacy} 673194515Skmacy 674129906Sbmilekic/* 675129906Sbmilekic * This is the protocol drain routine. 676129906Sbmilekic * 677129906Sbmilekic * No locks should be held when this is called. The drain routines have to 678129906Sbmilekic * presently acquire some locks which raises the possibility of lock order 679129906Sbmilekic * reversal. 
680129906Sbmilekic */ 681129906Sbmilekicstatic void 682129906Sbmilekicmb_reclaim(void *junk) 683129906Sbmilekic{ 684129906Sbmilekic struct domain *dp; 685129906Sbmilekic struct protosw *pr; 686129906Sbmilekic 687129906Sbmilekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 688129906Sbmilekic "mb_reclaim()"); 689129906Sbmilekic 690129906Sbmilekic for (dp = domains; dp != NULL; dp = dp->dom_next) 691129906Sbmilekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 692129906Sbmilekic if (pr->pr_drain != NULL) 693129906Sbmilekic (*pr->pr_drain)(); 694129906Sbmilekic} 695