kern_mbuf.c revision 249843
/*-
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 249843 2013-04-24 13:54:55Z andre $");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |	                         [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones its _ctor_ function is executed.  Likewise,
 * on every deallocation through uma_zfree() its _dtor_ function
 * is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with
 * _zfini_ functions and freed back to the global memory pool.
 *
 */
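
/*
 * Example (an illustrative sketch, not part of this file): the
 * common-case allocation paths shown above, as seen by a consumer of
 * the mbuf(9) API.  m_getcl() draws an mbuf with an attached cluster
 * from the Packet Zone in a single per-CPU cache operation, and
 * m_freem() returns it there:
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...
 *	m_freem(m);
 */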

int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */
struct mbstat mbstat;

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem, maxmbufmem;

	/*
	 * The default limit for all mbuf related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE,
	    vm_map_max(kmem_map) - vm_map_min(kmem_map));
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
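
/*
 * Worked example of the defaults above (a sketch; assumes typical
 * amd64 values of PAGE_SIZE 4096, MSIZE 256, MCLBYTES 2048 and
 * MJUMPAGESIZE 4096, no tunables set) for realmem = 4 GB:
 *
 *	maxmbufmem  = 4 GB / 2               = 2147483648
 *	nmbclusters = 2147483648 / 2048 / 4  = 262144
 *	nmbjumbop   = 2147483648 / 4096 / 4  = 131072
 *	nmbjumbo9   = 2147483648 / 9216 / 6  = 38836
 *	nmbjumbo16  = 2147483648 / 16384 / 6 = 21845
 *	nmbufs      = max(2147483648 / 256 / 5, 262144 + 131072 +
 *	              38836 + 21845) = max(1677721, 453897) = 1677721
 */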

static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			uma_zone_set_max(zone_clust, nmbclusters);
			nmbclusters = uma_zone_get_max(zone_clust);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
&nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");
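
/*
 * Example (a sketch): the handler above only allows the limit to be
 * raised at run time, and only while nmbufs still covers all cluster
 * types; lowering it is rejected:
 *
 *	# sysctl kern.ipc.nmbclusters=524288	(raise: accepted)
 *	# sysctl kern.ipc.nmbclusters=1024	(lower: EINVAL)
 *
 * The boot-time default can instead be overridden from loader.conf(5)
 * with kern.ipc.nmbclusters="524288"; the value 524288 is arbitrary.
 */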

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			uma_zone_set_max(zone_jumbop, nmbjumbop);
			nmbjumbop = uma_zone_get_max(zone_jumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			uma_zone_set_max(zone_jumbo9, nmbjumbo9);
			nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			uma_zone_set_max(zone_jumbo16, nmbjumbo16);
			nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			uma_zone_set_max(zone_mbuf, nmbufs);
			nmbufs = uma_zone_get_max(zone_mbuf);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
&nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
    "Mbuf general information and statistics");

/*
 * Zones from which we allocate.
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;
uma_zone_t	zone_ext_refcnt;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_clust(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);

static void	mb_reclaim(void *);
static void    *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);

/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
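
/*
 * (Why the expression above works: it evaluates to the lowest set bit
 * of MSIZE.  (MSIZE - 1) ^ MSIZE sets all bits up to and including
 * that bit; adding 1 and shifting right isolates it.  The result
 * equals MSIZE exactly when MSIZE is a power of 2.)
 */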

/*
 * Initialize FreeBSD Network buffer allocation.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");

	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

	/* Make jumbo frame zones too. Page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");

	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");

	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
	    NULL, NULL,
	    NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	/* uma_prealloc() goes here... */

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);

	/*
	 * [Re]set counters and local statistics knobs.
	 * XXX Some of these should go and be replaced, but UMA stat
	 * gathering needs to be revised.
	 */
	mbstat.m_mbufs = 0;
	mbstat.m_mclusts = 0;
	mbstat.m_drain = 0;
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;
	mbstat.m_numtypes = MT_NTYPES;

	mbstat.m_mcfail = mbstat.m_mpfail = 0;
	mbstat.sf_iocnt = 0;
	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	/* Any physical address range, byte alignment, no boundary. */
	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}

/*
 * Constructor for Mbuf master zone.
 *
 * The 'arg' pointer points to a mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
#ifdef MAC
	int error;
#endif
	int flags;
	short type;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

	/*
	 * The mbuf is initialized later.  The caller has the
	 * responsibility to set up any MAC labels too.
	 */
	if (type == MT_NOINIT)
		return (0);

	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_len = 0;
	m->m_flags = flags;
	m->m_type = type;
	if (flags & M_PKTHDR) {
		m->m_data = m->m_pktdat;
		m->m_pkthdr.rcvif = NULL;
		m->m_pkthdr.header = NULL;
		m->m_pkthdr.len = 0;
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
		m->m_pkthdr.tso_segsz = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.flowid = 0;
		SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
		/* If the label init fails, fail the alloc */
		error = mac_mbuf_init(m, how);
		if (error)
			return (error);
#endif
	} else
		m->m_data = m->m_dat;
	return (0);
}
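
/*
 * Example (a sketch): this constructor runs when m_get(9) and friends
 * allocate from zone_mbuf; the mb_args structure carries the flags and
 * type down from the API:
 *
 *	struct mb_args args;
 *
 *	args.flags = 0;
 *	args.type = MT_DATA;
 *	m = uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
 */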

/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
	struct mbuf *m;
	unsigned long flags;

	m = (struct mbuf *)mem;
	flags = (unsigned long)arg;

	if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);
	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
#ifdef INVARIANTS
	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
	if ((m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);

	/* Make sure we've got a clean cluster back. */
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
	KASSERT(m->m_ext.ext_size == MCLBYTES,
	    ("%s: ext_size != MCLBYTES", __func__));
	KASSERT(m->m_ext.ext_type == EXT_PACKET,
	    ("%s: ext_type != EXT_PACKET", __func__));
	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
	/*
	 * If there are processes blocked on zone_clust, waiting for pages
	 * to be freed up, cause them to be woken up by draining the
	 * packet zone.  We are exposed to a race here (in the check for
	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
	 * is deliberate. We don't want to acquire the zone lock for every
	 * mbuf free.
	 */
	if (uma_zone_exhausted_nolock(zone_clust))
		zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf which we
 * are configuring cluster storage for.  If 'arg' is
 * empty we allocate just the cluster without setting
 * the mbuf to it.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	u_int *refcnt;
	int type;
	uma_zone_t zone;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	switch (size) {
	case MCLBYTES:
		type = EXT_CLUSTER;
		zone = zone_clust;
		break;
#if MJUMPAGESIZE != MCLBYTES
	case MJUMPAGESIZE:
		type = EXT_JUMBOP;
		zone = zone_jumbop;
		break;
#endif
	case MJUM9BYTES:
		type = EXT_JUMBO9;
		zone = zone_jumbo9;
		break;
	case MJUM16BYTES:
		type = EXT_JUMBO16;
		zone = zone_jumbo16;
		break;
	default:
		panic("unknown cluster size");
		break;
	}

	m = (struct mbuf *)arg;
	refcnt = uma_find_refcnt(zone, mem);
	*refcnt = 1;
	if (m != NULL) {
		m->m_ext.ext_buf = (caddr_t)mem;
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg1 = NULL;
		m->m_ext.ext_arg2 = NULL;
		m->m_ext.ext_size = size;
		m->m_ext.ext_type = type;
		m->m_ext.ref_cnt = refcnt;
	}

	return (0);
}
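
/*
 * Example (a sketch): m_clget(9) reaches this constructor by passing
 * the mbuf as the allocation argument; on failure M_EXT stays clear,
 * which the caller must check:
 *
 *	m = m_get(M_NOWAIT, MT_DATA);
 *	if (m != NULL) {
 *		m_clget(m, M_NOWAIT);
 *		if ((m->m_flags & M_EXT) == 0) {
 *			m_free(m);
 *			return (ENOBUFS);
 *		}
 *	}
 */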

/*
 * The Mbuf Cluster zone destructor.
 */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
#ifdef INVARIANTS
	uma_zone_t zone;

	zone = m_getzone(size);
	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
		("%s: refcnt incorrect %u", __func__,
		 *(uma_find_refcnt(zone, mem))));

	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;		/* m is virgin. */
	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
	    m->m_ext.ext_buf == NULL)
		return (ENOMEM);
	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
#ifdef INVARIANTS
	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
	return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
#ifdef INVARIANTS
	trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
#ifdef MAC
	int error;
#endif
	int flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_ext.ext_buf;
	m->m_len = 0;
	m->m_flags = (flags | M_EXT);
	m->m_type = type;

	if (flags & M_PKTHDR) {
		m->m_pkthdr.rcvif = NULL;
		m->m_pkthdr.len = 0;
		m->m_pkthdr.header = NULL;
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
		m->m_pkthdr.tso_segsz = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.flowid = 0;
		SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
		/* If the label init fails, fail the alloc */
		error = mac_mbuf_init(m, how);
		if (error)
			return (error);
#endif
	}
	/* m_ext is already initialized. */

	return (0);
}
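
/*
 * Example (a sketch): m_getcl(9) allocates from zone_pack, so a cached
 * packet arrives here with its cluster still attached by
 * mb_zinit_pack(); only the mbuf header fields need resetting:
 *
 *	struct mb_args args;
 *
 *	args.flags = M_PKTHDR;
 *	args.type = MT_DATA;
 *	m = uma_zalloc_arg(zone_pack, &args, M_NOWAIT);
 */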

int
m_pkthdr_init(struct mbuf *m, int how)
{
#ifdef MAC
	int error;
#endif
	m->m_data = m->m_pktdat;
	SLIST_INIT(&m->m_pkthdr.tags);
	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.header = NULL;
	m->m_pkthdr.len = 0;
	m->m_pkthdr.flowid = 0;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	m->m_pkthdr.tso_segsz = 0;
	m->m_pkthdr.ether_vtag = 0;
#ifdef MAC
	/* If the label init fails, fail the alloc */
	error = mac_mbuf_init(m, how);
	if (error)
		return (error);
#endif

	return (0);
}

/*
 * This is the protocol drain routine.
 *
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * reversal.
 */
static void
mb_reclaim(void *junk)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
	    "mb_reclaim()");

	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}
758