/*-
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 253204 2013-07-11 12:46:35Z andre $");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones, its _ctor_ function is executed.  Likewise,
 * whenever an object is deallocated through uma_zfree(), its
 * _dtor_ function is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with the
 * _zfini_ functions and freed back to the global memory pool.
 *
 */
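
/*
 * Illustrative sketch (not part of this file's compiled code): how the
 * zones above are reached through the standard mbuf API.  The entry
 * points are the usual ones from mbuf.h; the mapping onto zones follows
 * the diagram above.
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); // Packet zone: mbuf+cluster
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...
 *	m_freem(m);			// back into the per-CPU caches
 *
 *	m = m_gethdr(M_NOWAIT, MT_DATA);  // Mbuf Master Zone only
 *	if (m != NULL)
 *		m_clget(m, M_NOWAIT);	  // attach a 2K cluster to it
 */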

int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */
struct mbstat mbstat;

static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */

SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
    "Maximum real memory allocatable to various mbuf types");

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem;

	/*
	 * The default limit for all mbuf related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE,
	    vm_map_max(kmem_map) - vm_map_min(kmem_map));
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
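
/*
 * Worked example of the defaults above (illustrative numbers, not taken
 * from a real system): with 8GB of usable kernel memory, maxmbufmem
 * defaults to 4GB.  Standard clusters then default to
 * 4GB / MCLBYTES(2048) / 4 = 524288, i.e. the 2K and page-size cluster
 * types may each consume at most 1/4 of maxmbufmem (1/6 for the 9k and
 * 16k types), and nmbufs is sized so that plain mbufs alone may consume
 * up to 1/5 of it.
 */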

static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			uma_zone_set_max(zone_clust, nmbclusters);
			nmbclusters = uma_zone_get_max(zone_clust);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");
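
/*
 * Usage note (illustrative): the handler above lets the administrator
 * raise, but never lower, the cluster limit at runtime, e.g.:
 *
 *	# sysctl kern.ipc.nmbclusters=1048576
 *
 * The increase is accepted only while nmbufs still covers the combined
 * cluster limits; any other change fails with EINVAL.  The handlers
 * that follow apply the same pattern to the other cluster sizes.
 */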

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			uma_zone_set_max(zone_jumbop, nmbjumbop);
			nmbjumbop = uma_zone_get_max(zone_jumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			uma_zone_set_max(zone_jumbo9, nmbjumbo9);
			nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			uma_zone_set_max(zone_jumbo16, nmbjumbo16);
			nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			uma_zone_set_max(zone_mbuf, nmbufs);
			nmbufs = uma_zone_get_max(zone_mbuf);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
    "Mbuf general information and statistics");

/*
 * Zones from which we allocate.
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;
uma_zone_t	zone_ext_refcnt;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_clust(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);

static void	mb_reclaim(void *);
static void    *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);

/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
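/*
 * E.g. for MSIZE 256: ((255 ^ 256) + 1) >> 1 = (511 + 1) >> 1 = 256, so
 * the assertion holds; a non-power-of-2 value such as 320 would give
 * ((319 ^ 320) + 1) >> 1 = 64 and fail at compile time.
 */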

/*
 * Initialize FreeBSD Network buffer allocation.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");

	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

	/* Make the jumbo frame zones too: page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");

	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");

	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
	    NULL, NULL,
	    NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	/* uma_prealloc() goes here... */

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);

	/*
	 * [Re]set counters and local statistics knobs.
	 * XXX Some of these should go and be replaced, but UMA stat
	 * gathering needs to be revised.
	 */
	mbstat.m_mbufs = 0;
	mbstat.m_mclusts = 0;
	mbstat.m_drain = 0;
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;
	mbstat.m_numtypes = MT_NTYPES;

	mbstat.m_mcfail = mbstat.m_mpfail = 0;
	mbstat.sf_iocnt = 0;
	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}

/*
 * Constructor for the Mbuf master zone.
 *
 * The 'arg' pointer points to an mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
#ifdef MAC
	int error;
#endif
	int flags;
	short type;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

	/*
	 * The mbuf is initialized later.  The caller has the
	 * responsibility to set up any MAC labels too.
	 */
	if (type == MT_NOINIT)
		return (0);

	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_len = 0;
	m->m_flags = flags;
	m->m_type = type;
	if (flags & M_PKTHDR) {
		m->m_data = m->m_pktdat;
		m->m_pkthdr.rcvif = NULL;
		m->m_pkthdr.header = NULL;
		m->m_pkthdr.len = 0;
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
		m->m_pkthdr.tso_segsz = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.flowid = 0;
		m->m_pkthdr.fibnum = 0;
		SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
		/* If the label init fails, fail the alloc */
		error = mac_mbuf_init(m, how);
		if (error)
			return (error);
#endif
	} else
		m->m_data = m->m_dat;
	return (0);
}
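
/*
 * Sketch of how the constructor above receives its 'arg' (this mirrors
 * the m_get()/m_gethdr() inlines in mbuf.h; reproduced here only as an
 * illustration):
 *
 *	struct mb_args args;
 *
 *	args.flags = M_PKTHDR;
 *	args.type = MT_DATA;
 *	m = uma_zalloc_arg(zone_mbuf, &args, M_NOWAIT);
 *
 * UMA passes &args straight through to mb_ctor_mbuf() as 'arg'.
 */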

/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
	struct mbuf *m;
	unsigned long flags;

	m = (struct mbuf *)mem;
	flags = (unsigned long)arg;

	if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);
	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
#ifdef INVARIANTS
	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
	if ((m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);

	/* Make sure we've got a clean cluster back. */
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
	KASSERT(m->m_ext.ext_size == MCLBYTES,
	    ("%s: ext_size != MCLBYTES", __func__));
	KASSERT(m->m_ext.ext_type == EXT_PACKET,
	    ("%s: ext_type != EXT_PACKET", __func__));
	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
	/*
	 * If there are processes blocked on zone_clust, waiting for pages
	 * to be freed up, cause them to be woken up by draining the
	 * packet zone.  We are exposed to a race here (in the check for
	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
	 * is deliberate.  We don't want to acquire the zone lock for every
	 * mbuf free.
	 */
	if (uma_zone_exhausted_nolock(zone_clust))
		zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf for which we are
 * configuring cluster storage.  If 'arg' is NULL we allocate
 * just the cluster without attaching it to an mbuf.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	u_int *refcnt;
	int type;
	uma_zone_t zone;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	switch (size) {
	case MCLBYTES:
		type = EXT_CLUSTER;
		zone = zone_clust;
		break;
#if MJUMPAGESIZE != MCLBYTES
	case MJUMPAGESIZE:
		type = EXT_JUMBOP;
		zone = zone_jumbop;
		break;
#endif
	case MJUM9BYTES:
		type = EXT_JUMBO9;
		zone = zone_jumbo9;
		break;
	case MJUM16BYTES:
		type = EXT_JUMBO16;
		zone = zone_jumbo16;
		break;
	default:
		panic("unknown cluster size");
		break;
	}

	m = (struct mbuf *)arg;
	refcnt = uma_find_refcnt(zone, mem);
	*refcnt = 1;
	if (m != NULL) {
		m->m_ext.ext_buf = (caddr_t)mem;
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg1 = NULL;
		m->m_ext.ext_arg2 = NULL;
		m->m_ext.ext_size = size;
		m->m_ext.ext_type = type;
		m->m_ext.ref_cnt = refcnt;
	}

	return (0);
}
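
/*
 * The ref_cnt set up above is shared by every mbuf that later references
 * this cluster.  A minimal sketch of that sharing (an assumption for
 * illustration; the real logic lives in the m_copym()/mb_dupcl() paths
 * of kern/uipc_mbuf.c):
 *
 *	atomic_add_int(n->m_ext.ref_cnt, 1);	// another mbuf, same cluster
 *	...
 *	if (atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1)
 *		uma_zfree(zone_clust, m->m_ext.ext_buf); // last ref: free it
 */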

/*
 * The Mbuf Cluster zone destructor.
 */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
#ifdef INVARIANTS
	uma_zone_t zone;

	zone = m_getzone(size);
	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
	    ("%s: refcnt incorrect %u", __func__,
	    *(uma_find_refcnt(zone, mem))));

	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;		/* m is virgin. */
	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
	    m->m_ext.ext_buf == NULL)
		return (ENOMEM);
	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
#ifdef INVARIANTS
	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
	return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
#ifdef INVARIANTS
	trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
#ifdef MAC
	int error;
#endif
	int flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_ext.ext_buf;
	m->m_len = 0;
	m->m_flags = (flags | M_EXT);
	m->m_type = type;

	if (flags & M_PKTHDR) {
		m->m_pkthdr.rcvif = NULL;
		m->m_pkthdr.len = 0;
		m->m_pkthdr.header = NULL;
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
		m->m_pkthdr.tso_segsz = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.flowid = 0;
		m->m_pkthdr.fibnum = 0;
		SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
		/* If the label init fails, fail the alloc */
		error = mac_mbuf_init(m, how);
		if (error)
			return (error);
#endif
	}
	/* m_ext is already initialized. */

	return (0);
}

int
m_pkthdr_init(struct mbuf *m, int how)
{
#ifdef MAC
	int error;
#endif
	m->m_data = m->m_pktdat;
	SLIST_INIT(&m->m_pkthdr.tags);
	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.header = NULL;
	m->m_pkthdr.len = 0;
	m->m_pkthdr.flowid = 0;
	m->m_pkthdr.fibnum = 0;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	m->m_pkthdr.tso_segsz = 0;
	m->m_pkthdr.ether_vtag = 0;
#ifdef MAC
	/* If the label init fails, fail the alloc */
	error = mac_mbuf_init(m, how);
	if (error)
		return (error);
#endif

	return (0);
}
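
/*
 * Illustrative caller (an assumption, not code from this file): a
 * consumer turning a plain mbuf into a packet-header mbuf would do
 * roughly:
 *
 *	if (m_pkthdr_init(m, M_NOWAIT) != 0)
 *		goto fail;		// e.g. MAC label init failed
 *	m->m_flags |= M_PKTHDR;
 */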

/*
 * This is the protocol drain routine.
 *
 * No locks should be held when this is called.  The drain routines
 * presently have to acquire some locks, which raises the possibility
 * of lock-order reversal.
 */
static void
mb_reclaim(void *junk)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
	    "mb_reclaim()");

	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}