/*-
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |	                         [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones its _ctor_ function is executed.  Likewise,
 * whenever an object is deallocated through uma_zfree() its
 * _dtor_ function is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with
 * _zfini_ functions and freed back to the global memory pool.
 *
 */
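
/*
 * An illustrative sketch (not compiled in) of the two common
 * allocation paths in the diagram above.  It assumes the classic
 * mbuf API from mbuf.h and the M_DONTWAIT allocation flag of this
 * era; error handling is reduced to the bare minimum.
 */
#if 0
static struct mbuf *
example_alloc_paths(void)
{
	struct mbuf *m;

	/* One-shot: mbuf + 2K cluster straight from the Packet Zone. */
	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m != NULL)
		return (m);

	/* Two-step: mbuf from the Mbuf Zone, cluster from the Cluster Zone. */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m_clget(m, M_DONTWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return (NULL);
	}
	return (m);
}
#endif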

int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */
struct mbstat mbstat;

static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */

SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
    "Maximum real memory allocatable to various mbuf types");

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem;

	/*
	 * The default limit for all mbuf related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE,
	    vm_map_max(kmem_map) - vm_map_min(kmem_map));
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
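
/*
 * A worked example of the defaults above, assuming MSIZE = 256,
 * PAGE_SIZE = 4096 and 1 GB of available kernel memory:
 * maxmbufmem = 1 GB / 2 = 512 MB; nmbclusters = 512 MB / 2048 / 4 =
 * 65536; nmbjumbop = 512 MB / 4096 / 4 = 32768; nmbjumbo9 =
 * 512 MB / 9216 / 6 = 9709; nmbjumbo16 = 512 MB / 16384 / 6 = 5461;
 * nmbufs = max(512 MB / 256 / 5, 65536 + 32768 + 9709 + 5461) =
 * max(419430, 113474) = 419430.
 */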

static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr && newnmbufs != nmbufs) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
    "Mbuf general information and statistics");

/*
 * Zones from which we allocate.
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;
uma_zone_t	zone_ext_refcnt;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_clust(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);

static void	mb_reclaim(void *);
static void    *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);

/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
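
/*
 * For a power of two, MSIZE - 1 has set exactly the bits below
 * MSIZE's single set bit, e.g. with MSIZE = 256: (255 ^ 256) = 511,
 * 511 + 1 = 512, 512 >> 1 = 256 == MSIZE.  A non-power-of-two such
 * as 257 gives (256 ^ 257) = 1, (1 + 1) >> 1 = 1 != 257, so the
 * assertion fails at compile time.
 */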

/*
 * Initialize FreeBSD Network buffer allocation.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);

	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

	/* Make the jumbo frame zones too: page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);

	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);

	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
	    NULL, NULL,
	    NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	/* uma_prealloc() goes here... */

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);

	/*
	 * [Re]set counters and local statistics knobs.
	 * XXX Some of these should go and be replaced, but UMA stat
	 * gathering needs to be revised.
	 */
	mbstat.m_mbufs = 0;
	mbstat.m_mclusts = 0;
	mbstat.m_drain = 0;
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;
	mbstat.m_numtypes = MT_NTYPES;

	mbstat.m_mcfail = mbstat.m_mpfail = 0;
	mbstat.sf_iocnt = 0;
	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.  The 9k and 16k clusters span multiple pages and are typically
 * handed to hardware as a single DMA segment, so discontiguous backing
 * would not do.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}

/*
 * Constructor for Mbuf master zone.
 *
 * The 'arg' pointer points to an mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
#ifdef MAC
	int error;
#endif
	int flags;
	short type;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

	/*
	 * The mbuf is initialized later.  The caller has the
	 * responsibility to set up any MAC labels too.
	 */
	if (type == MT_NOINIT)
		return (0);

	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_len = 0;
	m->m_flags = flags;
	m->m_type = type;
	if (flags & M_PKTHDR) {
		m->m_data = m->m_pktdat;
		m->m_pkthdr.rcvif = NULL;
		m->m_pkthdr.header = NULL;
		m->m_pkthdr.len = 0;
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
		m->m_pkthdr.tso_segsz = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.flowid = 0;
		SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
		/* If the label init fails, fail the alloc */
		error = mac_mbuf_init(m, how);
		if (error)
			return (error);
#endif
	} else
		m->m_data = m->m_dat;
	return (0);
}
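
/*
 * A sketch (not compiled in) of how the allocation API drives this
 * constructor: m_get() and its siblings in mbuf.h fill in an mb_args
 * on the stack and hand it to uma_zalloc_arg(), which passes it
 * through as 'arg' above.
 */
#if 0
static struct mbuf *
example_m_get(int how, short type)
{
	struct mb_args args;

	args.flags = 0;		/* No M_PKTHDR: a plain data mbuf. */
	args.type = type;	/* e.g. MT_DATA; copied by mb_ctor_mbuf(). */
	return ((struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how));
}
#endif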

/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
	struct mbuf *m;
	unsigned long flags;

	m = (struct mbuf *)mem;
	flags = (unsigned long)arg;

	if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);
	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
#ifdef INVARIANTS
	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
	if ((m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);

	/* Make sure we've got a clean cluster back. */
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
	KASSERT(m->m_ext.ext_size == MCLBYTES,
	    ("%s: ext_size != MCLBYTES", __func__));
	KASSERT(m->m_ext.ext_type == EXT_PACKET,
	    ("%s: ext_type != EXT_PACKET", __func__));
	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
	/*
	 * If there are processes blocked on zone_clust, waiting for pages
	 * to be freed up, cause them to be woken up by draining the
	 * packet zone.  We are exposed to a race here (in the check for
	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
	 * is deliberate.  We don't want to acquire the zone lock for every
	 * mbuf free.
	 */
	if (uma_zone_exhausted_nolock(zone_clust))
		zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf which we
 * are configuring cluster storage for.  If 'arg' is
 * NULL we allocate just the cluster without attaching
 * it to an mbuf.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	u_int *refcnt;
	int type;
	uma_zone_t zone;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	switch (size) {
	case MCLBYTES:
		type = EXT_CLUSTER;
		zone = zone_clust;
		break;
#if MJUMPAGESIZE != MCLBYTES
	case MJUMPAGESIZE:
		type = EXT_JUMBOP;
		zone = zone_jumbop;
		break;
#endif
	case MJUM9BYTES:
		type = EXT_JUMBO9;
		zone = zone_jumbo9;
		break;
	case MJUM16BYTES:
		type = EXT_JUMBO16;
		zone = zone_jumbo16;
		break;
	default:
		panic("unknown cluster size");
		break;
	}

	m = (struct mbuf *)arg;
	refcnt = uma_find_refcnt(zone, mem);
	*refcnt = 1;
	if (m != NULL) {
		m->m_ext.ext_buf = (caddr_t)mem;
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg1 = NULL;
		m->m_ext.ext_arg2 = NULL;
		m->m_ext.ext_size = size;
		m->m_ext.ext_type = type;
		m->m_ext.ref_cnt = refcnt;
	}

	return (0);
}
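
/*
 * A sketch (not compiled in) of how a cluster gets attached:
 * m_clget() in mbuf.h passes the mbuf as the ctor argument, so the
 * switch above selects the right EXT_* type and wires up m_ext;
 * m_cljget() can pass a NULL mbuf to obtain a bare cluster instead.
 */
#if 0
static void
example_clget(struct mbuf *m, int how)
{

	m->m_ext.ext_buf = NULL;
	uma_zalloc_arg(zone_clust, m, how);	/* Invokes mb_ctor_clust(). */
	if ((m->m_flags & M_EXT) == 0)
		printf("cluster allocation failed\n");
}
#endif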

/*
 * The Mbuf Cluster zone destructor.
 */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
#ifdef INVARIANTS
	uma_zone_t zone;

	zone = m_getzone(size);
	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
	    ("%s: refcnt incorrect %u", __func__,
	    *(uma_find_refcnt(zone, mem))));

	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;		/* m is virgin. */
	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
	    m->m_ext.ext_buf == NULL)
		return (ENOMEM);
	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
#ifdef INVARIANTS
	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
	return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
#ifdef INVARIANTS
	trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
#ifdef MAC
	int error;
#endif
	int flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_ext.ext_buf;
	m->m_len = 0;
	m->m_flags = (flags | M_EXT);
	m->m_type = type;

	if (flags & M_PKTHDR) {
		m->m_pkthdr.rcvif = NULL;
		m->m_pkthdr.len = 0;
		m->m_pkthdr.header = NULL;
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
		m->m_pkthdr.tso_segsz = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.flowid = 0;
		SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
		/* If the label init fails, fail the alloc */
		error = mac_mbuf_init(m, how);
		if (error)
			return (error);
#endif
	}
	/* m_ext is already initialized. */

	return (0);
}

int
m_pkthdr_init(struct mbuf *m, int how)
{
#ifdef MAC
	int error;
#endif
	m->m_data = m->m_pktdat;
	SLIST_INIT(&m->m_pkthdr.tags);
	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.header = NULL;
	m->m_pkthdr.len = 0;
	m->m_pkthdr.flowid = 0;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	m->m_pkthdr.tso_segsz = 0;
	m->m_pkthdr.ether_vtag = 0;
#ifdef MAC
	/* If the label init fails, fail the alloc */
	error = mac_mbuf_init(m, how);
	if (error)
		return (error);
#endif

	return (0);
}

/*
 * This is the protocol drain routine.
 *
 * No locks should be held when this is called.  At present the drain
 * routines have to acquire some locks, which raises the possibility of
 * lock order reversal.
 */
static void
mb_reclaim(void *junk)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
	    "mb_reclaim()");

	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}