kern_mbuf.c revision 147537
1/*-
2 * Copyright (c) 2004, 2005,
3 * 	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 147537 2005-06-23 04:33:39Z silby $");
30
31#include "opt_mac.h"
32#include "opt_param.h"
33
34#include <sys/param.h>
35#include <sys/mac.h>
36#include <sys/malloc.h>
37#include <sys/systm.h>
38#include <sys/mbuf.h>
39#include <sys/domain.h>
40#include <sys/eventhandler.h>
41#include <sys/kernel.h>
42#include <sys/protosw.h>
43#include <sys/smp.h>
44#include <sys/sysctl.h>
45
46#include <vm/vm.h>
47#include <vm/vm_page.h>
48#include <vm/uma.h>
49#include <vm/uma_int.h>
50#include <vm/uma_dbg.h>
51
/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 */
82
83int nmbclusters;
84struct mbstat mbstat;
85
86static void
87tunable_mbinit(void *dummy)
88{
89
90	/* This has to be done before VM init. */
91	nmbclusters = 1024 + maxusers * 64;
92	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
93}
94SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
95
96SYSCTL_DECL(_kern_ipc);
97SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0,
98    "Maximum number of mbuf clusters allowed");
99SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
100    "Mbuf general information and statistics");
101
102/*
103 * Zones from which we allocate.
104 */
105uma_zone_t	zone_mbuf;
106uma_zone_t	zone_clust;
107uma_zone_t	zone_pack;
108
/*
 * Local prototypes: per-zone UMA callbacks first, then the low-memory
 * hook and the subsystem initializer.
 */
static int	mb_ctor_mbuf(void *mem, int size, void *arg, int how);
static int	mb_ctor_clust(void *mem, int size, void *arg, int how);
static int	mb_ctor_pack(void *mem, int size, void *arg, int how);
static void	mb_dtor_mbuf(void *mem, int size, void *arg);
static void	mb_dtor_clust(void *mem, int size, void *arg);	/* XXX */
static void	mb_dtor_pack(void *mem, int size, void *arg);	/* XXX */
static int	mb_init_pack(void *mem, int size, int how);
static void	mb_fini_pack(void *mem, int size);

static void	mb_reclaim(void *junk);
static void	mbuf_init(void *dummy);
123
124/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
125CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
126
127/*
128 * Initialize FreeBSD Network buffer allocation.
129 */
130SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
131static void
132mbuf_init(void *dummy)
133{
134
135	/*
136	 * Configure UMA zones for Mbufs, Clusters, and Packets.
137	 */
138	zone_mbuf = uma_zcreate("Mbuf", MSIZE, mb_ctor_mbuf, mb_dtor_mbuf,
139#ifdef INVARIANTS
140	    trash_init, trash_fini, MSIZE - 1, UMA_ZONE_MAXBUCKET);
141#else
142	    NULL, NULL, MSIZE - 1, UMA_ZONE_MAXBUCKET);
143#endif
144	zone_clust = uma_zcreate("MbufClust", MCLBYTES, mb_ctor_clust,
145#ifdef INVARIANTS
146	    mb_dtor_clust, trash_init, trash_fini, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
147#else
148	    mb_dtor_clust, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
149#endif
150	if (nmbclusters > 0)
151		uma_zone_set_max(zone_clust, nmbclusters);
152	zone_pack = uma_zsecond_create("Packet", mb_ctor_pack, mb_dtor_pack,
153	    mb_init_pack, mb_fini_pack, zone_mbuf);
154
155	/* uma_prealloc() goes here */
156
157	/*
158	 * Hook event handler for low-memory situation, used to
159	 * drain protocols and push data back to the caches (UMA
160	 * later pushes it back to VM).
161	 */
162	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
163	    EVENTHANDLER_PRI_FIRST);
164
165	/*
166	 * [Re]set counters and local statistics knobs.
167	 * XXX Some of these should go and be replaced, but UMA stat
168	 * gathering needs to be revised.
169	 */
170	mbstat.m_mbufs = 0;
171	mbstat.m_mclusts = 0;
172	mbstat.m_drain = 0;
173	mbstat.m_msize = MSIZE;
174	mbstat.m_mclbytes = MCLBYTES;
175	mbstat.m_minclsize = MINCLSIZE;
176	mbstat.m_mlen = MLEN;
177	mbstat.m_mhlen = MHLEN;
178	mbstat.m_numtypes = MT_NTYPES;
179
180	mbstat.m_mcfail = mbstat.m_mpfail = 0;
181	mbstat.sf_iocnt = 0;
182	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
183}
184
185/*
186 * Constructor for Mbuf master zone.
187 *
188 * The 'arg' pointer points to a mb_args structure which
189 * contains call-specific information required to support the
190 * mbuf allocation API.
191 */
192static int
193mb_ctor_mbuf(void *mem, int size, void *arg, int how)
194{
195	struct mbuf *m;
196	struct mb_args *args;
197#ifdef MAC
198	int error;
199#endif
200	int flags;
201	short type;
202
203#ifdef INVARIANTS
204	trash_ctor(mem, size, arg, how);
205#endif
206	m = (struct mbuf *)mem;
207	args = (struct mb_args *)arg;
208	flags = args->flags;
209	type = args->type;
210
211	m->m_type = type;
212	m->m_next = NULL;
213	m->m_nextpkt = NULL;
214	m->m_flags = flags;
215	if (flags & M_PKTHDR) {
216		m->m_data = m->m_pktdat;
217		m->m_pkthdr.rcvif = NULL;
218		m->m_pkthdr.csum_flags = 0;
219		SLIST_INIT(&m->m_pkthdr.tags);
220#ifdef MAC
221		/* If the label init fails, fail the alloc */
222		error = mac_init_mbuf(m, how);
223		if (error)
224			return (error);
225#endif
226	} else
227		m->m_data = m->m_dat;
228	mbstat.m_mbufs += 1;	/* XXX */
229	return (0);
230}
231
232/*
233 * The Mbuf master zone and Packet secondary zone destructor.
234 */
235static void
236mb_dtor_mbuf(void *mem, int size, void *arg)
237{
238	struct mbuf *m;
239
240	m = (struct mbuf *)mem;
241	if ((m->m_flags & M_PKTHDR) != 0)
242		m_tag_delete_chain(m, NULL);
243#ifdef INVARIANTS
244	trash_dtor(mem, size, arg);
245#endif
246	mbstat.m_mbufs -= 1;	/* XXX */
247}
248
249/* XXX Only because of stats */
250static void
251mb_dtor_pack(void *mem, int size, void *arg)
252{
253	struct mbuf *m;
254
255	m = (struct mbuf *)mem;
256	if ((m->m_flags & M_PKTHDR) != 0)
257		m_tag_delete_chain(m, NULL);
258#ifdef INVARIANTS
259	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
260#endif
261	mbstat.m_mbufs -= 1;	/* XXX */
262	mbstat.m_mclusts -= 1;	/* XXX */
263}
264
265/*
266 * The Cluster zone constructor.
267 *
268 * Here the 'arg' pointer points to the Mbuf which we
269 * are configuring cluster storage for.
270 */
271static int
272mb_ctor_clust(void *mem, int size, void *arg, int how)
273{
274	struct mbuf *m;
275
276#ifdef INVARIANTS
277	trash_ctor(mem, size, arg, how);
278#endif
279	m = (struct mbuf *)arg;
280	m->m_ext.ext_buf = (caddr_t)mem;
281	m->m_data = m->m_ext.ext_buf;
282	m->m_flags |= M_EXT;
283	m->m_ext.ext_free = NULL;
284	m->m_ext.ext_args = NULL;
285	m->m_ext.ext_size = MCLBYTES;
286	m->m_ext.ext_type = EXT_CLUSTER;
287	m->m_ext.ref_cnt = NULL;	/* Lazy counter assign. */
288	mbstat.m_mclusts += 1;	/* XXX */
289	return (0);
290}
291
292/* XXX */
293static void
294mb_dtor_clust(void *mem, int size, void *arg)
295{
296#ifdef INVARIANTS
297	trash_dtor(mem, size, arg);
298#endif
299	mbstat.m_mclusts -= 1;	/* XXX */
300}
301
302/*
303 * The Packet secondary zone's init routine, executed on the
304 * object's transition from keg slab to zone cache.
305 */
306static int
307mb_init_pack(void *mem, int size, int how)
308{
309	struct mbuf *m;
310
311	m = (struct mbuf *)mem;
312	m->m_ext.ext_buf = NULL;
313	uma_zalloc_arg(zone_clust, m, how);
314	if (m->m_ext.ext_buf == NULL)
315		return (ENOMEM);
316#ifdef INVARIANTS
317	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
318#endif
319	mbstat.m_mclusts -= 1;	/* XXX */
320	return (0);
321}
322
323/*
324 * The Packet secondary zone's fini routine, executed on the
325 * object's transition from zone cache to keg slab.
326 */
327static void
328mb_fini_pack(void *mem, int size)
329{
330	struct mbuf *m;
331
332	m = (struct mbuf *)mem;
333#ifdef INVARIANTS
334	trash_fini(m->m_ext.ext_buf, MCLBYTES);
335#endif
336	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
337	m->m_ext.ext_buf = NULL;
338	mbstat.m_mclusts += 1;	/* XXX */
339}
340
341/*
342 * The "packet" keg constructor.
343 */
344static int
345mb_ctor_pack(void *mem, int size, void *arg, int how)
346{
347	struct mbuf *m;
348	struct mb_args *args;
349#ifdef MAC
350	int error;
351#endif
352	int flags;
353	short type;
354
355	m = (struct mbuf *)mem;
356	args = (struct mb_args *)arg;
357	flags = args->flags;
358	type = args->type;
359
360#ifdef INVARIANTS
361	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
362#endif
363	m->m_type = type;
364	m->m_next = NULL;
365	m->m_nextpkt = NULL;
366	m->m_data = m->m_ext.ext_buf;
367	m->m_flags = flags|M_EXT;
368	m->m_ext.ext_free = NULL;
369	m->m_ext.ext_args = NULL;
370	m->m_ext.ext_size = MCLBYTES;
371	m->m_ext.ext_type = EXT_PACKET;
372	m->m_ext.ref_cnt = NULL;	/* Lazy counter assign. */
373
374	if (flags & M_PKTHDR) {
375		m->m_pkthdr.rcvif = NULL;
376		m->m_pkthdr.csum_flags = 0;
377		SLIST_INIT(&m->m_pkthdr.tags);
378#ifdef MAC
379		/* If the label init fails, fail the alloc */
380		error = mac_init_mbuf(m, how);
381		if (error)
382			return (error);
383#endif
384	}
385	mbstat.m_mbufs += 1;	/* XXX */
386	mbstat.m_mclusts += 1;	/* XXX */
387	return (0);
388}
389
390/*
391 * This is the protocol drain routine.
392 *
393 * No locks should be held when this is called.  The drain routines have to
394 * presently acquire some locks which raises the possibility of lock order
395 * reversal.
396 */
397static void
398mb_reclaim(void *junk)
399{
400	struct domain *dp;
401	struct protosw *pr;
402
403	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
404	    "mb_reclaim()");
405
406	mbstat.m_drain++;
407	for (dp = domains; dp != NULL; dp = dp->dom_next)
408		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
409			if (pr->pr_drain != NULL)
410				(*pr->pr_drain)();
411}
412