--- kern_mbuf.c (243999)
+++ kern_mbuf.c (245575)
/*-
 * Copyright (c) 2004, 2005,
 * Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 243999 2012-12-07 22:30:30Z pjd $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 245575 2013-01-17 21:28:31Z andre $");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
+#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *   |                         |
 *   |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *   |   |             [     Packet   ]            |
 * [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 * [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                            [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones, its _ctor_ function is executed.  Likewise,
 * for any deallocation through uma_zfree() the _dtor_ function
 * is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with
 * _zfini_ functions and freed back to the global memory pool.
 *
 */
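/*
 * Editorial sketch (not part of kern_mbuf.c): how a consumer exercises
 * the two common-case paths in the diagram above.  m_getcl() draws a
 * ready-made mbuf+cluster pair from the Packet (secondary) zone, while
 * m_gethdr() followed by m_clget() touches the Mbuf and Cluster zones
 * separately.
 */
#if 0
        struct mbuf *m;

        /* One allocation, serviced from the Packet zone caches. */
        m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
        if (m != NULL)
                m_freem(m);

        /* Two allocations: Mbuf zone first, then Cluster zone. */
        m = m_gethdr(M_NOWAIT, MT_DATA);
        if (m != NULL) {
                m_clget(m, M_NOWAIT);
                if ((m->m_flags & M_EXT) == 0) {
                        /* Cluster zone exhausted or limit reached. */
                }
                m_freem(m);
        }
#endif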

int nmbufs;                     /* limits number of mbufs */
int nmbclusters;                /* limits number of mbuf clusters */
int nmbjumbop;                  /* limits number of page size jumbo clusters */
int nmbjumbo9;                  /* limits number of 9k jumbo clusters */
int nmbjumbo16;                 /* limits number of 16k jumbo clusters */
struct mbstat mbstat;

/*
- * tunable_mbinit() has to be run before init_maxsockets() thus
- * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
- * runs at SI_ORDER_ANY.
- *
- * NB: This has to be done before VM init.
+ * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
+       quad_t realmem, maxmbufmem;

+       /*
+        * The default limit for all mbuf related memory is 1/2 of all
+        * available kernel memory (physical or kmem).
+        * At most it can be 3/4 of available kernel memory.
+        */
+       realmem = qmin((quad_t)physmem * PAGE_SIZE,
+           vm_map_max(kernel_map) - vm_map_min(kernel_map));
+       maxmbufmem = realmem / 2;
+       TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
+       if (maxmbufmem > realmem / 4 * 3)
+               maxmbufmem = realmem / 4 * 3;
+
        TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
        if (nmbclusters == 0)
                nmbclusters = maxmbufmem / MCLBYTES / 4;

        TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
        if (nmbjumbop == 0)
                nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

        TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
        if (nmbjumbo9 == 0)
                nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

        TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
        if (nmbjumbo16 == 0)
                nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

        /*
         * We need at least as many mbufs as we have clusters of
         * the various types added together.
         */
        TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
        if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
                nmbufs = lmax(maxmbufmem / MSIZE / 5,
                    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
+SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
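/*
 * Editorial worked example: on a machine with 8 GB of physical memory
 * and a kernel map at least that large, realmem is 8 GB, so maxmbufmem
 * defaults to 4 GB, and a kern.maxmbufmem tunable above 6 GB is clamped
 * to 6 GB (3/4 of realmem).  With the 4 GB default, nmbclusters comes
 * out to 4 GB / MCLBYTES (2048) / 4 = 524288 clusters.
 */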

static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
        int error, newnmbclusters;

        newnmbclusters = nmbclusters;
        error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
        if (error == 0 && req->newptr) {
                if (newnmbclusters > nmbclusters &&
                    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
                        nmbclusters = newnmbclusters;
                        uma_zone_set_max(zone_clust, nmbclusters);
                        nmbclusters = uma_zone_get_max(zone_clust);
                        EVENTHANDLER_INVOKE(nmbclusters_change);
                } else
                        error = EINVAL;
        }
        return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
        int error, newnmbjumbop;

        newnmbjumbop = nmbjumbop;
        error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
        if (error == 0 && req->newptr) {
                if (newnmbjumbop > nmbjumbop &&
                    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
                        nmbjumbop = newnmbjumbop;
                        uma_zone_set_max(zone_jumbop, nmbjumbop);
                        nmbjumbop = uma_zone_get_max(zone_jumbop);
                } else
                        error = EINVAL;
        }
        return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
        int error, newnmbjumbo9;

        newnmbjumbo9 = nmbjumbo9;
        error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
        if (error == 0 && req->newptr) {
                if (newnmbjumbo9 > nmbjumbo9 &&
                    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
                        nmbjumbo9 = newnmbjumbo9;
                        uma_zone_set_max(zone_jumbo9, nmbjumbo9);
                        nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
                } else
                        error = EINVAL;
        }
        return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
        int error, newnmbjumbo16;

        newnmbjumbo16 = nmbjumbo16;
        error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
        if (error == 0 && req->newptr) {
                if (newnmbjumbo16 > nmbjumbo16 &&
                    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
                        nmbjumbo16 = newnmbjumbo16;
                        uma_zone_set_max(zone_jumbo16, nmbjumbo16);
                        nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
                } else
                        error = EINVAL;
        }
        return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
        int error, newnmbufs;

        newnmbufs = nmbufs;
        error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
        if (error == 0 && req->newptr) {
                if (newnmbufs > nmbufs) {
                        nmbufs = newnmbufs;
                        uma_zone_set_max(zone_mbuf, nmbufs);
                        nmbufs = uma_zone_get_max(zone_mbuf);
                        EVENTHANDLER_INVOKE(nmbufs_change);
                } else
                        error = EINVAL;
        }
        return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
    "Mbuf general information and statistics");

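/*
 * Editorial sketch (not part of kern_mbuf.c): the handlers above are
 * grow-only, so writing a value below the current limit fails with
 * EINVAL.  From userland this is equivalent to running
 * "sysctl kern.ipc.nmbclusters=<n>" as root.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>

int
main(void)
{
        int newmax = 262144;            /* must exceed the current limit */

        if (sysctlbyname("kern.ipc.nmbclusters", NULL, NULL,
            &newmax, sizeof(newmax)) == -1)
                err(1, "sysctlbyname");
        return (0);
}
#endif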
/*
 * Zones from which we allocate.
 */
uma_zone_t      zone_mbuf;
uma_zone_t      zone_clust;
uma_zone_t      zone_pack;
uma_zone_t      zone_jumbop;
uma_zone_t      zone_jumbo9;
uma_zone_t      zone_jumbo16;
uma_zone_t      zone_ext_refcnt;

/*
 * Local prototypes.
 */
static int      mb_ctor_mbuf(void *, int, void *, int);
static int      mb_ctor_clust(void *, int, void *, int);
static int      mb_ctor_pack(void *, int, void *, int);
static void     mb_dtor_mbuf(void *, int, void *);
static void     mb_dtor_clust(void *, int, void *);
static void     mb_dtor_pack(void *, int, void *);
static int      mb_zinit_pack(void *, int, int);
static void     mb_zfini_pack(void *, int);

static void     mb_reclaim(void *);
-static void     mbuf_init(void *);
static void     *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);

-/* Ensure that MSIZE must be a power of 2. */
+/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);

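/*
 * Editorial worked check of the assertion above: for a power of two such
 * as 256, (255 ^ 256) = 511, + 1 = 512, >> 1 = 256, so it holds.  For a
 * non-power such as 96, (95 ^ 96) = 63, + 1 = 64, >> 1 = 32 != 96, and
 * compilation fails.
 */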
/*
 * Initialize FreeBSD Network buffer allocation.
 */
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
static void
mbuf_init(void *dummy)
{

        /*
         * Configure UMA zones for Mbufs, Clusters, and Packets.
         */
        zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
            mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
            trash_init, trash_fini,
#else
            NULL, NULL,
#endif
            MSIZE - 1, UMA_ZONE_MAXBUCKET);
        if (nmbufs > 0)
                nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
        uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");

        zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
            mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
            trash_init, trash_fini,
#else
            NULL, NULL,
#endif
            UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
        if (nmbclusters > 0)
                nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
        uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");

        zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
            mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

        /* Make jumbo frame zones too.  Page size, 9k and 16k. */
        zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
            mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
            trash_init, trash_fini,
#else
            NULL, NULL,
#endif
            UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
        if (nmbjumbop > 0)
                nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
        uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");

        zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
            mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
            trash_init, trash_fini,
#else
            NULL, NULL,
#endif
            UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
        uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
        if (nmbjumbo9 > 0)
                nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
        uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");

        zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
            mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
            trash_init, trash_fini,
#else
            NULL, NULL,
#endif
            UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
        uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
        if (nmbjumbo16 > 0)
                nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
        uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");

        zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
            NULL, NULL,
            NULL, NULL,
            UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

        /* uma_prealloc() goes here... */

        /*
         * Hook event handler for low-memory situation, used to
         * drain protocols and push data back to the caches (UMA
         * later pushes it back to VM).
         */
        EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
            EVENTHANDLER_PRI_FIRST);

        /*
         * [Re]set counters and local statistics knobs.
         * XXX Some of these should go and be replaced, but UMA stat
         * gathering needs to be revised.
         */
        mbstat.m_mbufs = 0;
        mbstat.m_mclusts = 0;
        mbstat.m_drain = 0;
        mbstat.m_msize = MSIZE;
        mbstat.m_mclbytes = MCLBYTES;
        mbstat.m_minclsize = MINCLSIZE;
        mbstat.m_mlen = MLEN;
        mbstat.m_mhlen = MHLEN;
        mbstat.m_numtypes = MT_NTYPES;

        mbstat.m_mcfail = mbstat.m_mpfail = 0;
        mbstat.sf_iocnt = 0;
        mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
{

        /* Inform UMA that this allocator uses kernel_map/object. */
        *flags = UMA_SLAB_KERNEL;
        return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
            (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}
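/*
 * Editorial note: the 9k and 16k jumbo clusters span multiple pages and
 * are typically handed to NICs as single DMA buffers, so they must be
 * physically contiguous; the alignment of 1 and boundary of 0 passed
 * above request contiguity without any further placement restriction.
 */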

/*
 * Constructor for Mbuf master zone.
 *
 * The 'arg' pointer points to a mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
        struct mbuf *m;
        struct mb_args *args;
#ifdef MAC
        int error;
#endif
        int flags;
        short type;

#ifdef INVARIANTS
        trash_ctor(mem, size, arg, how);
#endif
        m = (struct mbuf *)mem;
        args = (struct mb_args *)arg;
        flags = args->flags;
        type = args->type;

        /*
         * The mbuf is initialized later.  The caller has the
         * responsibility to set up any MAC labels too.
         */
        if (type == MT_NOINIT)
                return (0);

        m->m_next = NULL;
        m->m_nextpkt = NULL;
        m->m_len = 0;
        m->m_flags = flags;
        m->m_type = type;
        if (flags & M_PKTHDR) {
                m->m_data = m->m_pktdat;
                m->m_pkthdr.rcvif = NULL;
                m->m_pkthdr.header = NULL;
                m->m_pkthdr.len = 0;
                m->m_pkthdr.csum_flags = 0;
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.tso_segsz = 0;
                m->m_pkthdr.ether_vtag = 0;
                m->m_pkthdr.flowid = 0;
                SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
                /* If the label init fails, fail the alloc */
                error = mac_mbuf_init(m, how);
                if (error)
                        return (error);
#endif
        } else
                m->m_data = m->m_dat;
        return (0);
}

/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
        struct mbuf *m;
        unsigned long flags;

        m = (struct mbuf *)mem;
        flags = (unsigned long)arg;

        if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
                m_tag_delete_chain(m, NULL);
        KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
        KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
#ifdef INVARIANTS
        trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
        struct mbuf *m;

        m = (struct mbuf *)mem;
        if ((m->m_flags & M_PKTHDR) != 0)
                m_tag_delete_chain(m, NULL);

        /* Make sure we've got a clean cluster back. */
        KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
        KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
        KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
        KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
        KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
        KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
        KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
        KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
        trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
        /*
         * If there are processes blocked on zone_clust, waiting for pages
         * to be freed up, cause them to be woken up by draining the packet
         * zone.  We are exposed to a race here (in the check for the
         * UMA_ZFLAG_FULL) where we might miss the flag set, but that is
         * deliberate.  We don't want to acquire the zone lock for every
         * mbuf free.
         */
        if (uma_zone_exhausted_nolock(zone_clust))
                zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf which we
 * are configuring cluster storage for.  If 'arg' is
 * NULL we allocate just the cluster without attaching
 * it to any mbuf.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
        struct mbuf *m;
        u_int *refcnt;
        int type;
        uma_zone_t zone;

#ifdef INVARIANTS
        trash_ctor(mem, size, arg, how);
#endif
        switch (size) {
        case MCLBYTES:
                type = EXT_CLUSTER;
                zone = zone_clust;
                break;
#if MJUMPAGESIZE != MCLBYTES
        case MJUMPAGESIZE:
                type = EXT_JUMBOP;
                zone = zone_jumbop;
                break;
#endif
        case MJUM9BYTES:
                type = EXT_JUMBO9;
                zone = zone_jumbo9;
                break;
        case MJUM16BYTES:
                type = EXT_JUMBO16;
                zone = zone_jumbo16;
                break;
        default:
                panic("unknown cluster size");
                break;
        }

        m = (struct mbuf *)arg;
        refcnt = uma_find_refcnt(zone, mem);
        *refcnt = 1;
        if (m != NULL) {
                m->m_ext.ext_buf = (caddr_t)mem;
                m->m_data = m->m_ext.ext_buf;
                m->m_flags |= M_EXT;
                m->m_ext.ext_free = NULL;
                m->m_ext.ext_arg1 = NULL;
                m->m_ext.ext_arg2 = NULL;
                m->m_ext.ext_size = size;
                m->m_ext.ext_type = type;
                m->m_ext.ref_cnt = refcnt;
        }

        return (0);
}
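/*
 * Editorial sketch (not part of kern_mbuf.c): passing an mbuf as the
 * allocation argument makes this constructor attach the cluster to it,
 * which is essentially what m_clget() does; a NULL argument yields a
 * bare cluster.
 */
#if 0
        struct mbuf *m;

        m = m_get(M_NOWAIT, MT_DATA);
        if (m != NULL) {
                if (uma_zalloc_arg(zone_clust, m, M_NOWAIT) == NULL) {
                        /* Cluster zone exhausted; m has no M_EXT. */
                        m_free(m);
                } else
                        m_freem(m);     /* Frees the mbuf and cluster. */
        }
#endif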

/*
 * The Mbuf Cluster zone destructor.
 */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
#ifdef INVARIANTS
        uma_zone_t zone;

        zone = m_getzone(size);
        KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
            ("%s: refcnt incorrect %u", __func__,
            *(uma_find_refcnt(zone, mem))));

        trash_dtor(mem, size, arg);
#endif
}

/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
        struct mbuf *m;

        m = (struct mbuf *)mem;         /* m is virgin. */
        if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
            m->m_ext.ext_buf == NULL)
                return (ENOMEM);
        m->m_ext.ext_type = EXT_PACKET; /* Override. */
#ifdef INVARIANTS
        trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
        return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
        struct mbuf *m;

        m = (struct mbuf *)mem;
#ifdef INVARIANTS
        trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
        uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
        trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
        struct mbuf *m;
        struct mb_args *args;
#ifdef MAC
        int error;
#endif
        int flags;
        short type;

        m = (struct mbuf *)mem;
        args = (struct mb_args *)arg;
        flags = args->flags;
        type = args->type;

#ifdef INVARIANTS
        trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
        m->m_next = NULL;
        m->m_nextpkt = NULL;
        m->m_data = m->m_ext.ext_buf;
        m->m_len = 0;
        m->m_flags = (flags | M_EXT);
        m->m_type = type;

        if (flags & M_PKTHDR) {
                m->m_pkthdr.rcvif = NULL;
                m->m_pkthdr.len = 0;
                m->m_pkthdr.header = NULL;
                m->m_pkthdr.csum_flags = 0;
                m->m_pkthdr.csum_data = 0;
                m->m_pkthdr.tso_segsz = 0;
                m->m_pkthdr.ether_vtag = 0;
                m->m_pkthdr.flowid = 0;
                SLIST_INIT(&m->m_pkthdr.tags);
#ifdef MAC
                /* If the label init fails, fail the alloc */
                error = mac_mbuf_init(m, how);
                if (error)
                        return (error);
#endif
        }
        /* m_ext is already initialized. */

        return (0);
}

int
m_pkthdr_init(struct mbuf *m, int how)
{
#ifdef MAC
        int error;
#endif
        m->m_data = m->m_pktdat;
        SLIST_INIT(&m->m_pkthdr.tags);
        m->m_pkthdr.rcvif = NULL;
        m->m_pkthdr.header = NULL;
        m->m_pkthdr.len = 0;
        m->m_pkthdr.flowid = 0;
        m->m_pkthdr.csum_flags = 0;
        m->m_pkthdr.csum_data = 0;
        m->m_pkthdr.tso_segsz = 0;
        m->m_pkthdr.ether_vtag = 0;
#ifdef MAC
        /* If the label init fails, fail the alloc */
        error = mac_mbuf_init(m, how);
        if (error)
                return (error);
#endif

        return (0);
}
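/*
 * Editorial sketch (not part of kern_mbuf.c): a caller turning a plain
 * mbuf into a packet-header mbuf by hand, mirroring what the M_PKTHDR
 * branches of the constructors above do implicitly.
 */
#if 0
        struct mbuf *m;

        m = m_get(M_NOWAIT, MT_DATA);
        if (m != NULL) {
                m->m_flags |= M_PKTHDR;
                if (m_pkthdr_init(m, M_NOWAIT) != 0) {
                        /* MAC label allocation failed. */
                        m->m_flags &= ~M_PKTHDR;
                        m_free(m);
                        m = NULL;
                }
        }
#endif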

/*
 * This is the protocol drain routine.
 *
 * No locks should be held when this is called.  The drain routines
 * presently have to acquire some locks, which raises the possibility
 * of lock order reversal.
 */
static void
mb_reclaim(void *junk)
{
        struct domain *dp;
        struct protosw *pr;

        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
            "mb_reclaim()");

        for (dp = domains; dp != NULL; dp = dp->dom_next)
                for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
                        if (pr->pr_drain != NULL)
                                (*pr->pr_drain)();
}