uipc_mbuf.c (123740) → uipc_mbuf.c (123823)
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 123740 2003-12-23 02:36:43Z peter $");
+__FBSDID("$FreeBSD: head/sys/kern/uipc_mbuf.c 123823 2003-12-25 01:17:27Z silby $");

#include "opt_mac.h"
#include "opt_param.h"
#include "opt_mbuf_stress_test.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>

int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
#ifdef MBUF_STRESS_TEST
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
int	m_defragrandomfailures;
#endif

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
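/*
 * Note that the tag list moves with the pkthdr; "from" is reset to an
 * empty list below so the moved tags cannot be freed twice.
 */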
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

#if 0
	/* see below for why these are not enabled */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
	    ("m_move_pkthdr: to has tags"));
#endif
	KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster"));
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
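/*
 * Returns 1 on success and 0 on failure (the return value of
 * m_tag_copy_chain()); callers check it with "if (!m_dup_pkthdr(...))".
 */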
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{

#if 0
	/*
	 * The mbuf allocator only initializes the pkthdr
	 * when the mbuf is allocated with MGETHDR. Many users
	 * (e.g. m_copy*, m_prepend) use MGET and then
	 * smash the pkthdr as needed causing these
	 * assertions to trip. For now just disable them.
	 */
	M_ASSERTPKTHDR(to);
	/* Note: with MAC, this may not be a good assertion. */
	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
#endif
#ifdef MAC
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
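/*
 * When len fits in a packet header mbuf, MH_ALIGN() below positions the
 * len bytes at the end of the new mbuf, leaving the leading space free
 * for any further prepends.
 */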
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		MGETHDR(mn, how, m->m_type);
	else
		MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
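/*
 * On allocation failure the partially built copy is freed and NULL is
 * returned; the original chain is left intact.
 */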
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			MGETHDR(n, wait, m->m_type);
		else
			MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		mbstat.m_mcfail++;	/* XXX: No consistency. */

	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
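/*
 * The new chain is built packed: a cluster is allocated whenever at
 * least MINCLSIZE bytes remain to be copied.
 */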
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how))
				goto nospace;
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	mbstat.m_mcfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
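/*
 * Data from "n" is compacted into the tail mbuf of "m" for as long as it
 * fits in that mbuf's internal data area; once it no longer fits (or the
 * tail mbuf uses a cluster), the rest of "n" is simply linked on.
 */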
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

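/*
 * Trim req_len bytes from the mbuf chain: from the head if req_len is
 * positive, from the tail if it is negative.  m_pkthdr.len is updated
 * when the first mbuf carries a packet header.
 */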
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
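/*
 * Since a newly allocated first mbuf is a plain mbuf (not a cluster),
 * requests for more than MHLEN contiguous bytes generally fail.
 */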
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf.  Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 * Note that the `off' argument is the offset into the first mbuf of the
 * target chain at which to begin copying the data.
 */
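/*
 * If "copy" is NULL, plain bcopy() is used to move the device data.
 */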
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}
		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
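/*
 * Mbufs allocated to bridge a gap before "off" are obtained zeroed
 * (m_get_clrd()), so any skipped-over bytes read back as zeroes.
 */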
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get_clrd(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("m_apply, negative off %d", off));
	KASSERT(len >= 0, ("m_apply, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_apply, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;
			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data. */
					*off = m->m_len;
					return (m);
				}
				return (NULL);
			}
			m = m->m_next;
		}
	}
	return (NULL);
}

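/*
 * Print an mbuf chain to the console: one line per mbuf, showing its
 * address and a hex dump of its data.
 */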
void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}

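/*
 * Recompute m_pkthdr.len by walking the chain; returns the new length.
 */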
u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

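/*
 * Return the total data length of the chain and, when "last" is not
 * NULL, a pointer to its final mbuf.
 */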
u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If an mbuf without a packet header is passed in, the original
 * chain will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}

#ifdef MBUF_STRESS_TEST

/*
 * Fragment an mbuf chain.  There's no reason you'd ever want to do
 * this in normal usage, but it's great for stress testing various
 * mbuf consumers.
 *
 * If fragmentation is not possible, the original chain will be
 * returned.
 *
 * Possible length values:
 * 0	 no fragmentation will occur
 * > 0	 each fragment will be of the specified length
 * -1	 each fragment will be the same random value in length
 * -2	 each fragment's length will be entirely random
 * (Random values range from 1 to 256)
 */
struct mbuf *
m_fragment(struct mbuf *m0, int how, int length)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if ((length == 0) || (length < -2))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	m_final = m_getcl(how, MT_DATA, M_PKTHDR);

	if (m_final == NULL)
		goto nospace;

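	/*
	 * m_dup_pkthdr() returns an int (0 on failure, 1 on success), so
	 * its result is now tested against 0; the old code compared the
	 * integer result against the pointer constant NULL.
	 */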
-	if (m_dup_pkthdr(m_final, m0, how) == NULL)
+	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	if (length == -1)
		length = 1 + (arc4random() & 255);

	while (progress < m0->m_pkthdr.len) {
		int fraglen;

		if (length > 0)
			fraglen = length;
		else
			fraglen = 1 + (arc4random() & 255);
		if (fraglen > m0->m_pkthdr.len - progress)
			fraglen = m0->m_pkthdr.len - progress;

		if (fraglen > MCLBYTES)
			fraglen = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getcl(how, MT_DATA, 0);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
		progress += fraglen;
		m_new->m_len = fraglen;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	m_freem(m0);
	m0 = m_final;
	return (m0);
nospace:
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	/* Return the original chain on failure */
	return (m0);
}

#endif