uipc_mbuf.c revision 100960
1/*
2 * Copyright (c) 1982, 1986, 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34 * $FreeBSD: head/sys/kern/uipc_mbuf.c 100960 2002-07-30 18:28:58Z rwatson $
35 */
36
37#include "opt_param.h"
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/sysctl.h>
45#include <sys/domain.h>
46#include <sys/protosw.h>
47
/*
 * Header-size tunables shared with the rest of the network stack.
 * They are zero until some other part of the kernel (or the sysctl
 * interface) assigns them.
 */

/* Leading space m_devget() reserves in a small first mbuf. */
int	max_linkhdr;

/* m_pullup() opportunistically pulls up to this many bytes. */
int	max_protohdr;

/* Not used in this part of the file; presumably link + protocol header. */
int	max_hdr;

/* Not used in this part of the file; presumably payload room after max_hdr. */
int	max_datalen;
52
53/*
54 * sysctl(8) exported objects
55 */
56SYSCTL_DECL(_kern_ipc);
57SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
58	   &max_linkhdr, 0, "");
59SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
60	   &max_protohdr, 0, "");
61SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
62SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
63	   &max_datalen, 0, "");
64
65/*
66 * Copy mbuf pkthdr from "from" to "to".
67 * "from" must have M_PKTHDR set, and "to" must be empty.
68 * aux pointer will be moved to "to".
69 */
70void
71m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
72{
73
74#if 0
75	KASSERT(to->m_flags & M_PKTHDR,
76	    ("m_copy_pkthdr() called on non-header"));
77#endif
78	to->m_data = to->m_pktdat;
79	to->m_flags = from->m_flags & M_COPYFLAGS;
80	to->m_pkthdr = from->m_pkthdr;
81	from->m_pkthdr.aux = NULL;
82}
83
84/*
85 * Lesser-used path for M_PREPEND:
86 * allocate new mbuf to prepend to chain,
87 * copy junk along.
88 */
89struct mbuf *
90m_prepend(struct mbuf *m, int len, int how)
91{
92	struct mbuf *mn;
93
94	MGET(mn, how, m->m_type);
95	if (mn == NULL) {
96		m_freem(m);
97		return (NULL);
98	}
99	if (m->m_flags & M_PKTHDR) {
100		M_COPY_PKTHDR(mn, m);
101		m->m_flags &= ~M_PKTHDR;
102	}
103	mn->m_next = m;
104	m = mn;
105	if (len < MHLEN)
106		MH_ALIGN(m, len);
107	m->m_len = len;
108	return (m);
109}
110
111/*
112 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
113 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
114 * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller.
115 * Note that the copy is read-only, because clusters are not copied,
116 * only their reference counts are incremented.
117 */
118struct mbuf *
119m_copym(struct mbuf *m, int off0, int len, int wait)
120{
121	struct mbuf *n, **np;
122	int off = off0;
123	struct mbuf *top;
124	int copyhdr = 0;
125
126	KASSERT(off >= 0, ("m_copym, negative off %d", off));
127	KASSERT(len >= 0, ("m_copym, negative len %d", len));
128	if (off == 0 && m->m_flags & M_PKTHDR)
129		copyhdr = 1;
130	while (off > 0) {
131		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
132		if (off < m->m_len)
133			break;
134		off -= m->m_len;
135		m = m->m_next;
136	}
137	np = &top;
138	top = 0;
139	while (len > 0) {
140		if (m == NULL) {
141			KASSERT(len == M_COPYALL,
142			    ("m_copym, length > size of mbuf chain"));
143			break;
144		}
145		MGET(n, wait, m->m_type);
146		*np = n;
147		if (n == NULL)
148			goto nospace;
149		if (copyhdr) {
150			M_COPY_PKTHDR(n, m);
151			if (len == M_COPYALL)
152				n->m_pkthdr.len -= off0;
153			else
154				n->m_pkthdr.len = len;
155			copyhdr = 0;
156		}
157		n->m_len = min(len, m->m_len - off);
158		if (m->m_flags & M_EXT) {
159			n->m_data = m->m_data + off;
160			n->m_ext = m->m_ext;
161			n->m_flags |= M_EXT;
162			MEXT_ADD_REF(m);
163		} else
164			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
165			    (unsigned)n->m_len);
166		if (len != M_COPYALL)
167			len -= n->m_len;
168		off = 0;
169		m = m->m_next;
170		np = &n->m_next;
171	}
172	if (top == NULL)
173		mbstat.m_mcfail++;	/* XXX: No consistency. */
174
175	return (top);
176nospace:
177	m_freem(top);
178	mbstat.m_mcfail++;	/* XXX: No consistency. */
179	return (NULL);
180}
181
182/*
183 * Copy an entire packet, including header (which must be present).
184 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
185 * Note that the copy is read-only, because clusters are not copied,
186 * only their reference counts are incremented.
187 * Preserve alignment of the first mbuf so if the creator has left
188 * some room at the beginning (e.g. for inserting protocol headers)
189 * the copies still have the room available.
190 */
191struct mbuf *
192m_copypacket(struct mbuf *m, int how)
193{
194	struct mbuf *top, *n, *o;
195
196	MGET(n, how, m->m_type);
197	top = n;
198	if (n == NULL)
199		goto nospace;
200
201	M_COPY_PKTHDR(n, m);
202	n->m_len = m->m_len;
203	if (m->m_flags & M_EXT) {
204		n->m_data = m->m_data;
205		n->m_ext = m->m_ext;
206		n->m_flags |= M_EXT;
207		MEXT_ADD_REF(m);
208	} else {
209		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
210		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
211	}
212
213	m = m->m_next;
214	while (m) {
215		MGET(o, how, m->m_type);
216		if (o == NULL)
217			goto nospace;
218
219		n->m_next = o;
220		n = n->m_next;
221
222		n->m_len = m->m_len;
223		if (m->m_flags & M_EXT) {
224			n->m_data = m->m_data;
225			n->m_ext = m->m_ext;
226			n->m_flags |= M_EXT;
227			MEXT_ADD_REF(m);
228		} else {
229			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
230		}
231
232		m = m->m_next;
233	}
234	return top;
235nospace:
236	m_freem(top);
237	mbstat.m_mcfail++;	/* XXX: No consistency. */
238	return (NULL);
239}
240
241/*
242 * Copy data from an mbuf chain starting "off" bytes from the beginning,
243 * continuing for "len" bytes, into the indicated buffer.
244 */
245void
246m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
247{
248	unsigned count;
249
250	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
251	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
252	while (off > 0) {
253		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
254		if (off < m->m_len)
255			break;
256		off -= m->m_len;
257		m = m->m_next;
258	}
259	while (len > 0) {
260		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
261		count = min(m->m_len - off, len);
262		bcopy(mtod(m, caddr_t) + off, cp, count);
263		len -= count;
264		cp += count;
265		off = 0;
266		m = m->m_next;
267	}
268}
269
270/*
271 * Copy a packet header mbuf chain into a completely new chain, including
272 * copying any mbuf clusters.  Use this instead of m_copypacket() when
273 * you need a writable copy of an mbuf chain.
274 */
275struct mbuf *
276m_dup(struct mbuf *m, int how)
277{
278	struct mbuf **p, *top = NULL;
279	int remain, moff, nsize;
280
281	/* Sanity check */
282	if (m == NULL)
283		return (NULL);
284	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__));
285
286	/* While there's more data, get a new mbuf, tack it on, and fill it */
287	remain = m->m_pkthdr.len;
288	moff = 0;
289	p = &top;
290	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
291		struct mbuf *n;
292
293		/* Get the next new mbuf */
294		MGET(n, how, m->m_type);
295		if (n == NULL)
296			goto nospace;
297		if (top == NULL) {		/* first one, must be PKTHDR */
298			M_COPY_PKTHDR(n, m);
299			nsize = MHLEN;
300		} else				/* not the first one */
301			nsize = MLEN;
302		if (remain >= MINCLSIZE) {
303			MCLGET(n, how);
304			if ((n->m_flags & M_EXT) == 0) {
305				(void)m_free(n);
306				goto nospace;
307			}
308			nsize = MCLBYTES;
309		}
310		n->m_len = 0;
311
312		/* Link it into the new chain */
313		*p = n;
314		p = &n->m_next;
315
316		/* Copy data from original mbuf(s) into new mbuf */
317		while (n->m_len < nsize && m != NULL) {
318			int chunk = min(nsize - n->m_len, m->m_len - moff);
319
320			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
321			moff += chunk;
322			n->m_len += chunk;
323			remain -= chunk;
324			if (moff == m->m_len) {
325				m = m->m_next;
326				moff = 0;
327			}
328		}
329
330		/* Check correct total mbuf length */
331		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
332		    	("%s: bogus m_pkthdr.len", __func__));
333	}
334	return (top);
335
336nospace:
337	m_freem(top);
338	mbstat.m_mcfail++;	/* XXX: No consistency. */
339	return (NULL);
340}
341
342/*
343 * Concatenate mbuf chain n to m.
344 * Both chains must be of the same type (e.g. MT_DATA).
345 * Any m_pkthdr is not updated.
346 */
347void
348m_cat(struct mbuf *m, struct mbuf *n)
349{
350	while (m->m_next)
351		m = m->m_next;
352	while (n) {
353		if (m->m_flags & M_EXT ||
354		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
355			/* just join the two chains */
356			m->m_next = n;
357			return;
358		}
359		/* splat the data from one into the other */
360		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
361		    (u_int)n->m_len);
362		m->m_len += n->m_len;
363		n = m_free(n);
364	}
365}
366
367void
368m_adj(struct mbuf *mp, int req_len)
369{
370	int len = req_len;
371	struct mbuf *m;
372	int count;
373
374	if ((m = mp) == NULL)
375		return;
376	if (len >= 0) {
377		/*
378		 * Trim from head.
379		 */
380		while (m != NULL && len > 0) {
381			if (m->m_len <= len) {
382				len -= m->m_len;
383				m->m_len = 0;
384				m = m->m_next;
385			} else {
386				m->m_len -= len;
387				m->m_data += len;
388				len = 0;
389			}
390		}
391		m = mp;
392		if (mp->m_flags & M_PKTHDR)
393			m->m_pkthdr.len -= (req_len - len);
394	} else {
395		/*
396		 * Trim from tail.  Scan the mbuf chain,
397		 * calculating its length and finding the last mbuf.
398		 * If the adjustment only affects this mbuf, then just
399		 * adjust and return.  Otherwise, rescan and truncate
400		 * after the remaining size.
401		 */
402		len = -len;
403		count = 0;
404		for (;;) {
405			count += m->m_len;
406			if (m->m_next == (struct mbuf *)0)
407				break;
408			m = m->m_next;
409		}
410		if (m->m_len >= len) {
411			m->m_len -= len;
412			if (mp->m_flags & M_PKTHDR)
413				mp->m_pkthdr.len -= len;
414			return;
415		}
416		count -= len;
417		if (count < 0)
418			count = 0;
419		/*
420		 * Correct length for chain is "count".
421		 * Find the mbuf with last data, adjust its length,
422		 * and toss data from remaining mbufs on chain.
423		 */
424		m = mp;
425		if (m->m_flags & M_PKTHDR)
426			m->m_pkthdr.len = count;
427		for (; m; m = m->m_next) {
428			if (m->m_len >= count) {
429				m->m_len = count;
430				break;
431			}
432			count -= m->m_len;
433		}
434		while (m->m_next)
435			(m = m->m_next) ->m_len = 0;
436	}
437}
438
439/*
440 * Rearange an mbuf chain so that len bytes are contiguous
441 * and in the data area of an mbuf (so that mtod and dtom
442 * will work for a structure of size len).  Returns the resulting
443 * mbuf chain on success, frees it and returns null on failure.
444 * If there is room, it will add up to max_protohdr-len extra bytes to the
445 * contiguous region in an attempt to avoid being called next time.
446 */
447struct mbuf *
448m_pullup(struct mbuf *n, int len)
449{
450	struct mbuf *m;
451	int count;
452	int space;
453
454	/*
455	 * If first mbuf has no cluster, and has room for len bytes
456	 * without shifting current data, pullup into it,
457	 * otherwise allocate a new mbuf to prepend to the chain.
458	 */
459	if ((n->m_flags & M_EXT) == 0 &&
460	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
461		if (n->m_len >= len)
462			return (n);
463		m = n;
464		n = n->m_next;
465		len -= m->m_len;
466	} else {
467		if (len > MHLEN)
468			goto bad;
469		MGET(m, M_DONTWAIT, n->m_type);
470		if (m == NULL)
471			goto bad;
472		m->m_len = 0;
473		if (n->m_flags & M_PKTHDR) {
474			M_COPY_PKTHDR(m, n);
475			n->m_flags &= ~M_PKTHDR;
476		}
477	}
478	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
479	do {
480		count = min(min(max(len, max_protohdr), space), n->m_len);
481		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
482		  (unsigned)count);
483		len -= count;
484		m->m_len += count;
485		n->m_len -= count;
486		space -= count;
487		if (n->m_len)
488			n->m_data += count;
489		else
490			n = m_free(n);
491	} while (len > 0 && n);
492	if (len > 0) {
493		(void) m_free(m);
494		goto bad;
495	}
496	m->m_next = n;
497	return (m);
498bad:
499	m_freem(n);
500	mbstat.m_mpfail++;	/* XXX: No consistency. */
501	return (NULL);
502}
503
504/*
505 * Partition an mbuf chain in two pieces, returning the tail --
506 * all but the first len0 bytes.  In case of failure, it returns NULL and
507 * attempts to restore the chain to its original state.
508 *
509 * Note that the resulting mbufs might be read-only, because the new
510 * mbuf can end up sharing an mbuf cluster with the original mbuf if
511 * the "breaking point" happens to lie within a cluster mbuf. Use the
512 * M_WRITABLE() macro to check for this case.
513 */
514struct mbuf *
515m_split(struct mbuf *m0, int len0, int wait)
516{
517	struct mbuf *m, *n;
518	unsigned len = len0, remain;
519
520	for (m = m0; m && len > m->m_len; m = m->m_next)
521		len -= m->m_len;
522	if (m == NULL)
523		return (NULL);
524	remain = m->m_len - len;
525	if (m0->m_flags & M_PKTHDR) {
526		MGETHDR(n, wait, m0->m_type);
527		if (n == NULL)
528			return (NULL);
529		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
530		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
531		m0->m_pkthdr.len = len0;
532		if (m->m_flags & M_EXT)
533			goto extpacket;
534		if (remain > MHLEN) {
535			/* m can't be the lead packet */
536			MH_ALIGN(n, 0);
537			n->m_next = m_split(m, len, wait);
538			if (n->m_next == NULL) {
539				(void) m_free(n);
540				return (NULL);
541			} else {
542				n->m_len = 0;
543				return (n);
544			}
545		} else
546			MH_ALIGN(n, remain);
547	} else if (remain == 0) {
548		n = m->m_next;
549		m->m_next = NULL;
550		return (n);
551	} else {
552		MGET(n, wait, m->m_type);
553		if (n == NULL)
554			return (NULL);
555		M_ALIGN(n, remain);
556	}
557extpacket:
558	if (m->m_flags & M_EXT) {
559		n->m_flags |= M_EXT;
560		n->m_ext = m->m_ext;
561		MEXT_ADD_REF(m);
562		n->m_data = m->m_data + len;
563	} else {
564		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
565	}
566	n->m_len = remain;
567	m->m_len = len;
568	n->m_next = m->m_next;
569	m->m_next = NULL;
570	return (n);
571}
572/*
573 * Routine to copy from device local memory into mbufs.
574 * Note that `off' argument is offset into first mbuf of target chain from
575 * which to begin copying the data to.
576 */
577struct mbuf *
578m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
579	 void (*copy)(char *from, caddr_t to, u_int len))
580{
581	struct mbuf *m;
582	struct mbuf *top = 0, **mp = &top;
583	int len;
584
585	if (off < 0 || off > MHLEN)
586		return (NULL);
587
588	MGETHDR(m, M_DONTWAIT, MT_DATA);
589	if (m == NULL)
590		return (NULL);
591	m->m_pkthdr.rcvif = ifp;
592	m->m_pkthdr.len = totlen;
593	len = MHLEN;
594
595	while (totlen > 0) {
596		if (top) {
597			MGET(m, M_DONTWAIT, MT_DATA);
598			if (m == NULL) {
599				m_freem(top);
600				return (NULL);
601			}
602			len = MLEN;
603		}
604		if (totlen + off >= MINCLSIZE) {
605			MCLGET(m, M_DONTWAIT);
606			if (m->m_flags & M_EXT)
607				len = MCLBYTES;
608		} else {
609			/*
610			 * Place initial small packet/header at end of mbuf.
611			 */
612			if (top == NULL && totlen + off + max_linkhdr <= len) {
613				m->m_data += max_linkhdr;
614				len -= max_linkhdr;
615			}
616		}
617		if (off) {
618			m->m_data += off;
619			len -= off;
620			off = 0;
621		}
622		m->m_len = len = min(totlen, len);
623		if (copy)
624			copy(buf, mtod(m, caddr_t), (unsigned)len);
625		else
626			bcopy(buf, mtod(m, caddr_t), (unsigned)len);
627		buf += len;
628		*mp = m;
629		mp = &m->m_next;
630		totlen -= len;
631	}
632	return (top);
633}
634
635/*
636 * Copy data from a buffer back into the indicated mbuf chain,
637 * starting "off" bytes from the beginning, extending the mbuf
638 * chain if necessary.
639 */
640void
641m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
642{
643	int mlen;
644	struct mbuf *m = m0, *n;
645	int totlen = 0;
646
647	if (m0 == NULL)
648		return;
649	while (off > (mlen = m->m_len)) {
650		off -= mlen;
651		totlen += mlen;
652		if (m->m_next == NULL) {
653			n = m_get_clrd(M_DONTWAIT, m->m_type);
654			if (n == NULL)
655				goto out;
656			n->m_len = min(MLEN, len + off);
657			m->m_next = n;
658		}
659		m = m->m_next;
660	}
661	while (len > 0) {
662		mlen = min (m->m_len - off, len);
663		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
664		cp += mlen;
665		len -= mlen;
666		mlen += off;
667		off = 0;
668		totlen += mlen;
669		if (len == 0)
670			break;
671		if (m->m_next == NULL) {
672			n = m_get(M_DONTWAIT, m->m_type);
673			if (n == NULL)
674				break;
675			n->m_len = min(MLEN, len);
676			m->m_next = n;
677		}
678		m = m->m_next;
679	}
680out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
681		m->m_pkthdr.len = totlen;
682}
683
684void
685m_print(const struct mbuf *m)
686{
687	int len;
688	const struct mbuf *m2;
689
690	len = m->m_pkthdr.len;
691	m2 = m;
692	while (len) {
693		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
694		len -= m2->m_len;
695		m2 = m2->m_next;
696	}
697	return;
698}
699