/* uipc_mbuf.c revision 11921 */
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $Id: uipc_mbuf.c,v 1.12 1995/09/09 18:10:12 davidg Exp $
 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/proc.h>
40#include <sys/malloc.h>
41#define MBTYPES
42#include <sys/mbuf.h>
43#include <sys/kernel.h>
44#include <sys/syslog.h>
45#include <sys/domain.h>
46#include <sys/protosw.h>
47
48#include <vm/vm.h>
49#include <vm/vm_kern.h>
50
/*
 * System initialization
 */

static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)


struct mbuf *mbutl;		/* base of the mbuf arena (initialized elsewhere) */
char	*mclrefcnt;		/* per-cluster reference counts, indexed by mtocl() */
struct mbstat mbstat;		/* mbuf allocation/usage statistics */
union mcluster *mclfree;	/* head of the singly-linked free-cluster list */
int	max_linkhdr;		/* largest link-level header (used by m_devget) */
int	max_protohdr;		/* largest protocol header (used by m_pullup) */
int	max_hdr;		/* NOTE(review): presumably max_linkhdr+max_protohdr — set elsewhere */
int	max_datalen;		/* NOTE(review): presumably MHLEN - max_hdr — set elsewhere */
67
68/* ARGSUSED*/
69static void
70mbinit(udata)
71	void *udata;		/* not used*/
72{
73	int s;
74
75#if CLBYTES < 4096
76#define NCL_INIT	(4096/CLBYTES)
77#else
78#define NCL_INIT	1
79#endif
80	s = splimp();
81	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
82		goto bad;
83	splx(s);
84	return;
85bad:
86	panic("mbinit");
87}
88
89/*
90 * Allocate some number of mbuf clusters
91 * and place on cluster free list.
92 * Must be called at splimp.
93 */
94/* ARGSUSED */
95int
96m_clalloc(ncl, nowait)
97	register int ncl;
98	int nowait;
99{
100	register caddr_t p;
101	register int i;
102	int npg;
103
104	/*
105	 * Once we run out of map space, it will be impossible
106	 * to get any more (nothing is ever freed back to the
107	 * map).
108	 */
109	if (mb_map_full)
110		return (0);
111
112	npg = ncl * CLSIZE;
113	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
114				 nowait ? M_NOWAIT : M_WAITOK);
115	/*
116	 * Either the map is now full, or this is nowait and there
117	 * are no pages left.
118	 */
119	if (p == NULL)
120		return (0);
121
122	ncl = ncl * CLBYTES / MCLBYTES;
123	for (i = 0; i < ncl; i++) {
124		((union mcluster *)p)->mcl_next = mclfree;
125		mclfree = (union mcluster *)p;
126		p += MCLBYTES;
127		mbstat.m_clfree++;
128	}
129	mbstat.m_clusters += ncl;
130	return (1);
131}
132
/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
	/*
	 * Temporarily redefine m_retry to a constant NULL so that the
	 * MGET expansion below (which normally falls back to m_retry on
	 * failure) cannot recurse into this function.
	 */
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}
153
/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
	/*
	 * Same anti-recursion trick as m_retry: make the MGETHDR
	 * expansion's failure path yield NULL instead of re-entering
	 * this function.
	 */
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}
173
174void
175m_reclaim()
176{
177	register struct domain *dp;
178	register struct protosw *pr;
179	int s = splimp();
180
181	for (dp = domains; dp; dp = dp->dom_next)
182		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
183			if (pr->pr_drain)
184				(*pr->pr_drain)();
185	splx(s);
186	mbstat.m_drain++;
187}
188
/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	struct mbuf *mb;

	/* function wrapper around the MGET macro */
	MGET(mb, nowait, type);
	return (mb);
}
203
/* Function wrapper around MGETHDR: allocate a packet-header mbuf. */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
213
214struct mbuf *
215m_getclr(nowait, type)
216	int nowait, type;
217{
218	register struct mbuf *m;
219
220	MGET(m, nowait, type);
221	if (m == 0)
222		return (0);
223	bzero(mtod(m, caddr_t), MLEN);
224	return (m);
225}
226
/* Free a single mbuf; returns its successor in the chain. */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *nextm;

	MFREE(m, nextm);
	return (nextm);
}
236
237void
238m_freem(m)
239	register struct mbuf *m;
240{
241	register struct mbuf *n;
242
243	if (m == NULL)
244		return;
245	do {
246		MFREE(m, n);
247		m = n;
248	} while (m);
249}
250
251/*
252 * Mbuffer utility routines.
253 */
254
255/*
256 * Lesser-used path for M_PREPEND:
257 * allocate new mbuf to prepend to chain,
258 * copy junk along.
259 */
260struct mbuf *
261m_prepend(m, len, how)
262	register struct mbuf *m;
263	int len, how;
264{
265	struct mbuf *mn;
266
267	MGET(mn, how, m->m_type);
268	if (mn == (struct mbuf *)NULL) {
269		m_freem(m);
270		return ((struct mbuf *)NULL);
271	}
272	if (m->m_flags & M_PKTHDR) {
273		M_COPY_PKTHDR(mn, m);
274		m->m_flags &= ~M_PKTHDR;
275	}
276	mn->m_next = m;
277	m = mn;
278	if (len < MHLEN)
279		MH_ALIGN(m, len);
280	m->m_len = len;
281	return (m);
282}
283
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 * Cluster (M_EXT) data is shared by reference, not copied.
 * Returns the new chain, or NULL on allocation failure.
 */
int MCFail;			/* statistic: failed m_copym attempts */

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	/* only a copy starting at offset 0 carries the packet header along */
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	/* skip mbufs that lie wholly before the starting offset */
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			/* running off the end is legal only for M_COPYALL */
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/*
			 * Share the cluster: bump its reference count and
			 * alias the external buffer instead of copying.
			 */
			n->m_data = m->m_data + off;
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	/* out of mbufs: release the partial copy */
	m_freem(top);
	MCFail++;
	return (0);
}
357
358/*
359 * Copy data from an mbuf chain starting "off" bytes from the beginning,
360 * continuing for "len" bytes, into the indicated buffer.
361 */
362void
363m_copydata(m, off, len, cp)
364	register struct mbuf *m;
365	register int off;
366	register int len;
367	caddr_t cp;
368{
369	register unsigned count;
370
371	if (off < 0 || len < 0)
372		panic("m_copydata");
373	while (off > 0) {
374		if (m == 0)
375			panic("m_copydata");
376		if (off < m->m_len)
377			break;
378		off -= m->m_len;
379		m = m->m_next;
380	}
381	while (len > 0) {
382		if (m == 0)
383			panic("m_copydata");
384		count = min(m->m_len - off, len);
385		bcopy(mtod(m, caddr_t) + off, cp, count);
386		len -= count;
387		cp += count;
388		off = 0;
389		m = m->m_next;
390	}
391}
392
393/*
394 * Concatenate mbuf chain n to m.
395 * Both chains must be of the same type (e.g. MT_DATA).
396 * Any m_pkthdr is not updated.
397 */
398void
399m_cat(m, n)
400	register struct mbuf *m, *n;
401{
402	while (m->m_next)
403		m = m->m_next;
404	while (n) {
405		if (m->m_flags & M_EXT ||
406		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
407			/* just join the two chains */
408			m->m_next = n;
409			return;
410		}
411		/* splat the data from one into the other */
412		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
413		    (u_int)n->m_len);
414		m->m_len += n->m_len;
415		n = m_free(n);
416	}
417}
418
419void
420m_adj(mp, req_len)
421	struct mbuf *mp;
422	int req_len;
423{
424	register int len = req_len;
425	register struct mbuf *m;
426	register count;
427
428	if ((m = mp) == NULL)
429		return;
430	if (len >= 0) {
431		/*
432		 * Trim from head.
433		 */
434		while (m != NULL && len > 0) {
435			if (m->m_len <= len) {
436				len -= m->m_len;
437				m->m_len = 0;
438				m = m->m_next;
439			} else {
440				m->m_len -= len;
441				m->m_data += len;
442				len = 0;
443			}
444		}
445		m = mp;
446		if (mp->m_flags & M_PKTHDR)
447			m->m_pkthdr.len -= (req_len - len);
448	} else {
449		/*
450		 * Trim from tail.  Scan the mbuf chain,
451		 * calculating its length and finding the last mbuf.
452		 * If the adjustment only affects this mbuf, then just
453		 * adjust and return.  Otherwise, rescan and truncate
454		 * after the remaining size.
455		 */
456		len = -len;
457		count = 0;
458		for (;;) {
459			count += m->m_len;
460			if (m->m_next == (struct mbuf *)0)
461				break;
462			m = m->m_next;
463		}
464		if (m->m_len >= len) {
465			m->m_len -= len;
466			if (mp->m_flags & M_PKTHDR)
467				mp->m_pkthdr.len -= len;
468			return;
469		}
470		count -= len;
471		if (count < 0)
472			count = 0;
473		/*
474		 * Correct length for chain is "count".
475		 * Find the mbuf with last data, adjust its length,
476		 * and toss data from remaining mbufs on chain.
477		 */
478		m = mp;
479		if (m->m_flags & M_PKTHDR)
480			m->m_pkthdr.len = count;
481		for (; m; m = m->m_next) {
482			if (m->m_len >= count) {
483				m->m_len = count;
484				break;
485			}
486			count -= m->m_len;
487		}
488		while (m->m_next)
489			(m = m->m_next) ->m_len = 0;
490	}
491}
492
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;			/* statistic: failed m_pullup attempts */

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);	/* already contiguous */
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;	/* can never fit in one mbuf */
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			/* move the packet header to the new front mbuf */
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	/* bytes of room left in m's data area */
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/* pull up to max(len, max_protohdr), bounded by space and n */
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);	/* source mbuf emptied; free it */
	} while (len > 0 && n);
	if (len > 0) {
		/* chain was too short to supply len bytes */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
561
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	/* find the mbuf containing the split point; len becomes the offset */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);	/* chain shorter than len0 */
	remain = m->m_len - len;	/* bytes of m that go to the tail */
	if (m0->m_flags & M_PKTHDR) {
		/* tail chain needs its own packet header */
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;	/* cluster: share, don't copy */
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			/* recurse to split the oversized remainder */
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* split falls exactly on an mbuf boundary: no copy needed */
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* share the cluster between the two chains */
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
/*
 * Routine to copy from device local memory into mbufs.
 * Builds a packet-header mbuf chain of totlen bytes from buf,
 * using (*copy)() if supplied, else bcopy.  Returns NULL if any
 * mbuf allocation fails.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy)();
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * NOTE(review): this offset adjustment plus the wrap of cp
		 * back to buf below looks like support for the historic
		 * trailer-protocol packet layout — confirm before relying
		 * on non-zero off0.
		 */
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			/* subsequent mbufs are plain (non-header) mbufs */
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			/* large remainder: try to use a cluster */
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
695
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.  Allocation failures silently truncate the
 * copy (best effort); the packet header length, if present, is
 * raised to cover the bytes actually reached.
 */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;		/* chain length up to the end of the copy */

	if (m0 == 0)
		return;
	/* walk (and, if needed, grow) the chain out to the target offset */
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			/* extend with a zeroed mbuf to reach the offset */
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min (m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;	/* bytes of this mbuf now accounted for */
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			/* extend the chain for the remaining data */
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
748