uipc_mbuf.c revision 15543
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $Id: uipc_mbuf.c,v 1.17 1995/12/14 08:32:06 phk Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
char	*mclrefcnt;
struct mbstat mbstat;
union mcluster *mclfree;
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;

static void	m_reclaim __P((void));

/* ARGSUSED */
static void
mbinit(dummy)
	void *dummy;
{
	int s;

#if PAGE_SIZE < 4096
#define NCL_INIT	(4096/PAGE_SIZE)
#else
#define NCL_INIT	1
#endif
	s = splimp();
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	splx(s);
	return;
bad:
	panic("mbinit");
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 * Must be called at splimp.
 */
/* ARGSUSED */
int
m_clalloc(ncl, nowait)
	register int ncl;
	int nowait;
{
	register caddr_t p;
	register int i;
	int npg;

	/*
	 * Once we run out of map space, it will be impossible
	 * to get any more (nothing is ever freed back to the
	 * map).
	 */
	if (mb_map_full)
		return (0);

	npg = ncl;
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
				 nowait ? M_NOWAIT : M_WAITOK);
	/*
	 * Either the map is now full, or this is nowait and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	ncl = ncl * PAGE_SIZE / MCLBYTES;
	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree;
		mclfree = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}
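
/*
 * Usage sketch (hypothetical caller, not part of this file): code that
 * wants clusters pre-allocated must raise the interrupt priority level
 * first, since the cluster free list is protected only by splimp:
 *
 *	int s = splimp();
 *	if (m_clalloc(4, M_DONTWAIT) == 0)
 *		printf("no cluster memory\n");
 *	splx(s);
 */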

/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
	/*
	 * Temporarily redefine m_retry to fail so that the MGET
	 * expanded below cannot recurse back into this function.
	 */
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
	/* As in m_retry, keep the expanded MGETHDR from recursing. */
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

static void
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;
	int s = splimp();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	register struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	register struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(nowait, type)
	int nowait, type;
{
	register struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (0);
	bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	register struct mbuf *m;
{
	register struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}
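
/*
 * Usage sketch (hypothetical, not part of this file): the allocate and
 * free routines pair up around any temporary chain.  M_WAIT may sleep,
 * so interrupt-level code must use M_DONTWAIT and check for failure:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...fill in m->m_pkthdr and data...
 *	m_freem(m);
 */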

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
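
/*
 * Usage sketch (hypothetical): protocol output code normally goes
 * through the M_PREPEND macro, which only drops into m_prepend() when
 * the first mbuf has no leading space for the new header:
 *
 *	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */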

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT
 * from caller.
 */
static int MCFail;

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
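
/*
 * Usage sketch (hypothetical): TCP-style retransmission keeps the
 * original chain and transmits a copy; cluster data is shared by
 * reference count rather than copied:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 */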

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	register struct mbuf *m;
	register int off;
	register int len;
	caddr_t cp;
{
	register unsigned count;

	if (off < 0 || len < 0)
		panic("m_copydata");
	while (off > 0) {
		if (m == 0)
			panic("m_copydata");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata");
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
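
/*
 * Usage sketch (hypothetical): flattening a header that may be split
 * across several mbufs into a local structure:
 *
 *	struct ip iphdr;
 *
 *	m_copydata(m, 0, sizeof(iphdr), (caddr_t)&iphdr);
 */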

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	register struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
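
/*
 * Usage sketch (hypothetical; "fraglen" is an illustrative variable):
 * a reassembly routine appends each fragment's data and then fixes the
 * packet length by hand, since m_cat does not touch m_pkthdr:
 *
 *	m_cat(m, frag);
 *	m->m_pkthdr.len += fraglen;
 */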

/*
 * Trim "req_len" bytes of data from the chain: from the head if
 * req_len is positive, from the tail if it is negative.
 */
void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}
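
/*
 * Usage sketch (hypothetical): stripping an Ethernet header from the
 * front of a received packet, and a 4-byte CRC from its tail:
 *
 *	m_adj(m, sizeof(struct ether_header));
 *	m_adj(m, -4);
 */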

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
static int MPFail;

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
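
/*
 * Usage sketch (hypothetical): the canonical pattern in protocol input
 * routines, which need the header contiguous before using mtod().
 * Note that m_pullup frees the chain itself on failure:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == 0)
 *		return;
 *	ip = mtod(m, struct ip *);
 */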

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
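
/*
 * Usage sketch (hypothetical; "fraglen" is an illustrative variable):
 * splitting an oversized datagram in two at a fragment boundary:
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m, fraglen, M_DONTWAIT);
 *	if (tail == NULL)
 *		return (ENOBUFS);
 */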

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((char *from, caddr_t to, u_int len));
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
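
/*
 * Usage sketch (hypothetical; "sc" and its fields are illustrative
 * driver softc names): a receive interrupt pulling a frame out of
 * board memory.  Passing a null copy function selects plain bcopy:
 *
 *	m = m_devget(sc->rx_buf, pktlen, 0, &sc->arpcom.ac_if, 0);
 *	if (m == NULL)
 *		return;
 */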

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
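
/*
 * Usage sketch (hypothetical; "sumoff" is an illustrative offset):
 * patching a 2-byte checksum field in place in an existing chain:
 *
 *	u_short sum = 0;
 *
 *	m_copyback(m, sumoff, sizeof(sum), (caddr_t)&sum);
 */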
747