uipc_mbuf.c revision 12662
144743Smarkm/*
244743Smarkm * Copyright (c) 1982, 1986, 1988, 1991, 1993
344743Smarkm *	The Regents of the University of California.  All rights reserved.
444743Smarkm *
544743Smarkm * Redistribution and use in source and binary forms, with or without
656977Sshin * modification, are permitted provided that the following conditions
756977Sshin * are met:
844743Smarkm * 1. Redistributions of source code must retain the above copyright
944743Smarkm *    notice, this list of conditions and the following disclaimer.
1044743Smarkm * 2. Redistributions in binary form must reproduce the above copyright
1144743Smarkm *    notice, this list of conditions and the following disclaimer in the
1244743Smarkm *    documentation and/or other materials provided with the distribution.
1344743Smarkm * 3. All advertising materials mentioning features or use of this software
1444743Smarkm *    must display the following acknowledgement:
1544743Smarkm *	This product includes software developed by the University of
1656977Sshin *	California, Berkeley and its contributors.
1756977Sshin * 4. Neither the name of the University nor the names of its contributors
1856977Sshin *    may be used to endorse or promote products derived from this software
1944743Smarkm *    without specific prior written permission.
2044743Smarkm *
2144743Smarkm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2244743Smarkm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2344743Smarkm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2444743Smarkm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2544743Smarkm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2644743Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2744743Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2844743Smarkm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2944743Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3044743Smarkm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3144743Smarkm * SUCH DAMAGE.
3244743Smarkm *
3344743Smarkm *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
3444743Smarkm * $Id: uipc_mbuf.c,v 1.15 1995/12/02 18:58:42 bde Exp $
3544743Smarkm */
3644743Smarkm
37210386Srpaulo#include <sys/param.h>
3844743Smarkm#include <sys/systm.h>
3944743Smarkm#include <sys/proc.h>
4044743Smarkm#include <sys/malloc.h>
4144743Smarkm#define MBTYPES
4244743Smarkm#include <sys/mbuf.h>
4344743Smarkm#include <sys/kernel.h>
4444743Smarkm#include <sys/syslog.h>
4544743Smarkm#include <sys/domain.h>
4644743Smarkm#include <sys/protosw.h>
4744743Smarkm
4844743Smarkm#include <vm/vm.h>
4944743Smarkm#include <vm/vm_param.h>
5056977Sshin#include <vm/vm_kern.h>
5156977Sshin#include <vm/vm_extern.h>
5256977Sshin
5344743Smarkmstatic void mbinit __P((void *));
5456977SshinSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
5556977Sshin
5656977Sshinstruct mbuf *mbutl;
5756977Sshinchar	*mclrefcnt;
5856977Sshinstruct mbstat mbstat;
5956977Sshinunion mcluster *mclfree;
6056977Sshinint	max_linkhdr;
6156977Sshinint	max_protohdr;
6256977Sshinint	max_hdr;
6356977Sshinint	max_datalen;
6456977Sshin
6556977Sshin/* ARGSUSED*/
6656977Sshinstatic void
6744743Smarkmmbinit(dummy)
6844743Smarkm	void *dummy;
6944743Smarkm{
7044743Smarkm	int s;
7144743Smarkm
7244743Smarkm#if CLBYTES < 4096
7344743Smarkm#define NCL_INIT	(4096/CLBYTES)
7444743Smarkm#else
7544743Smarkm#define NCL_INIT	1
7644743Smarkm#endif
7744743Smarkm	s = splimp();
7844743Smarkm	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
7944743Smarkm		goto bad;
8044743Smarkm	splx(s);
8144743Smarkm	return;
8244743Smarkmbad:
8344743Smarkm	panic("mbinit");
8444743Smarkm}
8544743Smarkm
8644743Smarkm/*
8744743Smarkm * Allocate some number of mbuf clusters
8844743Smarkm * and place on cluster free list.
8944743Smarkm * Must be called at splimp.
9044743Smarkm */
9144743Smarkm/* ARGSUSED */
9244743Smarkmint
9344743Smarkmm_clalloc(ncl, nowait)
9444743Smarkm	register int ncl;
9544743Smarkm	int nowait;
9644743Smarkm{
9744743Smarkm	register caddr_t p;
9844743Smarkm	register int i;
9944743Smarkm	int npg;
10044743Smarkm
10144743Smarkm	/*
10244743Smarkm	 * Once we run out of map space, it will be impossible
10344743Smarkm	 * to get any more (nothing is ever freed back to the
10444743Smarkm	 * map).
10544743Smarkm	 */
10644743Smarkm	if (mb_map_full)
10744743Smarkm		return (0);
10844743Smarkm
10944743Smarkm	npg = ncl * CLSIZE;
11044743Smarkm	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
11144743Smarkm				 nowait ? M_NOWAIT : M_WAITOK);
11244743Smarkm	/*
11344743Smarkm	 * Either the map is now full, or this is nowait and there
11444743Smarkm	 * are no pages left.
11544743Smarkm	 */
11644743Smarkm	if (p == NULL)
11744743Smarkm		return (0);
11844743Smarkm
11944743Smarkm	ncl = ncl * CLBYTES / MCLBYTES;
12044743Smarkm	for (i = 0; i < ncl; i++) {
12144743Smarkm		((union mcluster *)p)->mcl_next = mclfree;
12244743Smarkm		mclfree = (union mcluster *)p;
12344743Smarkm		p += MCLBYTES;
12444743Smarkm		mbstat.m_clfree++;
12544743Smarkm	}
12644743Smarkm	mbstat.m_clusters += ncl;
12744743Smarkm	return (1);
12844743Smarkm}
12944743Smarkm
13044743Smarkm/*
13144743Smarkm * When MGET failes, ask protocols to free space when short of memory,
132 * then re-attempt to allocate an mbuf.
133 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
	/*
	 * Temporarily redefine m_retry to a null mbuf so that the
	 * m_retry() reference expanded inside the MGET() macro below
	 * cannot recurse back into this function: the macro's failure
	 * branch then yields NULL instead of retrying forever.
	 */
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;	/* reclaim freed enough to succeed */
	else
		mbstat.m_drops++;	/* still out of mbufs */
	return (m);
}
150
151/*
152 * As above; retry an MGETHDR.
153 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
	/*
	 * As in m_retry(): redefine m_retryhdr to NULL so the
	 * MGETHDR() expansion below cannot recurse into this function;
	 * its failure path then produces a null mbuf.
	 */
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;	/* reclaim freed enough to succeed */
	else
		mbstat.m_drops++;	/* still out of mbufs */
	return (m);
}
170
171void
172m_reclaim()
173{
174	register struct domain *dp;
175	register struct protosw *pr;
176	int s = splimp();
177
178	for (dp = domains; dp; dp = dp->dom_next)
179		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
180			if (pr->pr_drain)
181				(*pr->pr_drain)();
182	splx(s);
183	mbstat.m_drain++;
184}
185
186/*
187 * Space allocation routines.
188 * These are also available as macros
189 * for critical paths.
190 */
/*
 * Allocate one mbuf of the given type; function form of the MGET()
 * macro.  Returns NULL on failure.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGET(mb, nowait, type);
	return (mb);
}
200
/*
 * Allocate one packet-header mbuf of the given type; function form of
 * the MGETHDR() macro.  Returns NULL on failure.
 */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	register struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
210
211struct mbuf *
212m_getclr(nowait, type)
213	int nowait, type;
214{
215	register struct mbuf *m;
216
217	MGET(m, nowait, type);
218	if (m == 0)
219		return (0);
220	bzero(mtod(m, caddr_t), MLEN);
221	return (m);
222}
223
/*
 * Free a single mbuf; function form of the MFREE() macro.
 * Returns the next mbuf in the chain (possibly NULL).
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *next;

	MFREE(m, next);
	return (next);
}
233
234void
235m_freem(m)
236	register struct mbuf *m;
237{
238	register struct mbuf *n;
239
240	if (m == NULL)
241		return;
242	do {
243		MFREE(m, n);
244		m = n;
245	} while (m);
246}
247
248/*
249 * Mbuffer utility routines.
250 */
251
252/*
253 * Lesser-used path for M_PREPEND:
254 * allocate new mbuf to prepend to chain,
255 * copy junk along.
256 */
257struct mbuf *
258m_prepend(m, len, how)
259	register struct mbuf *m;
260	int len, how;
261{
262	struct mbuf *mn;
263
264	MGET(mn, how, m->m_type);
265	if (mn == (struct mbuf *)NULL) {
266		m_freem(m);
267		return ((struct mbuf *)NULL);
268	}
269	if (m->m_flags & M_PKTHDR) {
270		M_COPY_PKTHDR(mn, m);
271		m->m_flags &= ~M_PKTHDR;
272	}
273	mn->m_next = m;
274	m = mn;
275	if (len < MHLEN)
276		MH_ALIGN(m, len);
277	m->m_len = len;
278	return (m);
279}
280
281/*
282 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
283 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
284 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
285 */
286int MCFail;
287
struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	/* Only a copy starting at offset 0 carries the packet header. */
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	/* Skip forward to the mbuf containing offset off0. */
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			/* Running off the end is only legal for M_COPYALL. */
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/*
			 * Cluster mbuf: share the external buffer instead
			 * of copying, bumping its reference count.
			 */
			n->m_data = m->m_data + off;
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
354
355/*
356 * Copy data from an mbuf chain starting "off" bytes from the beginning,
357 * continuing for "len" bytes, into the indicated buffer.
358 */
359void
360m_copydata(m, off, len, cp)
361	register struct mbuf *m;
362	register int off;
363	register int len;
364	caddr_t cp;
365{
366	register unsigned count;
367
368	if (off < 0 || len < 0)
369		panic("m_copydata");
370	while (off > 0) {
371		if (m == 0)
372			panic("m_copydata");
373		if (off < m->m_len)
374			break;
375		off -= m->m_len;
376		m = m->m_next;
377	}
378	while (len > 0) {
379		if (m == 0)
380			panic("m_copydata");
381		count = min(m->m_len - off, len);
382		bcopy(mtod(m, caddr_t) + off, cp, count);
383		len -= count;
384		cp += count;
385		off = 0;
386		m = m->m_next;
387	}
388}
389
390/*
391 * Concatenate mbuf chain n to m.
392 * Both chains must be of the same type (e.g. MT_DATA).
393 * Any m_pkthdr is not updated.
394 */
395void
396m_cat(m, n)
397	register struct mbuf *m, *n;
398{
399	while (m->m_next)
400		m = m->m_next;
401	while (n) {
402		if (m->m_flags & M_EXT ||
403		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
404			/* just join the two chains */
405			m->m_next = n;
406			return;
407		}
408		/* splat the data from one into the other */
409		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
410		    (u_int)n->m_len);
411		m->m_len += n->m_len;
412		n = m_free(n);
413	}
414}
415
416void
417m_adj(mp, req_len)
418	struct mbuf *mp;
419	int req_len;
420{
421	register int len = req_len;
422	register struct mbuf *m;
423	register count;
424
425	if ((m = mp) == NULL)
426		return;
427	if (len >= 0) {
428		/*
429		 * Trim from head.
430		 */
431		while (m != NULL && len > 0) {
432			if (m->m_len <= len) {
433				len -= m->m_len;
434				m->m_len = 0;
435				m = m->m_next;
436			} else {
437				m->m_len -= len;
438				m->m_data += len;
439				len = 0;
440			}
441		}
442		m = mp;
443		if (mp->m_flags & M_PKTHDR)
444			m->m_pkthdr.len -= (req_len - len);
445	} else {
446		/*
447		 * Trim from tail.  Scan the mbuf chain,
448		 * calculating its length and finding the last mbuf.
449		 * If the adjustment only affects this mbuf, then just
450		 * adjust and return.  Otherwise, rescan and truncate
451		 * after the remaining size.
452		 */
453		len = -len;
454		count = 0;
455		for (;;) {
456			count += m->m_len;
457			if (m->m_next == (struct mbuf *)0)
458				break;
459			m = m->m_next;
460		}
461		if (m->m_len >= len) {
462			m->m_len -= len;
463			if (mp->m_flags & M_PKTHDR)
464				mp->m_pkthdr.len -= len;
465			return;
466		}
467		count -= len;
468		if (count < 0)
469			count = 0;
470		/*
471		 * Correct length for chain is "count".
472		 * Find the mbuf with last data, adjust its length,
473		 * and toss data from remaining mbufs on chain.
474		 */
475		m = mp;
476		if (m->m_flags & M_PKTHDR)
477			m->m_pkthdr.len = count;
478		for (; m; m = m->m_next) {
479			if (m->m_len >= count) {
480				m->m_len = count;
481				break;
482			}
483			count -= m->m_len;
484		}
485		while (m->m_next)
486			(m = m->m_next) ->m_len = 0;
487	}
488}
489
490/*
 * Rearrange an mbuf chain so that len bytes are contiguous
492 * and in the data area of an mbuf (so that mtod and dtom
493 * will work for a structure of size len).  Returns the resulting
494 * mbuf chain on success, frees it and returns null on failure.
495 * If there is room, it will add up to max_protohdr-len extra bytes to the
496 * contiguous region in an attempt to avoid being called next time.
497 */
498int MPFail;
499
struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);	/* already contiguous */
		m = n;
		n = n->m_next;
		len -= m->m_len;	/* bytes still needed in m */
	} else {
		if (len > MHLEN)
			goto bad;	/* can never fit in one mbuf */
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			/* New mbuf becomes the chain head: move the header. */
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	/* Bytes of storage still free at the end of m's data area. */
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		/*
		 * Pull up to max_protohdr bytes (not just len) when
		 * there is room, to try to avoid another call soon.
		 */
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);	/* drained this mbuf entirely */
	} while (len > 0 && n);
	if (len > 0) {
		/* Chain was shorter than the requested len: fail. */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
558
559/*
560 * Partition an mbuf chain in two pieces, returning the tail --
561 * all but the first len0 bytes.  In case of failure, it returns NULL and
562 * attempts to restore the chain to its original state.
563 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	/* Walk to the mbuf containing the split point. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);	/* chain shorter than len0 */
	remain = m->m_len - len;	/* bytes of m that go to the tail */
	if (m0->m_flags & M_PKTHDR) {
		/* Head keeps its header; build a new header for the tail. */
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			/* Recurse without M_PKTHDR set to split m itself. */
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* Split lands exactly on an mbuf boundary: just unlink. */
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the cluster between head and tail. */
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
623/*
624 * Routine to copy from device local memory into mbufs.
625 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((char *from, caddr_t to, u_int len));
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * Non-zero offset: reads wrap from epkt back to buf
		 * (see the cp == epkt check below); the two u_shorts
		 * skipped here appear to be a trailer-format header —
		 * NOTE(review): confirm against the callers.
		 */
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			/* Subsequent mbufs are plain (no pkthdr). */
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			/* Large remainder: try to attach a cluster. */
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;	/* fall back to internal storage */
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;	/* wrap around the device buffer */
	}
	return (top);
}
692
693/*
694 * Copy data from a buffer back into the indicated mbuf chain,
695 * starting "off" bytes from the beginning, extending the mbuf
696 * chain if necessary.
697 */
void
m_copyback(m0, off, len, cp)
	struct	mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	/* Walk to the mbuf containing offset off, growing the chain
	 * with zero-filled mbufs as needed to reach it. */
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;	/* allocation failed: give up */
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min (m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;	/* mlen now counts bytes consumed in this mbuf */
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			/* Extend the chain to hold the remaining bytes. */
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;	/* NOTE(review): silently truncates on failure */
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
745