/*-
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
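
/*
 * Example (sketch): pulling a fixed-size header out of a received chain
 * into a local buffer, regardless of how the chain happens to be split.
 * "struct foo_hdr" and the length check are illustrative only.
 *
 *	struct foo_hdr fh;
 *
 *	if (m->m_pkthdr.len >= sizeof(fh))
 *		m_copydata(m, 0, sizeof(fh), (caddr_t)&fh);
 */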

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}
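
/*
 * Example (sketch): because m_cat() does not touch m_pkthdr.len, a caller
 * appending chain "n" to packet "m" would typically recompute the header
 * length afterwards; both chains here are hypothetical.
 *
 *	m_cat(m, n);
 *	m_fixhdr(m);		(m_pkthdr.len now matches the chain again)
 */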

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
{
	MBUF_CHECKSLEEP(how);
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, MBTOM(how)));
}

/*
 * Defragment an mbuf chain, returning the shortest possible chain of
 * mbufs and clusters.  If allocation fails and this cannot be completed,
 * NULL will be returned, but the passed-in chain will be unchanged.
 * Upon success, the original chain will be freed and the new chain
 * will be returned.
 *
 * If the mbuf passed in has no packet header (M_PKTHDR is not set),
 * the original chain is returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}

	m_freem(m0);
	m0 = m_final;
	return (m0);
nospace:
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
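
/*
 * Example (sketch): a transmit path might call m_defrag() when a chain has
 * become too fragmented for the hardware.  The fragment-count test and the
 * error handling are illustrative, not taken from any particular driver.
 *
 *	if (too_many_frags(m)) {
 *		struct mbuf *d = m_defrag(m, M_DONTWAIT);
 *		if (d == NULL) {
 *			m_freem(m);		(m was left unchanged; drop it)
 *			return (ENOBUFS);
 *		}
 *		m = d;
 *	}
 */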

/*
 * Trim data from an mbuf chain: a positive req_len removes that many
 * bytes from the head of the chain, a negative req_len removes bytes
 * from the tail.  If the chain carries a packet header, m_pkthdr.len
 * is adjusted accordingly.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}
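
/*
 * Example (sketch): a positive length trims from the head of the chain and
 * a negative length trims from the tail.  "hdr_len" and "trailer_len" are
 * hypothetical protocol header/trailer sizes.
 *
 *	m_adj(m, hdr_len);	(drop hdr_len bytes from the front)
 *	m_adj(m, -trailer_len);	(drop trailer_len bytes from the end)
 */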

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	return (NULL);
}
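
/*
 * Example (sketch): the usual m_pullup() idiom makes a protocol header
 * contiguous before it is dereferenced through mtod().  "struct foo_hdr"
 * stands in for whatever header the caller needs.
 *
 *	if (m->m_len < sizeof(struct foo_hdr) &&
 *	    (m = m_pullup(m, sizeof(struct foo_hdr))) == NULL)
 *		return;			(the chain was freed by m_pullup)
 *	fh = mtod(m, struct foo_hdr *);
 */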

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		MGETHDR(mn, how, m->m_type);
	else
		MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
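
/*
 * Example (sketch): callers normally use the M_PREPEND() macro, which only
 * falls back to m_prepend() when there is no leading space in the first
 * mbuf.  "hdr_len" is illustrative.
 *
 *	M_PREPEND(m, hdr_len, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	(the original chain was already freed)
 */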

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{
#ifdef MAC
	/*
	 * XXXMAC: It could be this should also occur for non-MAC?
	 */
	if (to->m_flags & M_PKTHDR)
		m_tag_delete_chain(to, NULL);
#endif
	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Routine to copy from device local memory into mbufs.
 * Note that the "off" argument is the offset into the first mbuf of the
 * target chain at which the copied data is placed.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
	 void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_DONTWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return (NULL);
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_DONTWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_DONTWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
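
/*
 * Example (sketch): a receive handler with a contiguous DMA buffer could
 * hand a frame to the stack roughly as follows.  The softc fields and
 * "frame_len" are hypothetical; a NULL copy routine selects plain bcopy.
 *
 *	m = m_devget(sc->rx_buf, frame_len, 0, sc->ifp, NULL);
 *	if (m != NULL)
 *		(*sc->ifp->if_input)(sc->ifp, m);
 */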

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			bzero(mtod(n, caddr_t), MLEN);
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
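
/*
 * Example (sketch): overwriting a field at a known offset in an existing
 * chain, e.g. patching a 16-bit checksum.  "csum_off" and "csum" are
 * illustrative.
 *
 *	m_copyback(m, csum_off, sizeof(csum), (caddr_t)&csum);
 */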

/*
 * Append the specified data to the indicated mbuf chain,
 * extending the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
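
/*
 * Example (sketch): building up a record by appending a small piece of
 * data; the return value must be checked since allocation can fail
 * part-way through.  "rec" and "rec_len" are illustrative.
 *
 *	if (!m_append(m, rec_len, (c_caddr_t)rec))
 *		goto drop;		(chain is short; caller cleans up)
 */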

/*
 * Defragment an mbuf chain, returning at most maxfrags separate
 * mbufs+clusters.  If this is not possible NULL is returned and
 * the original mbuf chain is left in its present (potentially
 * modified) state.  We use two techniques: collapsing consecutive
 * mbufs and replacing consecutive mbufs by a cluster.
 *
 * NB: this should really be named m_defrag but that name is taken
 */
struct mbuf *
m_collapse(struct mbuf *m0, int how, int maxfrags)
{
	struct mbuf *m, *n, *n2, **prev;
	u_int curfrags;

	/*
	 * Calculate the current number of frags.
	 */
	curfrags = 0;
	for (m = m0; m != NULL; m = m->m_next)
		curfrags++;
	/*
	 * First, try to collapse mbufs.  Note that we always collapse
	 * towards the front so we don't need to deal with moving the
	 * pkthdr.  This may be suboptimal if the first mbuf has much
	 * less data than the following.
	 */
	m = m0;
again:
	for (;;) {
		n = m->m_next;
		if (n == NULL)
			break;
		if ((m->m_flags & M_RDONLY) == 0 &&
		    n->m_len < M_TRAILINGSPACE(m)) {
			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
				n->m_len);
			m->m_len += n->m_len;
			m->m_next = n->m_next;
			m_free(n);
			if (--curfrags <= maxfrags)
				return (m0);
		} else
			m = n;
	}
	KASSERT(maxfrags > 1,
		("maxfrags %u, but normal collapse failed", maxfrags));
	/*
	 * Collapse consecutive mbufs to a cluster.
	 */
	prev = &m0->m_next;		/* NB: not the first mbuf */
	while ((n = *prev) != NULL) {
		if ((n2 = n->m_next) != NULL &&
		    n->m_len + n2->m_len < MCLBYTES) {
			m = m_getcl(how, MT_DATA, 0);
			if (m == NULL)
				goto bad;
			bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
			bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
				n2->m_len);
			m->m_len = n->m_len + n2->m_len;
			m->m_next = n2->m_next;
			*prev = m;
			m_free(n);
			m_free(n2);
			if (--curfrags <= maxfrags)	/* +1 cl -2 mbufs */
				return (m0);
			/*
			 * Still not there, try the normal collapse
			 * again before we allocate another cluster.
			 */
			goto again;
		}
		prev = &n->m_next;
	}
	/*
	 * No place where we can collapse to a cluster; punt.
	 * This can occur if, for example, you request 2 frags
	 * but the packet requires that both be clusters (we
	 * never reallocate the first mbuf to avoid moving the
	 * packet header).
	 */
bad:
	return (NULL);
}
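
/*
 * Example (sketch): a driver whose DMA engine handles at most "nsegs"
 * segments might try m_collapse() before dropping a long chain.  The
 * surrounding logic is hypothetical.
 *
 *	m2 = m_collapse(m, M_DONTWAIT, nsegs);
 *	if (m2 == NULL) {
 *		m_freem(m);		(could not fit in nsegs frags; drop)
 *		return (ENOBUFS);
 *	}
 *	m = m2;
 */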

/*
 * Attach the cluster from *m to *n, set up m_ext in *n
 * and bump the reference count of the cluster.
 */
static void
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));

	/* Share the cluster: take another reference on it for *n. */
	atomic_add_int(m->m_ext.ref_cnt, 1);
	n->m_ext.ext_buf = m->m_ext.ext_buf;
	n->m_ext.ext_free = m->m_ext.ext_free;
	n->m_ext.ext_args = m->m_ext.ext_args;
	n->m_ext.ext_size = m->m_ext.ext_size;
	n->m_ext.ref_cnt = m->m_ext.ref_cnt;
	n->m_ext.ext_type = m->m_ext.ext_type;
	n->m_flags |= M_EXT;
	n->m_flags |= m->m_flags & M_RDONLY;
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
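
/*
 * Example (sketch): splitting a chain after a hypothetical record length,
 * keeping the first record in "m" and handling the tail separately.
 *
 *	tail = m_split(m, rec_len, M_DONTWAIT);
 *	if (tail == NULL)
 *		return (ENOBUFS);	(m is left for the caller to dispose of)
 */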

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	MBUF_CHECKSLEEP(how);
	/* Sanity check */
	if (m == NULL)
		return (NULL);
	M_ASSERTPKTHDR(m);

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		if (remain >= MINCLSIZE) {
			n = m_getcl(how, m->m_type, 0);
			nsize = MCLBYTES;
		} else {
			n = m_get(how, m->m_type);
			nsize = MLEN;
		}
		if (n == NULL)
			goto nospace;

		if (top == NULL) {		/* First one, must be PKTHDR */
			if (!m_dup_pkthdr(n, m, how)) {
				m_free(n);
				goto nospace;
			}
			if ((n->m_flags & M_EXT) == 0)
				nsize = MHLEN;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
struct mbuf *
m_unshare(struct mbuf *m0, int how)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of them that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				       mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
#if 0
				newipsecstat.ips_mbcoalesced++;
#endif
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).
		 */
		if (M_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			       mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
#if 0
			newipsecstat.ips_clcoalesced++;
#endif
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, how, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(how, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
#if 0
			newipsecstat.ips_clcopied++;
#endif

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(how, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
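
/*
 * Example (sketch): code about to modify packet data in place (encryption,
 * checksum insertion) can obtain a writable chain first.
 *
 *	m = m_unshare(m, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	(the original chain was already freed)
 */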

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MBUF_CHECKSLEEP(how);
	MGET(n, how, m->m_type);
	top = n;
	if (n == NULL)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		mb_dupcl(n, m);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (o == NULL)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			mb_dupcl(n, m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}