uipc_mbuf2.c revision 92723
1/*	$FreeBSD: head/sys/kern/uipc_mbuf2.c 92723 2002-03-19 21:25:46Z alfred $	*/
2/*	$KAME: uipc_mbuf2.c,v 1.29 2001/02/14 13:42:10 itojun Exp $	*/
3/*	$NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $	*/
4
5/*
6 * Copyright (C) 1999 WIDE Project.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
35 * Copyright (c) 1982, 1986, 1988, 1991, 1993
36 *	The Regents of the University of California.  All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *	This product includes software developed by the University of
49 *	California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
67 */
68
69/*#define PULLDOWN_DEBUG*/
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/lock.h>
74#include <sys/malloc.h>
75#include <sys/mbuf.h>
76#include <sys/mutex.h>
77
78/* can't call it m_dup(), as freebsd[34] uses m_dup() with different arg */
79static struct mbuf *m_dup1(struct mbuf *, int, int, int);
80
81/*
82 * ensure that [off, off + len) is contiguous on the mbuf chain "m".
83 * packet chain before "off" is kept untouched.
84 * if offp == NULL, the target will start at <retval, 0> on resulting chain.
85 * if offp != NULL, the target will start at <retval, *offp> on resulting chain.
86 *
87 * on error return (NULL return value), original "m" will be freed.
88 *
89 * XXX: M_TRAILINGSPACE/M_LEADINGSPACE only permitted on writable ext_buf.
90 */
91struct mbuf *
92m_pulldown(struct mbuf *m, int off, int len, int *offp)
93{
94	struct mbuf *n, *o;
95	int hlen, tlen, olen;
96	int writable;
97
98	/* check invalid arguments. */
99	if (m == NULL)
100		panic("m == NULL in m_pulldown()");
101	if (len > MCLBYTES) {
102		m_freem(m);
103		return NULL;	/* impossible */
104	}
105
106#ifdef PULLDOWN_DEBUG
107    {
108	struct mbuf *t;
109	printf("before:");
110	for (t = m; t; t = t->m_next)
111		printf(" %d", t->m_len);
112	printf("\n");
113    }
114#endif
115	n = m;
116	while (n != NULL && off > 0) {
117		if (n->m_len > off)
118			break;
119		off -= n->m_len;
120		n = n->m_next;
121	}
122	/* be sure to point non-empty mbuf */
123	while (n != NULL && n->m_len == 0)
124		n = n->m_next;
125	if (!n) {
126		m_freem(m);
127		return NULL;	/* mbuf chain too short */
128	}
129
130	/*
131	 * XXX: This code is flawed because it considers a "writable" mbuf
132	 *      data region to require all of the following:
133	 *	  (i) mbuf _has_ to have M_EXT set; if it is just a regular
134	 *	      mbuf, it is still not considered "writable."
135	 *	  (ii) since mbuf has M_EXT, the ext_type _has_ to be
136	 *	       EXT_CLUSTER. Anything else makes it non-writable.
137	 *	  (iii) M_WRITABLE() must evaluate true.
138	 *      Ideally, the requirement should only be (iii).
139	 *
140	 * If we're writable, we're sure we're writable, because the ref. count
141	 * cannot increase from 1, as that would require posession of mbuf
142	 * n by someone else (which is impossible). However, if we're _not_
143	 * writable, we may eventually become writable )if the ref. count drops
144	 * to 1), but we'll fail to notice it unless we re-evaluate
145	 * M_WRITABLE(). For now, we only evaluate once at the beginning and
146	 * live with this.
147	 */
148	/*
149	 * XXX: This is dumb. If we're just a regular mbuf with no M_EXT,
150	 *      then we're not "writable," according to this code.
151	 */
152	writable = 0;
153	if ((n->m_flags & M_EXT) == 0 ||
154	    (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n)))
155		writable = 1;
156
157	/*
158	 * the target data is on <n, off>.
159	 * if we got enough data on the mbuf "n", we're done.
160	 */
161	if ((off == 0 || offp) && len <= n->m_len - off && writable)
162		goto ok;
163
164	/*
165	 * when len <= n->m_len - off and off != 0, it is a special case.
166	 * len bytes from <n, off> sits in single mbuf, but the caller does
167	 * not like the starting position (off).
168	 * chop the current mbuf into two pieces, set off to 0.
169	 */
170	if (len <= n->m_len - off) {
171		o = m_dup1(n, off, n->m_len - off, M_DONTWAIT);
172		if (o == NULL) {
173			m_freem(m);
174			return NULL;	/* ENOBUFS */
175		}
176		n->m_len = off;
177		o->m_next = n->m_next;
178		n->m_next = o;
179		n = n->m_next;
180		off = 0;
181		goto ok;
182	}
183
184	/*
185	 * we need to take hlen from <n, off> and tlen from <n->m_next, 0>,
186	 * and construct contiguous mbuf with m_len == len.
187	 * note that hlen + tlen == len, and tlen > 0.
188	 */
189	hlen = n->m_len - off;
190	tlen = len - hlen;
191
192	/*
193	 * ensure that we have enough trailing data on mbuf chain.
194	 * if not, we can do nothing about the chain.
195	 */
196	olen = 0;
197	for (o = n->m_next; o != NULL; o = o->m_next)
198		olen += o->m_len;
199	if (hlen + olen < len) {
200		m_freem(m);
201		return NULL;	/* mbuf chain too short */
202	}
203
204	/*
205	 * easy cases first.
206	 * we need to use m_copydata() to get data from <n->m_next, 0>.
207	 */
208	if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen
209	 && writable) {
210		m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len);
211		n->m_len += tlen;
212		m_adj(n->m_next, tlen);
213		goto ok;
214	}
215	if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen
216	 && writable) {
217		n->m_next->m_data -= hlen;
218		n->m_next->m_len += hlen;
219		bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen);
220		n->m_len -= hlen;
221		n = n->m_next;
222		off = 0;
223		goto ok;
224	}
225
226	/*
227	 * now, we need to do the hard way.  don't m_copy as there's no room
228	 * on both end.
229	 */
230	MGET(o, M_DONTWAIT, m->m_type);
231	if (o && len > MLEN) {
232		MCLGET(o, M_DONTWAIT);
233		if ((o->m_flags & M_EXT) == 0) {
234			m_free(o);
235			o = NULL;
236		}
237	}
238	if (!o) {
239		m_freem(m);
240		return NULL;	/* ENOBUFS */
241	}
242	/* get hlen from <n, off> into <o, 0> */
243	o->m_len = hlen;
244	bcopy(mtod(n, caddr_t) + off, mtod(o, caddr_t), hlen);
245	n->m_len -= hlen;
246	/* get tlen from <n->m_next, 0> into <o, hlen> */
247	m_copydata(n->m_next, 0, tlen, mtod(o, caddr_t) + o->m_len);
248	o->m_len += tlen;
249	m_adj(n->m_next, tlen);
250	o->m_next = n->m_next;
251	n->m_next = o;
252	n = o;
253	off = 0;
254
255ok:
256#ifdef PULLDOWN_DEBUG
257    {
258	struct mbuf *t;
259	printf("after:");
260	for (t = m; t; t = t->m_next)
261		printf("%c%d", t == n ? '*' : ' ', t->m_len);
262	printf(" (off=%d)\n", off);
263    }
264#endif
265	if (offp)
266		*offp = off;
267	return n;
268}
269
270static struct mbuf *
271m_dup1(struct mbuf *m, int off, int len, int wait)
272{
273	struct mbuf *n;
274	int l;
275	int copyhdr;
276
277	if (len > MCLBYTES)
278		return NULL;
279	if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
280		copyhdr = 1;
281		MGETHDR(n, wait, m->m_type);
282		l = MHLEN;
283	} else {
284		copyhdr = 0;
285		MGET(n, wait, m->m_type);
286		l = MLEN;
287	}
288	if (n && len > l) {
289		MCLGET(n, wait);
290		if ((n->m_flags & M_EXT) == 0) {
291			m_free(n);
292			n = NULL;
293		}
294	}
295	if (!n)
296		return NULL;
297
298	if (copyhdr)
299		M_COPY_PKTHDR(n, m);
300	m_copydata(m, off, len, mtod(n, caddr_t));
301	return n;
302}
303
304/*
305 * pkthdr.aux chain manipulation.
306 * we don't allow clusters at this moment.
307 */
308struct mbuf *
309m_aux_add2(struct mbuf *m, int af, int type, void *p)
310{
311	struct mbuf *n;
312	struct mauxtag *t;
313
314	if ((m->m_flags & M_PKTHDR) == 0)
315		return NULL;
316
317	n = m_aux_find(m, af, type);
318	if (n)
319		return n;
320
321	MGET(n, M_DONTWAIT, m->m_type);
322	if (n == NULL)
323		return NULL;
324
325	t = mtod(n, struct mauxtag *);
326	bzero(t, sizeof(*t));
327	t->af = af;
328	t->type = type;
329	t->p = p;
330	n->m_data += sizeof(struct mauxtag);
331	n->m_len = 0;
332	n->m_next = m->m_pkthdr.aux;
333	m->m_pkthdr.aux = n;
334	return n;
335}
336
337struct mbuf *
338m_aux_find2(struct mbuf *m, int af, int type, void *p)
339{
340	struct mbuf *n;
341	struct mauxtag *t;
342
343	if ((m->m_flags & M_PKTHDR) == 0)
344		return NULL;
345
346	for (n = m->m_pkthdr.aux; n; n = n->m_next) {
347		t = (struct mauxtag *)n->m_dat;
348		if (n->m_data != ((caddr_t)t) + sizeof(struct mauxtag)) {
349			printf("m_aux_find: invalid m_data for mbuf=%p (%p %p)\n", n, t, n->m_data);
350			continue;
351		}
352		if (t->af == af && t->type == type && t->p == p)
353			return n;
354	}
355	return NULL;
356}
357
358struct mbuf *
359m_aux_find(struct mbuf *m, int af, int type)
360{
361
362	return m_aux_find2(m, af, type, NULL);
363}
364
365struct mbuf *
366m_aux_add(struct mbuf *m, int af, int type)
367{
368
369	return m_aux_add2(m, af, type, NULL);
370}
371
372void
373m_aux_delete(struct mbuf *m, struct mbuf *victim)
374{
375	struct mbuf *n, *prev, *next;
376	struct mauxtag *t;
377
378	if ((m->m_flags & M_PKTHDR) == 0)
379		return;
380
381	prev = NULL;
382	n = m->m_pkthdr.aux;
383	while (n) {
384		t = (struct mauxtag *)n->m_dat;
385		next = n->m_next;
386		if (n->m_data != ((caddr_t)t) + sizeof(struct mauxtag)) {
387			printf("m_aux_delete: invalid m_data for mbuf=%p (%p %p)\n", n, t, n->m_data);
388			prev = n;
389			n = next;
390			continue;
391		}
392		if (n == victim) {
393			if (prev)
394				prev->m_next = n->m_next;
395			else
396				m->m_pkthdr.aux = n->m_next;
397			n->m_next = NULL;
398			m_free(n);
399		} else
400			prev = n;
401		n = next;
402	}
403}
404