ipsec_mbuf.c revision 1.10
1/*	$NetBSD: ipsec_mbuf.c,v 1.10 2007/12/14 20:55:22 seanb Exp $	*/
2/*-
3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.10 2007/12/14 20:55:22 seanb Exp $");
32
33/*
34 * IPsec-specific mbuf routines.
35 */
36
37#ifdef __FreeBSD__
38#include "opt_param.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/mbuf.h>
44#include <sys/socket.h>
45
46#include <net/route.h>
47#include <netinet/in.h>
48
49#include <netipsec/ipsec.h>
50#include <netipsec/ipsec_var.h>
51
52#include <netipsec/ipsec_osdep.h>
53#include <net/net_osdep.h>
54
55extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
56
57/*
58 * Create a writable copy of the mbuf chain.  While doing this
59 * we compact the chain with a goal of producing a chain with
60 * at most two mbufs.  The second mbuf in this chain is likely
61 * to be a cluster.  The primary purpose of this work is to create
62 * a writable packet for encryption, compression, etc.  The
63 * secondary goal is to linearize the data so the data can be
64 * passed to crypto hardware in the most efficient manner possible.
65 */
66struct mbuf *
67m_clone(struct mbuf *m0)
68{
69	struct mbuf *m, *mprev;
70	struct mbuf *n, *mfirst, *mlast;
71	int len, off;
72
73	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
74
75	mprev = NULL;
76	for (m = m0; m != NULL; m = mprev->m_next) {
77		/*
78		 * Regular mbufs are ignored unless there's a cluster
79		 * in front of it that we can use to coalesce.  We do
80		 * the latter mainly so later clusters can be coalesced
81		 * also w/o having to handle them specially (i.e. convert
82		 * mbuf+cluster -> cluster).  This optimization is heavily
83		 * influenced by the assumption that we're running over
84		 * Ethernet where MCLBYTES is large enough that the max
85		 * packet size will permit lots of coalescing into a
86		 * single cluster.  This in turn permits efficient
87		 * crypto operations, especially when using hardware.
88		 */
89		if ((m->m_flags & M_EXT) == 0) {
90			if (mprev && (mprev->m_flags & M_EXT) &&
91			    m->m_len <= M_TRAILINGSPACE(mprev)) {
92				/* XXX: this ignores mbuf types */
93				memcpy(mtod(mprev, char *) + mprev->m_len,
94				       mtod(m, char *), m->m_len);
95				mprev->m_len += m->m_len;
96				mprev->m_next = m->m_next;	/* unlink from chain */
97				m_free(m);			/* reclaim mbuf */
98				newipsecstat.ips_mbcoalesced++;
99			} else {
100				mprev = m;
101			}
102			continue;
103		}
104		/*
105		 * Writable mbufs are left alone (for now).  Note
106		 * that for 4.x systems it's not possible to identify
107		 * whether or not mbufs with external buffers are
108		 * writable unless they use clusters.
109		 */
110		if (M_EXT_WRITABLE(m)) {
111			mprev = m;
112			continue;
113		}
114
115		/*
116		 * Not writable, replace with a copy or coalesce with
117		 * the previous mbuf if possible (since we have to copy
118		 * it anyway, we try to reduce the number of mbufs and
119		 * clusters so that future work is easier).
120		 */
121		IPSEC_ASSERT(m->m_flags & M_EXT,
122			("m_clone: m_flags 0x%x", m->m_flags));
123		/* NB: we only coalesce into a cluster or larger */
124		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
125		    m->m_len <= M_TRAILINGSPACE(mprev)) {
126			/* XXX: this ignores mbuf types */
127			memcpy(mtod(mprev, char *) + mprev->m_len,
128			       mtod(m, char *), m->m_len);
129			mprev->m_len += m->m_len;
130			mprev->m_next = m->m_next;	/* unlink from chain */
131			m_free(m);			/* reclaim mbuf */
132			newipsecstat.ips_clcoalesced++;
133			continue;
134		}
135
136		/*
137		 * Allocate new space to hold the copy...
138		 */
139		/* XXX why can M_PKTHDR be set past the first mbuf? */
140		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
141			/*
142			 * NB: if a packet header is present we must
143			 * allocate the mbuf separately from any cluster
144			 * because M_MOVE_PKTHDR will smash the data
145			 * pointer and drop the M_EXT marker.
146			 */
147			MGETHDR(n, M_DONTWAIT, m->m_type);
148			if (n == NULL) {
149				m_freem(m0);
150				return (NULL);
151			}
152			M_MOVE_PKTHDR(n, m);
153			MCLGET(n, M_DONTWAIT);
154			if ((n->m_flags & M_EXT) == 0) {
155				m_free(n);
156				m_freem(m0);
157				return (NULL);
158			}
159		} else {
160			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
161			if (n == NULL) {
162				m_freem(m0);
163				return (NULL);
164			}
165		}
166		/*
167		 * ... and copy the data.  We deal with jumbo mbufs
168		 * (i.e. m_len > MCLBYTES) by splitting them into
169		 * clusters.  We could just malloc a buffer and make
170		 * it external but too many device drivers don't know
171		 * how to break up the non-contiguous memory when
172		 * doing DMA.
173		 */
174		len = m->m_len;
175		off = 0;
176		mfirst = n;
177		mlast = NULL;
178		for (;;) {
179			int cc = min(len, MCLBYTES);
180			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
181			n->m_len = cc;
182			if (mlast != NULL)
183				mlast->m_next = n;
184			mlast = n;
185			newipsecstat.ips_clcopied++;
186
187			len -= cc;
188			if (len <= 0)
189				break;
190			off += cc;
191
192			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
193			if (n == NULL) {
194				m_freem(mfirst);
195				m_freem(m0);
196				return (NULL);
197			}
198		}
199		n->m_next = m->m_next;
200		if (mprev == NULL)
201			m0 = mfirst;		/* new head of chain */
202		else
203			mprev->m_next = mfirst;	/* replace old mbuf */
204		m_free(m);			/* release old mbuf */
205		mprev = mfirst;
206	}
207	return (m0);
208}
209
210/*
211 * Make space for a new header of length hlen at skip bytes
212 * into the packet.  When doing this we allocate new mbufs only
213 * when absolutely necessary.  The mbuf where the new header
214 * is to go is returned together with an offset into the mbuf.
215 * If NULL is returned then the mbuf chain may have been modified;
216 * the caller is assumed to always free the chain.
217 */
218struct mbuf *
219m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
220{
221	struct mbuf *m;
222	unsigned remain;
223
224	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
225	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
226
227	for (m = m0; m && skip > m->m_len; m = m->m_next)
228		skip -= m->m_len;
229	if (m == NULL)
230		return (NULL);
231	/*
232	 * At this point skip is the offset into the mbuf m
233	 * where the new header should be placed.  Figure out
234	 * if there's space to insert the new header.  If so,
235	 * and copying the remainder makese sense then do so.
236	 * Otherwise insert a new mbuf in the chain, splitting
237	 * the contents of m as needed.
238	 */
239	remain = m->m_len - skip;		/* data to move */
240	if (hlen > M_TRAILINGSPACE(m)) {
241		struct mbuf *n0, *n, **np;
242		int todo, len, done, alloc;
243
244		n0 = NULL;
245		np = &n0;
246		alloc = 0;
247		done = 0;
248		todo = remain;
249		while (todo > 0) {
250			if (todo > MHLEN) {
251				n = m_getcl(M_DONTWAIT, m->m_type, 0);
252				len = MCLBYTES;
253			}
254			else {
255				n = m_get(M_DONTWAIT, m->m_type);
256				len = MHLEN;
257			}
258			if (n == NULL) {
259				m_freem(n0);
260				return NULL;
261			}
262			*np = n;
263			np = &n->m_next;
264			alloc++;
265			len = min(todo, len);
266			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
267			n->m_len = len;
268			done += len;
269			todo -= len;
270		}
271
272		if (hlen <= M_TRAILINGSPACE(m) + remain) {
273			m->m_len = skip + hlen;
274			*off = skip;
275			if (n0 != NULL) {
276				*np = m->m_next;
277				m->m_next = n0;
278			}
279		}
280		else {
281			n = m_get(M_DONTWAIT, m->m_type);
282			if (n == NULL) {
283				m_freem(n0);
284				return NULL;
285			}
286			alloc++;
287
288			if ((n->m_next = n0) == NULL)
289				np = &n->m_next;
290			n0 = n;
291
292			*np = m->m_next;
293			m->m_next = n0;
294
295			n->m_len = hlen;
296			m->m_len = skip;
297
298			m = n;			/* header is at front ... */
299			*off = 0;		/* ... of new mbuf */
300		}
301
302		newipsecstat.ips_mbinserted += alloc;
303	} else {
304		/*
305		 * Copy the remainder to the back of the mbuf
306		 * so there's space to write the new header.
307		 */
308		/* XXX can this be memcpy? does it handle overlap? */
309		ovbcopy(mtod(m, char *) + skip,
310			mtod(m, char *) + skip + hlen, remain);
311		m->m_len += hlen;
312		*off = skip;
313	}
314	m0->m_pkthdr.len += hlen;		/* adjust packet length */
315	return m;
316}
317
318/*
319 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
320 * length is updated, and a pointer to the first byte of the padding
321 * (which is guaranteed to be all in one mbuf) is returned.
322 */
323void *
324m_pad(struct mbuf *m, int n)
325{
326	register struct mbuf *m0, *m1;
327	register int len, pad;
328	void *retval;
329
330	if (n <= 0) {  /* No stupid arguments. */
331		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
332		m_freem(m);
333		return NULL;
334	}
335
336	len = m->m_pkthdr.len;
337	pad = n;
338	m0 = m;
339
340	while (m0->m_len < len) {
341IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
342		len -= m0->m_len;
343		m0 = m0->m_next;
344	}
345
346	if (m0->m_len != len) {
347		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
348		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
349
350		m_freem(m);
351		return NULL;
352	}
353
354	/* Check for zero-length trailing mbufs, and find the last one. */
355	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
356		if (m1->m_next->m_len != 0) {
357			DPRINTF(("m_pad: length mismatch (should be %d "
358			    "instead of %d)\n",
359			    m->m_pkthdr.len,
360			    m->m_pkthdr.len + m1->m_next->m_len));
361
362			m_freem(m);
363			return NULL;
364		}
365
366		m0 = m1->m_next;
367	}
368
369	if (pad > M_TRAILINGSPACE(m0)) {
370		/* Add an mbuf to the chain. */
371		MGET(m1, M_DONTWAIT, MT_DATA);
372		if (m1 == 0) {
373			m_freem(m0);
374			DPRINTF(("m_pad: unable to get extra mbuf\n"));
375			return NULL;
376		}
377
378		m0->m_next = m1;
379		m0 = m1;
380		m0->m_len = 0;
381	}
382
383	retval = m0->m_data + m0->m_len;
384	m0->m_len += pad;
385	m->m_pkthdr.len += pad;
386
387	return retval;
388}
389
390/*
391 * Remove hlen data at offset skip in the packet.  This is used by
392 * the protocols strip protocol headers and associated data (e.g. IV,
393 * authenticator) on input.
394 */
395int
396m_striphdr(struct mbuf *m, int skip, int hlen)
397{
398	struct mbuf *m1;
399	int roff;
400
401	/* Find beginning of header */
402	m1 = m_getptr(m, skip, &roff);
403	if (m1 == NULL)
404		return (EINVAL);
405
406	/* Remove the header and associated data from the mbuf. */
407	if (roff == 0) {
408		/* The header was at the beginning of the mbuf */
409		newipsecstat.ips_input_front++;
410		m_adj(m1, hlen);
411		if ((m1->m_flags & M_PKTHDR) == 0)
412			m->m_pkthdr.len -= hlen;
413	} else if (roff + hlen >= m1->m_len) {
414		struct mbuf *mo;
415
416		/*
417		 * Part or all of the header is at the end of this mbuf,
418		 * so first let's remove the remainder of the header from
419		 * the beginning of the remainder of the mbuf chain, if any.
420		 */
421		newipsecstat.ips_input_end++;
422		if (roff + hlen > m1->m_len) {
423			/* Adjust the next mbuf by the remainder */
424			m_adj(m1->m_next, roff + hlen - m1->m_len);
425
426			/* The second mbuf is guaranteed not to have a pkthdr... */
427			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
428		}
429
430		/* Now, let's unlink the mbuf chain for a second...*/
431		mo = m1->m_next;
432		m1->m_next = NULL;
433
434		/* ...and trim the end of the first part of the chain...sick */
435		m_adj(m1, -(m1->m_len - roff));
436		if ((m1->m_flags & M_PKTHDR) == 0)
437			m->m_pkthdr.len -= (m1->m_len - roff);
438
439		/* Finally, let's relink */
440		m1->m_next = mo;
441	} else {
442		/*
443		 * The header lies in the "middle" of the mbuf; copy
444		 * the remainder of the mbuf down over the header.
445		 */
446		newipsecstat.ips_input_middle++;
447		ovbcopy(mtod(m1, u_char *) + roff + hlen,
448		      mtod(m1, u_char *) + roff,
449		      m1->m_len - (roff + hlen));
450		m1->m_len -= hlen;
451		m->m_pkthdr.len -= hlen;
452	}
453	return (0);
454}
455
456/*
457 * Diagnostic routine to check mbuf alignment as required by the
458 * crypto device drivers (that use DMA).
459 */
460void
461m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
462{
463	int roff;
464	struct mbuf *m = m_getptr(m0, off, &roff);
465	void *addr;
466
467	if (m == NULL)
468		return;
469	printf("%s (off %u len %u): ", where, off, len);
470	addr = mtod(m, char *) + roff;
471	do {
472		int mlen;
473
474		if (((uintptr_t) addr) & 3) {
475			printf("addr misaligned %p,", addr);
476			break;
477		}
478		mlen = m->m_len;
479		if (mlen > len)
480			mlen = len;
481		len -= mlen;
482		if (len && (mlen & 3)) {
483			printf("len mismatch %u,", mlen);
484			break;
485		}
486		m = m->m_next;
487		addr = m ? mtod(m, void *) : NULL;
488	} while (m && len > 0);
489	for (m = m0; m; m = m->m_next)
490		printf(" [%p:%u]", mtod(m, void *), m->m_len);
491	printf("\n");
492}
493