ipsec_mbuf.c revision 1.12
1/*	$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $	*/
2/*-
3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $");
32
33/*
34 * IPsec-specific mbuf routines.
35 */
36
37#ifdef __FreeBSD__
38#include "opt_param.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/mbuf.h>
44#include <sys/socket.h>
45
46#include <net/route.h>
47#include <netinet/in.h>
48
49#include <netipsec/ipsec.h>
50#include <netipsec/ipsec_var.h>
51#include <netipsec/ipsec_private.h>
52
53#include <netipsec/ipsec_osdep.h>
54#include <net/net_osdep.h>
55
56/*
57 * Create a writable copy of the mbuf chain.  While doing this
58 * we compact the chain with a goal of producing a chain with
59 * at most two mbufs.  The second mbuf in this chain is likely
60 * to be a cluster.  The primary purpose of this work is to create
61 * a writable packet for encryption, compression, etc.  The
62 * secondary goal is to linearize the data so the data can be
63 * passed to crypto hardware in the most efficient manner possible.
64 */
65struct mbuf *
66m_clone(struct mbuf *m0)
67{
68	struct mbuf *m, *mprev;
69	struct mbuf *n, *mfirst, *mlast;
70	int len, off;
71
72	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
73
74	mprev = NULL;
75	for (m = m0; m != NULL; m = mprev->m_next) {
76		/*
77		 * Regular mbufs are ignored unless there's a cluster
78		 * in front of it that we can use to coalesce.  We do
79		 * the latter mainly so later clusters can be coalesced
80		 * also w/o having to handle them specially (i.e. convert
81		 * mbuf+cluster -> cluster).  This optimization is heavily
82		 * influenced by the assumption that we're running over
83		 * Ethernet where MCLBYTES is large enough that the max
84		 * packet size will permit lots of coalescing into a
85		 * single cluster.  This in turn permits efficient
86		 * crypto operations, especially when using hardware.
87		 */
88		if ((m->m_flags & M_EXT) == 0) {
89			if (mprev && (mprev->m_flags & M_EXT) &&
90			    m->m_len <= M_TRAILINGSPACE(mprev)) {
91				/* XXX: this ignores mbuf types */
92				memcpy(mtod(mprev, char *) + mprev->m_len,
93				       mtod(m, char *), m->m_len);
94				mprev->m_len += m->m_len;
95				mprev->m_next = m->m_next;	/* unlink from chain */
96				m_free(m);			/* reclaim mbuf */
97				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
98			} else {
99				mprev = m;
100			}
101			continue;
102		}
103		/*
104		 * Writable mbufs are left alone (for now).  Note
105		 * that for 4.x systems it's not possible to identify
106		 * whether or not mbufs with external buffers are
107		 * writable unless they use clusters.
108		 */
109		if (M_EXT_WRITABLE(m)) {
110			mprev = m;
111			continue;
112		}
113
114		/*
115		 * Not writable, replace with a copy or coalesce with
116		 * the previous mbuf if possible (since we have to copy
117		 * it anyway, we try to reduce the number of mbufs and
118		 * clusters so that future work is easier).
119		 */
120		IPSEC_ASSERT(m->m_flags & M_EXT,
121			("m_clone: m_flags 0x%x", m->m_flags));
122		/* NB: we only coalesce into a cluster or larger */
123		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
124		    m->m_len <= M_TRAILINGSPACE(mprev)) {
125			/* XXX: this ignores mbuf types */
126			memcpy(mtod(mprev, char *) + mprev->m_len,
127			       mtod(m, char *), m->m_len);
128			mprev->m_len += m->m_len;
129			mprev->m_next = m->m_next;	/* unlink from chain */
130			m_free(m);			/* reclaim mbuf */
131			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
132			continue;
133		}
134
135		/*
136		 * Allocate new space to hold the copy...
137		 */
138		/* XXX why can M_PKTHDR be set past the first mbuf? */
139		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
140			/*
141			 * NB: if a packet header is present we must
142			 * allocate the mbuf separately from any cluster
143			 * because M_MOVE_PKTHDR will smash the data
144			 * pointer and drop the M_EXT marker.
145			 */
146			MGETHDR(n, M_DONTWAIT, m->m_type);
147			if (n == NULL) {
148				m_freem(m0);
149				return (NULL);
150			}
151			M_MOVE_PKTHDR(n, m);
152			MCLGET(n, M_DONTWAIT);
153			if ((n->m_flags & M_EXT) == 0) {
154				m_free(n);
155				m_freem(m0);
156				return (NULL);
157			}
158		} else {
159			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
160			if (n == NULL) {
161				m_freem(m0);
162				return (NULL);
163			}
164		}
165		/*
166		 * ... and copy the data.  We deal with jumbo mbufs
167		 * (i.e. m_len > MCLBYTES) by splitting them into
168		 * clusters.  We could just malloc a buffer and make
169		 * it external but too many device drivers don't know
170		 * how to break up the non-contiguous memory when
171		 * doing DMA.
172		 */
173		len = m->m_len;
174		off = 0;
175		mfirst = n;
176		mlast = NULL;
177		for (;;) {
178			int cc = min(len, MCLBYTES);
179			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
180			n->m_len = cc;
181			if (mlast != NULL)
182				mlast->m_next = n;
183			mlast = n;
184			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
185
186			len -= cc;
187			if (len <= 0)
188				break;
189			off += cc;
190
191			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
192			if (n == NULL) {
193				m_freem(mfirst);
194				m_freem(m0);
195				return (NULL);
196			}
197		}
198		n->m_next = m->m_next;
199		if (mprev == NULL)
200			m0 = mfirst;		/* new head of chain */
201		else
202			mprev->m_next = mfirst;	/* replace old mbuf */
203		m_free(m);			/* release old mbuf */
204		mprev = mfirst;
205	}
206	return (m0);
207}
208
209/*
210 * Make space for a new header of length hlen at skip bytes
211 * into the packet.  When doing this we allocate new mbufs only
212 * when absolutely necessary.  The mbuf where the new header
213 * is to go is returned together with an offset into the mbuf.
214 * If NULL is returned then the mbuf chain may have been modified;
215 * the caller is assumed to always free the chain.
216 */
217struct mbuf *
218m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
219{
220	struct mbuf *m;
221	unsigned remain;
222
223	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
224	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
225
226	for (m = m0; m && skip > m->m_len; m = m->m_next)
227		skip -= m->m_len;
228	if (m == NULL)
229		return (NULL);
230	/*
231	 * At this point skip is the offset into the mbuf m
232	 * where the new header should be placed.  Figure out
233	 * if there's space to insert the new header.  If so,
234	 * and copying the remainder makese sense then do so.
235	 * Otherwise insert a new mbuf in the chain, splitting
236	 * the contents of m as needed.
237	 */
238	remain = m->m_len - skip;		/* data to move */
239	if (hlen > M_TRAILINGSPACE(m)) {
240		struct mbuf *n0, *n, **np;
241		int todo, len, done, alloc;
242
243		n0 = NULL;
244		np = &n0;
245		alloc = 0;
246		done = 0;
247		todo = remain;
248		while (todo > 0) {
249			if (todo > MHLEN) {
250				n = m_getcl(M_DONTWAIT, m->m_type, 0);
251				len = MCLBYTES;
252			}
253			else {
254				n = m_get(M_DONTWAIT, m->m_type);
255				len = MHLEN;
256			}
257			if (n == NULL) {
258				m_freem(n0);
259				return NULL;
260			}
261			*np = n;
262			np = &n->m_next;
263			alloc++;
264			len = min(todo, len);
265			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
266			n->m_len = len;
267			done += len;
268			todo -= len;
269		}
270
271		if (hlen <= M_TRAILINGSPACE(m) + remain) {
272			m->m_len = skip + hlen;
273			*off = skip;
274			if (n0 != NULL) {
275				*np = m->m_next;
276				m->m_next = n0;
277			}
278		}
279		else {
280			n = m_get(M_DONTWAIT, m->m_type);
281			if (n == NULL) {
282				m_freem(n0);
283				return NULL;
284			}
285			alloc++;
286
287			if ((n->m_next = n0) == NULL)
288				np = &n->m_next;
289			n0 = n;
290
291			*np = m->m_next;
292			m->m_next = n0;
293
294			n->m_len = hlen;
295			m->m_len = skip;
296
297			m = n;			/* header is at front ... */
298			*off = 0;		/* ... of new mbuf */
299		}
300
301		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
302	} else {
303		/*
304		 * Copy the remainder to the back of the mbuf
305		 * so there's space to write the new header.
306		 */
307		/* XXX can this be memcpy? does it handle overlap? */
308		ovbcopy(mtod(m, char *) + skip,
309			mtod(m, char *) + skip + hlen, remain);
310		m->m_len += hlen;
311		*off = skip;
312	}
313	m0->m_pkthdr.len += hlen;		/* adjust packet length */
314	return m;
315}
316
317/*
318 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
319 * length is updated, and a pointer to the first byte of the padding
320 * (which is guaranteed to be all in one mbuf) is returned.
321 */
322void *
323m_pad(struct mbuf *m, int n)
324{
325	register struct mbuf *m0, *m1;
326	register int len, pad;
327	void *retval;
328
329	if (n <= 0) {  /* No stupid arguments. */
330		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
331		m_freem(m);
332		return NULL;
333	}
334
335	len = m->m_pkthdr.len;
336	pad = n;
337	m0 = m;
338
339	while (m0->m_len < len) {
340IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
341		len -= m0->m_len;
342		m0 = m0->m_next;
343	}
344
345	if (m0->m_len != len) {
346		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
347		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
348
349		m_freem(m);
350		return NULL;
351	}
352
353	/* Check for zero-length trailing mbufs, and find the last one. */
354	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
355		if (m1->m_next->m_len != 0) {
356			DPRINTF(("m_pad: length mismatch (should be %d "
357			    "instead of %d)\n",
358			    m->m_pkthdr.len,
359			    m->m_pkthdr.len + m1->m_next->m_len));
360
361			m_freem(m);
362			return NULL;
363		}
364
365		m0 = m1->m_next;
366	}
367
368	if (pad > M_TRAILINGSPACE(m0)) {
369		/* Add an mbuf to the chain. */
370		MGET(m1, M_DONTWAIT, MT_DATA);
371		if (m1 == 0) {
372			m_freem(m0);
373			DPRINTF(("m_pad: unable to get extra mbuf\n"));
374			return NULL;
375		}
376
377		m0->m_next = m1;
378		m0 = m1;
379		m0->m_len = 0;
380	}
381
382	retval = m0->m_data + m0->m_len;
383	m0->m_len += pad;
384	m->m_pkthdr.len += pad;
385
386	return retval;
387}
388
389/*
390 * Remove hlen data at offset skip in the packet.  This is used by
391 * the protocols strip protocol headers and associated data (e.g. IV,
392 * authenticator) on input.
393 */
394int
395m_striphdr(struct mbuf *m, int skip, int hlen)
396{
397	struct mbuf *m1;
398	int roff;
399
400	/* Find beginning of header */
401	m1 = m_getptr(m, skip, &roff);
402	if (m1 == NULL)
403		return (EINVAL);
404
405	/* Remove the header and associated data from the mbuf. */
406	if (roff == 0) {
407		/* The header was at the beginning of the mbuf */
408		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
409		m_adj(m1, hlen);
410		if ((m1->m_flags & M_PKTHDR) == 0)
411			m->m_pkthdr.len -= hlen;
412	} else if (roff + hlen >= m1->m_len) {
413		struct mbuf *mo;
414
415		/*
416		 * Part or all of the header is at the end of this mbuf,
417		 * so first let's remove the remainder of the header from
418		 * the beginning of the remainder of the mbuf chain, if any.
419		 */
420		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
421		if (roff + hlen > m1->m_len) {
422			/* Adjust the next mbuf by the remainder */
423			m_adj(m1->m_next, roff + hlen - m1->m_len);
424
425			/* The second mbuf is guaranteed not to have a pkthdr... */
426			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
427		}
428
429		/* Now, let's unlink the mbuf chain for a second...*/
430		mo = m1->m_next;
431		m1->m_next = NULL;
432
433		/* ...and trim the end of the first part of the chain...sick */
434		m_adj(m1, -(m1->m_len - roff));
435		if ((m1->m_flags & M_PKTHDR) == 0)
436			m->m_pkthdr.len -= (m1->m_len - roff);
437
438		/* Finally, let's relink */
439		m1->m_next = mo;
440	} else {
441		/*
442		 * The header lies in the "middle" of the mbuf; copy
443		 * the remainder of the mbuf down over the header.
444		 */
445		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
446		ovbcopy(mtod(m1, u_char *) + roff + hlen,
447		      mtod(m1, u_char *) + roff,
448		      m1->m_len - (roff + hlen));
449		m1->m_len -= hlen;
450		m->m_pkthdr.len -= hlen;
451	}
452	return (0);
453}
454
455/*
456 * Diagnostic routine to check mbuf alignment as required by the
457 * crypto device drivers (that use DMA).
458 */
459void
460m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
461{
462	int roff;
463	struct mbuf *m = m_getptr(m0, off, &roff);
464	void *addr;
465
466	if (m == NULL)
467		return;
468	printf("%s (off %u len %u): ", where, off, len);
469	addr = mtod(m, char *) + roff;
470	do {
471		int mlen;
472
473		if (((uintptr_t) addr) & 3) {
474			printf("addr misaligned %p,", addr);
475			break;
476		}
477		mlen = m->m_len;
478		if (mlen > len)
479			mlen = len;
480		len -= mlen;
481		if (len && (mlen & 3)) {
482			printf("len mismatch %u,", mlen);
483			break;
484		}
485		m = m->m_next;
486		addr = m ? mtod(m, void *) : NULL;
487	} while (m && len > 0);
488	for (m = m0; m; m = m->m_next)
489		printf(" [%p:%u]", mtod(m, void *), m->m_len);
490	printf("\n");
491}
492