ipsec_mbuf.c revision 1.15
1/*	$NetBSD: ipsec_mbuf.c,v 1.15 2017/04/19 03:39:14 ozaki-r Exp $	*/
2/*-
3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.15 2017/04/19 03:39:14 ozaki-r Exp $");
32
33/*
34 * IPsec-specific mbuf routines.
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/mbuf.h>
40#include <sys/socket.h>
41
42#include <net/route.h>
43#include <netinet/in.h>
44
45#include <netipsec/ipsec.h>
46#include <netipsec/ipsec_var.h>
47#include <netipsec/ipsec_private.h>
48
49#include <net/net_osdep.h>
50
51/*
52 * Create a writable copy of the mbuf chain.  While doing this
53 * we compact the chain with a goal of producing a chain with
54 * at most two mbufs.  The second mbuf in this chain is likely
55 * to be a cluster.  The primary purpose of this work is to create
56 * a writable packet for encryption, compression, etc.  The
57 * secondary goal is to linearize the data so the data can be
58 * passed to crypto hardware in the most efficient manner possible.
59 */
60struct mbuf *
61m_clone(struct mbuf *m0)
62{
63	struct mbuf *m, *mprev;
64	struct mbuf *n, *mfirst, *mlast;
65	int len, off;
66
67	KASSERT(m0 != NULL);
68
69	mprev = NULL;
70	for (m = m0; m != NULL; m = mprev->m_next) {
71		/*
72		 * Regular mbufs are ignored unless there's a cluster
73		 * in front of it that we can use to coalesce.  We do
74		 * the latter mainly so later clusters can be coalesced
75		 * also w/o having to handle them specially (i.e. convert
76		 * mbuf+cluster -> cluster).  This optimization is heavily
77		 * influenced by the assumption that we're running over
78		 * Ethernet where MCLBYTES is large enough that the max
79		 * packet size will permit lots of coalescing into a
80		 * single cluster.  This in turn permits efficient
81		 * crypto operations, especially when using hardware.
82		 */
83		if ((m->m_flags & M_EXT) == 0) {
84			if (mprev && (mprev->m_flags & M_EXT) &&
85			    m->m_len <= M_TRAILINGSPACE(mprev)) {
86				/* XXX: this ignores mbuf types */
87				memcpy(mtod(mprev, char *) + mprev->m_len,
88				       mtod(m, char *), m->m_len);
89				mprev->m_len += m->m_len;
90				mprev->m_next = m->m_next;	/* unlink from chain */
91				m_free(m);			/* reclaim mbuf */
92				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
93			} else {
94				mprev = m;
95			}
96			continue;
97		}
98		/*
99		 * Writable mbufs are left alone (for now).  Note
100		 * that for 4.x systems it's not possible to identify
101		 * whether or not mbufs with external buffers are
102		 * writable unless they use clusters.
103		 */
104		if (M_EXT_WRITABLE(m)) {
105			mprev = m;
106			continue;
107		}
108
109		/*
110		 * Not writable, replace with a copy or coalesce with
111		 * the previous mbuf if possible (since we have to copy
112		 * it anyway, we try to reduce the number of mbufs and
113		 * clusters so that future work is easier).
114		 */
115		KASSERTMSG(m->m_flags & M_EXT, "m_flags 0x%x", m->m_flags);
116		/* NB: we only coalesce into a cluster or larger */
117		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
118		    m->m_len <= M_TRAILINGSPACE(mprev)) {
119			/* XXX: this ignores mbuf types */
120			memcpy(mtod(mprev, char *) + mprev->m_len,
121			       mtod(m, char *), m->m_len);
122			mprev->m_len += m->m_len;
123			mprev->m_next = m->m_next;	/* unlink from chain */
124			m_free(m);			/* reclaim mbuf */
125			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
126			continue;
127		}
128
129		/*
130		 * Allocate new space to hold the copy...
131		 */
132		/* XXX why can M_PKTHDR be set past the first mbuf? */
133		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
134			/*
135			 * NB: if a packet header is present we must
136			 * allocate the mbuf separately from any cluster
137			 * because M_MOVE_PKTHDR will smash the data
138			 * pointer and drop the M_EXT marker.
139			 */
140			MGETHDR(n, M_DONTWAIT, m->m_type);
141			if (n == NULL) {
142				m_freem(m0);
143				return (NULL);
144			}
145			M_MOVE_PKTHDR(n, m);
146			MCLGET(n, M_DONTWAIT);
147			if ((n->m_flags & M_EXT) == 0) {
148				m_free(n);
149				m_freem(m0);
150				return (NULL);
151			}
152		} else {
153			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
154			if (n == NULL) {
155				m_freem(m0);
156				return (NULL);
157			}
158		}
159		/*
160		 * ... and copy the data.  We deal with jumbo mbufs
161		 * (i.e. m_len > MCLBYTES) by splitting them into
162		 * clusters.  We could just malloc a buffer and make
163		 * it external but too many device drivers don't know
164		 * how to break up the non-contiguous memory when
165		 * doing DMA.
166		 */
167		len = m->m_len;
168		off = 0;
169		mfirst = n;
170		mlast = NULL;
171		for (;;) {
172			int cc = min(len, MCLBYTES);
173			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
174			n->m_len = cc;
175			if (mlast != NULL)
176				mlast->m_next = n;
177			mlast = n;
178			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
179
180			len -= cc;
181			if (len <= 0)
182				break;
183			off += cc;
184
185			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
186			if (n == NULL) {
187				m_freem(mfirst);
188				m_freem(m0);
189				return (NULL);
190			}
191		}
192		n->m_next = m->m_next;
193		if (mprev == NULL)
194			m0 = mfirst;		/* new head of chain */
195		else
196			mprev->m_next = mfirst;	/* replace old mbuf */
197		m_free(m);			/* release old mbuf */
198		mprev = mfirst;
199	}
200	return (m0);
201}
202
203/*
204 * Make space for a new header of length hlen at skip bytes
205 * into the packet.  When doing this we allocate new mbufs only
206 * when absolutely necessary.  The mbuf where the new header
207 * is to go is returned together with an offset into the mbuf.
208 * If NULL is returned then the mbuf chain may have been modified;
209 * the caller is assumed to always free the chain.
210 */
211struct mbuf *
212m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
213{
214	struct mbuf *m;
215	unsigned remain;
216
217	KASSERT(m0 != NULL);
218	KASSERTMSG(hlen < MHLEN, "hlen too big: %u", hlen);
219
220	for (m = m0; m && skip > m->m_len; m = m->m_next)
221		skip -= m->m_len;
222	if (m == NULL)
223		return (NULL);
224	/*
225	 * At this point skip is the offset into the mbuf m
226	 * where the new header should be placed.  Figure out
227	 * if there's space to insert the new header.  If so,
228	 * and copying the remainder makese sense then do so.
229	 * Otherwise insert a new mbuf in the chain, splitting
230	 * the contents of m as needed.
231	 */
232	remain = m->m_len - skip;		/* data to move */
233	if (hlen > M_TRAILINGSPACE(m)) {
234		struct mbuf *n0, *n, **np;
235		int todo, len, done, alloc;
236
237		n0 = NULL;
238		np = &n0;
239		alloc = 0;
240		done = 0;
241		todo = remain;
242		while (todo > 0) {
243			if (todo > MHLEN) {
244				n = m_getcl(M_DONTWAIT, m->m_type, 0);
245				len = MCLBYTES;
246			}
247			else {
248				n = m_get(M_DONTWAIT, m->m_type);
249				len = MHLEN;
250			}
251			if (n == NULL) {
252				m_freem(n0);
253				return NULL;
254			}
255			*np = n;
256			np = &n->m_next;
257			alloc++;
258			len = min(todo, len);
259			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
260			n->m_len = len;
261			done += len;
262			todo -= len;
263		}
264
265		if (hlen <= M_TRAILINGSPACE(m) + remain) {
266			m->m_len = skip + hlen;
267			*off = skip;
268			if (n0 != NULL) {
269				*np = m->m_next;
270				m->m_next = n0;
271			}
272		}
273		else {
274			n = m_get(M_DONTWAIT, m->m_type);
275			if (n == NULL) {
276				m_freem(n0);
277				return NULL;
278			}
279			alloc++;
280
281			if ((n->m_next = n0) == NULL)
282				np = &n->m_next;
283			n0 = n;
284
285			*np = m->m_next;
286			m->m_next = n0;
287
288			n->m_len = hlen;
289			m->m_len = skip;
290
291			m = n;			/* header is at front ... */
292			*off = 0;		/* ... of new mbuf */
293		}
294
295		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
296	} else {
297		/*
298		 * Copy the remainder to the back of the mbuf
299		 * so there's space to write the new header.
300		 */
301		/* XXX can this be memcpy? does it handle overlap? */
302		ovbcopy(mtod(m, char *) + skip,
303			mtod(m, char *) + skip + hlen, remain);
304		m->m_len += hlen;
305		*off = skip;
306	}
307	m0->m_pkthdr.len += hlen;		/* adjust packet length */
308	return m;
309}
310
311/*
312 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
313 * length is updated, and a pointer to the first byte of the padding
314 * (which is guaranteed to be all in one mbuf) is returned.
315 */
316void *
317m_pad(struct mbuf *m, int n)
318{
319	register struct mbuf *m0, *m1;
320	register int len, pad;
321	void *retval;
322
323	if (n <= 0) {  /* No stupid arguments. */
324		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
325		m_freem(m);
326		return NULL;
327	}
328
329	len = m->m_pkthdr.len;
330	pad = n;
331	m0 = m;
332
333	while (m0->m_len < len) {
334		KASSERTMSG(m0->m_next != NULL,
335		    "m0 null, len %u m_len %u", len, m0->m_len);/*XXX*/
336		len -= m0->m_len;
337		m0 = m0->m_next;
338	}
339
340	if (m0->m_len != len) {
341		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
342		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
343
344		m_freem(m);
345		return NULL;
346	}
347
348	/* Check for zero-length trailing mbufs, and find the last one. */
349	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
350		if (m1->m_next->m_len != 0) {
351			DPRINTF(("m_pad: length mismatch (should be %d "
352			    "instead of %d)\n",
353			    m->m_pkthdr.len,
354			    m->m_pkthdr.len + m1->m_next->m_len));
355
356			m_freem(m);
357			return NULL;
358		}
359
360		m0 = m1->m_next;
361	}
362
363	if (pad > M_TRAILINGSPACE(m0)) {
364		/* Add an mbuf to the chain. */
365		MGET(m1, M_DONTWAIT, MT_DATA);
366		if (m1 == 0) {
367			m_freem(m0);
368			DPRINTF(("m_pad: unable to get extra mbuf\n"));
369			return NULL;
370		}
371
372		m0->m_next = m1;
373		m0 = m1;
374		m0->m_len = 0;
375	}
376
377	retval = m0->m_data + m0->m_len;
378	m0->m_len += pad;
379	m->m_pkthdr.len += pad;
380
381	return retval;
382}
383
384/*
385 * Remove hlen data at offset skip in the packet.  This is used by
386 * the protocols strip protocol headers and associated data (e.g. IV,
387 * authenticator) on input.
388 */
389int
390m_striphdr(struct mbuf *m, int skip, int hlen)
391{
392	struct mbuf *m1;
393	int roff;
394
395	/* Find beginning of header */
396	m1 = m_getptr(m, skip, &roff);
397	if (m1 == NULL)
398		return (EINVAL);
399
400	/* Remove the header and associated data from the mbuf. */
401	if (roff == 0) {
402		/* The header was at the beginning of the mbuf */
403		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
404		m_adj(m1, hlen);
405		if ((m1->m_flags & M_PKTHDR) == 0)
406			m->m_pkthdr.len -= hlen;
407	} else if (roff + hlen >= m1->m_len) {
408		struct mbuf *mo;
409
410		/*
411		 * Part or all of the header is at the end of this mbuf,
412		 * so first let's remove the remainder of the header from
413		 * the beginning of the remainder of the mbuf chain, if any.
414		 */
415		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
416		if (roff + hlen > m1->m_len) {
417			/* Adjust the next mbuf by the remainder */
418			m_adj(m1->m_next, roff + hlen - m1->m_len);
419
420			/* The second mbuf is guaranteed not to have a pkthdr... */
421			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
422		}
423
424		/* Now, let's unlink the mbuf chain for a second...*/
425		mo = m1->m_next;
426		m1->m_next = NULL;
427
428		/* ...and trim the end of the first part of the chain...sick */
429		m_adj(m1, -(m1->m_len - roff));
430		if ((m1->m_flags & M_PKTHDR) == 0)
431			m->m_pkthdr.len -= (m1->m_len - roff);
432
433		/* Finally, let's relink */
434		m1->m_next = mo;
435	} else {
436		/*
437		 * The header lies in the "middle" of the mbuf; copy
438		 * the remainder of the mbuf down over the header.
439		 */
440		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
441		ovbcopy(mtod(m1, u_char *) + roff + hlen,
442		      mtod(m1, u_char *) + roff,
443		      m1->m_len - (roff + hlen));
444		m1->m_len -= hlen;
445		m->m_pkthdr.len -= hlen;
446	}
447	return (0);
448}
449
450/*
451 * Diagnostic routine to check mbuf alignment as required by the
452 * crypto device drivers (that use DMA).
453 */
454void
455m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
456{
457	int roff;
458	struct mbuf *m = m_getptr(m0, off, &roff);
459	void *addr;
460
461	if (m == NULL)
462		return;
463	printf("%s (off %u len %u): ", where, off, len);
464	addr = mtod(m, char *) + roff;
465	do {
466		int mlen;
467
468		if (((uintptr_t) addr) & 3) {
469			printf("addr misaligned %p,", addr);
470			break;
471		}
472		mlen = m->m_len;
473		if (mlen > len)
474			mlen = len;
475		len -= mlen;
476		if (len && (mlen & 3)) {
477			printf("len mismatch %u,", mlen);
478			break;
479		}
480		m = m->m_next;
481		addr = m ? mtod(m, void *) : NULL;
482	} while (m && len > 0);
483	for (m = m0; m; m = m->m_next)
484		printf(" [%p:%u]", mtod(m, void *), m->m_len);
485	printf("\n");
486}
487