ipsec_mbuf.c revision 1.13
1/*	$NetBSD: ipsec_mbuf.c,v 1.13 2017/04/18 05:25:32 ozaki-r Exp $	*/
2/*-
3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.13 2017/04/18 05:25:32 ozaki-r Exp $");
32
33/*
34 * IPsec-specific mbuf routines.
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/mbuf.h>
40#include <sys/socket.h>
41
42#include <net/route.h>
43#include <netinet/in.h>
44
45#include <netipsec/ipsec.h>
46#include <netipsec/ipsec_var.h>
47#include <netipsec/ipsec_private.h>
48
49#include <netipsec/ipsec_osdep.h>
50#include <net/net_osdep.h>
51
52/*
53 * Create a writable copy of the mbuf chain.  While doing this
54 * we compact the chain with a goal of producing a chain with
55 * at most two mbufs.  The second mbuf in this chain is likely
56 * to be a cluster.  The primary purpose of this work is to create
57 * a writable packet for encryption, compression, etc.  The
58 * secondary goal is to linearize the data so the data can be
59 * passed to crypto hardware in the most efficient manner possible.
60 */
61struct mbuf *
62m_clone(struct mbuf *m0)
63{
64	struct mbuf *m, *mprev;
65	struct mbuf *n, *mfirst, *mlast;
66	int len, off;
67
68	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
69
70	mprev = NULL;
71	for (m = m0; m != NULL; m = mprev->m_next) {
72		/*
73		 * Regular mbufs are ignored unless there's a cluster
74		 * in front of it that we can use to coalesce.  We do
75		 * the latter mainly so later clusters can be coalesced
76		 * also w/o having to handle them specially (i.e. convert
77		 * mbuf+cluster -> cluster).  This optimization is heavily
78		 * influenced by the assumption that we're running over
79		 * Ethernet where MCLBYTES is large enough that the max
80		 * packet size will permit lots of coalescing into a
81		 * single cluster.  This in turn permits efficient
82		 * crypto operations, especially when using hardware.
83		 */
84		if ((m->m_flags & M_EXT) == 0) {
85			if (mprev && (mprev->m_flags & M_EXT) &&
86			    m->m_len <= M_TRAILINGSPACE(mprev)) {
87				/* XXX: this ignores mbuf types */
88				memcpy(mtod(mprev, char *) + mprev->m_len,
89				       mtod(m, char *), m->m_len);
90				mprev->m_len += m->m_len;
91				mprev->m_next = m->m_next;	/* unlink from chain */
92				m_free(m);			/* reclaim mbuf */
93				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
94			} else {
95				mprev = m;
96			}
97			continue;
98		}
99		/*
100		 * Writable mbufs are left alone (for now).  Note
101		 * that for 4.x systems it's not possible to identify
102		 * whether or not mbufs with external buffers are
103		 * writable unless they use clusters.
104		 */
105		if (M_EXT_WRITABLE(m)) {
106			mprev = m;
107			continue;
108		}
109
110		/*
111		 * Not writable, replace with a copy or coalesce with
112		 * the previous mbuf if possible (since we have to copy
113		 * it anyway, we try to reduce the number of mbufs and
114		 * clusters so that future work is easier).
115		 */
116		IPSEC_ASSERT(m->m_flags & M_EXT,
117			("m_clone: m_flags 0x%x", m->m_flags));
118		/* NB: we only coalesce into a cluster or larger */
119		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
120		    m->m_len <= M_TRAILINGSPACE(mprev)) {
121			/* XXX: this ignores mbuf types */
122			memcpy(mtod(mprev, char *) + mprev->m_len,
123			       mtod(m, char *), m->m_len);
124			mprev->m_len += m->m_len;
125			mprev->m_next = m->m_next;	/* unlink from chain */
126			m_free(m);			/* reclaim mbuf */
127			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
128			continue;
129		}
130
131		/*
132		 * Allocate new space to hold the copy...
133		 */
134		/* XXX why can M_PKTHDR be set past the first mbuf? */
135		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
136			/*
137			 * NB: if a packet header is present we must
138			 * allocate the mbuf separately from any cluster
139			 * because M_MOVE_PKTHDR will smash the data
140			 * pointer and drop the M_EXT marker.
141			 */
142			MGETHDR(n, M_DONTWAIT, m->m_type);
143			if (n == NULL) {
144				m_freem(m0);
145				return (NULL);
146			}
147			M_MOVE_PKTHDR(n, m);
148			MCLGET(n, M_DONTWAIT);
149			if ((n->m_flags & M_EXT) == 0) {
150				m_free(n);
151				m_freem(m0);
152				return (NULL);
153			}
154		} else {
155			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
156			if (n == NULL) {
157				m_freem(m0);
158				return (NULL);
159			}
160		}
161		/*
162		 * ... and copy the data.  We deal with jumbo mbufs
163		 * (i.e. m_len > MCLBYTES) by splitting them into
164		 * clusters.  We could just malloc a buffer and make
165		 * it external but too many device drivers don't know
166		 * how to break up the non-contiguous memory when
167		 * doing DMA.
168		 */
169		len = m->m_len;
170		off = 0;
171		mfirst = n;
172		mlast = NULL;
173		for (;;) {
174			int cc = min(len, MCLBYTES);
175			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
176			n->m_len = cc;
177			if (mlast != NULL)
178				mlast->m_next = n;
179			mlast = n;
180			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
181
182			len -= cc;
183			if (len <= 0)
184				break;
185			off += cc;
186
187			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
188			if (n == NULL) {
189				m_freem(mfirst);
190				m_freem(m0);
191				return (NULL);
192			}
193		}
194		n->m_next = m->m_next;
195		if (mprev == NULL)
196			m0 = mfirst;		/* new head of chain */
197		else
198			mprev->m_next = mfirst;	/* replace old mbuf */
199		m_free(m);			/* release old mbuf */
200		mprev = mfirst;
201	}
202	return (m0);
203}
204
205/*
206 * Make space for a new header of length hlen at skip bytes
207 * into the packet.  When doing this we allocate new mbufs only
208 * when absolutely necessary.  The mbuf where the new header
209 * is to go is returned together with an offset into the mbuf.
210 * If NULL is returned then the mbuf chain may have been modified;
211 * the caller is assumed to always free the chain.
212 */
213struct mbuf *
214m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
215{
216	struct mbuf *m;
217	unsigned remain;
218
219	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
220	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
221
222	for (m = m0; m && skip > m->m_len; m = m->m_next)
223		skip -= m->m_len;
224	if (m == NULL)
225		return (NULL);
226	/*
227	 * At this point skip is the offset into the mbuf m
228	 * where the new header should be placed.  Figure out
229	 * if there's space to insert the new header.  If so,
230	 * and copying the remainder makese sense then do so.
231	 * Otherwise insert a new mbuf in the chain, splitting
232	 * the contents of m as needed.
233	 */
234	remain = m->m_len - skip;		/* data to move */
235	if (hlen > M_TRAILINGSPACE(m)) {
236		struct mbuf *n0, *n, **np;
237		int todo, len, done, alloc;
238
239		n0 = NULL;
240		np = &n0;
241		alloc = 0;
242		done = 0;
243		todo = remain;
244		while (todo > 0) {
245			if (todo > MHLEN) {
246				n = m_getcl(M_DONTWAIT, m->m_type, 0);
247				len = MCLBYTES;
248			}
249			else {
250				n = m_get(M_DONTWAIT, m->m_type);
251				len = MHLEN;
252			}
253			if (n == NULL) {
254				m_freem(n0);
255				return NULL;
256			}
257			*np = n;
258			np = &n->m_next;
259			alloc++;
260			len = min(todo, len);
261			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
262			n->m_len = len;
263			done += len;
264			todo -= len;
265		}
266
267		if (hlen <= M_TRAILINGSPACE(m) + remain) {
268			m->m_len = skip + hlen;
269			*off = skip;
270			if (n0 != NULL) {
271				*np = m->m_next;
272				m->m_next = n0;
273			}
274		}
275		else {
276			n = m_get(M_DONTWAIT, m->m_type);
277			if (n == NULL) {
278				m_freem(n0);
279				return NULL;
280			}
281			alloc++;
282
283			if ((n->m_next = n0) == NULL)
284				np = &n->m_next;
285			n0 = n;
286
287			*np = m->m_next;
288			m->m_next = n0;
289
290			n->m_len = hlen;
291			m->m_len = skip;
292
293			m = n;			/* header is at front ... */
294			*off = 0;		/* ... of new mbuf */
295		}
296
297		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
298	} else {
299		/*
300		 * Copy the remainder to the back of the mbuf
301		 * so there's space to write the new header.
302		 */
303		/* XXX can this be memcpy? does it handle overlap? */
304		ovbcopy(mtod(m, char *) + skip,
305			mtod(m, char *) + skip + hlen, remain);
306		m->m_len += hlen;
307		*off = skip;
308	}
309	m0->m_pkthdr.len += hlen;		/* adjust packet length */
310	return m;
311}
312
313/*
314 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
315 * length is updated, and a pointer to the first byte of the padding
316 * (which is guaranteed to be all in one mbuf) is returned.
317 */
318void *
319m_pad(struct mbuf *m, int n)
320{
321	register struct mbuf *m0, *m1;
322	register int len, pad;
323	void *retval;
324
325	if (n <= 0) {  /* No stupid arguments. */
326		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
327		m_freem(m);
328		return NULL;
329	}
330
331	len = m->m_pkthdr.len;
332	pad = n;
333	m0 = m;
334
335	while (m0->m_len < len) {
336IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
337		len -= m0->m_len;
338		m0 = m0->m_next;
339	}
340
341	if (m0->m_len != len) {
342		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
343		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
344
345		m_freem(m);
346		return NULL;
347	}
348
349	/* Check for zero-length trailing mbufs, and find the last one. */
350	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
351		if (m1->m_next->m_len != 0) {
352			DPRINTF(("m_pad: length mismatch (should be %d "
353			    "instead of %d)\n",
354			    m->m_pkthdr.len,
355			    m->m_pkthdr.len + m1->m_next->m_len));
356
357			m_freem(m);
358			return NULL;
359		}
360
361		m0 = m1->m_next;
362	}
363
364	if (pad > M_TRAILINGSPACE(m0)) {
365		/* Add an mbuf to the chain. */
366		MGET(m1, M_DONTWAIT, MT_DATA);
367		if (m1 == 0) {
368			m_freem(m0);
369			DPRINTF(("m_pad: unable to get extra mbuf\n"));
370			return NULL;
371		}
372
373		m0->m_next = m1;
374		m0 = m1;
375		m0->m_len = 0;
376	}
377
378	retval = m0->m_data + m0->m_len;
379	m0->m_len += pad;
380	m->m_pkthdr.len += pad;
381
382	return retval;
383}
384
385/*
386 * Remove hlen data at offset skip in the packet.  This is used by
387 * the protocols strip protocol headers and associated data (e.g. IV,
388 * authenticator) on input.
389 */
390int
391m_striphdr(struct mbuf *m, int skip, int hlen)
392{
393	struct mbuf *m1;
394	int roff;
395
396	/* Find beginning of header */
397	m1 = m_getptr(m, skip, &roff);
398	if (m1 == NULL)
399		return (EINVAL);
400
401	/* Remove the header and associated data from the mbuf. */
402	if (roff == 0) {
403		/* The header was at the beginning of the mbuf */
404		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
405		m_adj(m1, hlen);
406		if ((m1->m_flags & M_PKTHDR) == 0)
407			m->m_pkthdr.len -= hlen;
408	} else if (roff + hlen >= m1->m_len) {
409		struct mbuf *mo;
410
411		/*
412		 * Part or all of the header is at the end of this mbuf,
413		 * so first let's remove the remainder of the header from
414		 * the beginning of the remainder of the mbuf chain, if any.
415		 */
416		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
417		if (roff + hlen > m1->m_len) {
418			/* Adjust the next mbuf by the remainder */
419			m_adj(m1->m_next, roff + hlen - m1->m_len);
420
421			/* The second mbuf is guaranteed not to have a pkthdr... */
422			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
423		}
424
425		/* Now, let's unlink the mbuf chain for a second...*/
426		mo = m1->m_next;
427		m1->m_next = NULL;
428
429		/* ...and trim the end of the first part of the chain...sick */
430		m_adj(m1, -(m1->m_len - roff));
431		if ((m1->m_flags & M_PKTHDR) == 0)
432			m->m_pkthdr.len -= (m1->m_len - roff);
433
434		/* Finally, let's relink */
435		m1->m_next = mo;
436	} else {
437		/*
438		 * The header lies in the "middle" of the mbuf; copy
439		 * the remainder of the mbuf down over the header.
440		 */
441		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
442		ovbcopy(mtod(m1, u_char *) + roff + hlen,
443		      mtod(m1, u_char *) + roff,
444		      m1->m_len - (roff + hlen));
445		m1->m_len -= hlen;
446		m->m_pkthdr.len -= hlen;
447	}
448	return (0);
449}
450
451/*
452 * Diagnostic routine to check mbuf alignment as required by the
453 * crypto device drivers (that use DMA).
454 */
455void
456m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
457{
458	int roff;
459	struct mbuf *m = m_getptr(m0, off, &roff);
460	void *addr;
461
462	if (m == NULL)
463		return;
464	printf("%s (off %u len %u): ", where, off, len);
465	addr = mtod(m, char *) + roff;
466	do {
467		int mlen;
468
469		if (((uintptr_t) addr) & 3) {
470			printf("addr misaligned %p,", addr);
471			break;
472		}
473		mlen = m->m_len;
474		if (mlen > len)
475			mlen = len;
476		len -= mlen;
477		if (len && (mlen & 3)) {
478			printf("len mismatch %u,", mlen);
479			break;
480		}
481		m = m->m_next;
482		addr = m ? mtod(m, void *) : NULL;
483	} while (m && len > 0);
484	for (m = m0; m; m = m->m_next)
485		printf(" [%p:%u]", mtod(m, void *), m->m_len);
486	printf("\n");
487}
488