ipsec_mbuf.c revision 1.22
1/*	$NetBSD: ipsec_mbuf.c,v 1.22 2018/03/10 17:52:50 maxv Exp $	*/
2
3/*
4 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
29 */
30
31#include <sys/cdefs.h>
32__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.22 2018/03/10 17:52:50 maxv Exp $");
33
34/*
35 * IPsec-specific mbuf routines.
36 */
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/mbuf.h>
41#include <sys/socket.h>
42
43#include <net/route.h>
44#include <netinet/in.h>
45
46#include <netipsec/ipsec.h>
47#include <netipsec/ipsec_var.h>
48#include <netipsec/ipsec_private.h>
49
50/*
51 * Create a writable copy of the mbuf chain.  While doing this
52 * we compact the chain with a goal of producing a chain with
53 * at most two mbufs.  The second mbuf in this chain is likely
54 * to be a cluster.  The primary purpose of this work is to create
55 * a writable packet for encryption, compression, etc.  The
56 * secondary goal is to linearize the data so the data can be
57 * passed to crypto hardware in the most efficient manner possible.
58 */
59struct mbuf *
60m_clone(struct mbuf *m0)
61{
62	struct mbuf *m, *mprev;
63	struct mbuf *n, *mfirst, *mlast;
64	int len, off;
65
66	KASSERT(m0 != NULL);
67
68	mprev = NULL;
69	for (m = m0; m != NULL; m = mprev->m_next) {
70		/*
71		 * Regular mbufs are ignored unless there's a cluster
72		 * in front of it that we can use to coalesce.  We do
73		 * the latter mainly so later clusters can be coalesced
74		 * also w/o having to handle them specially (i.e. convert
75		 * mbuf+cluster -> cluster).  This optimization is heavily
76		 * influenced by the assumption that we're running over
77		 * Ethernet where MCLBYTES is large enough that the max
78		 * packet size will permit lots of coalescing into a
79		 * single cluster.  This in turn permits efficient
80		 * crypto operations, especially when using hardware.
81		 */
82		if ((m->m_flags & M_EXT) == 0) {
83			if (mprev && (mprev->m_flags & M_EXT) &&
84			    m->m_len <= M_TRAILINGSPACE(mprev)) {
85				/* XXX: this ignores mbuf types */
86				memcpy(mtod(mprev, char *) + mprev->m_len,
87				       mtod(m, char *), m->m_len);
88				mprev->m_len += m->m_len;
89				mprev->m_next = m->m_next;	/* unlink from chain */
90				m_free(m);			/* reclaim mbuf */
91				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
92			} else {
93				mprev = m;
94			}
95			continue;
96		}
97		/*
98		 * Writable mbufs are left alone (for now).  Note
99		 * that for 4.x systems it's not possible to identify
100		 * whether or not mbufs with external buffers are
101		 * writable unless they use clusters.
102		 */
103		if (M_EXT_WRITABLE(m)) {
104			mprev = m;
105			continue;
106		}
107
108		/*
109		 * Not writable, replace with a copy or coalesce with
110		 * the previous mbuf if possible (since we have to copy
111		 * it anyway, we try to reduce the number of mbufs and
112		 * clusters so that future work is easier).
113		 */
114		KASSERTMSG(m->m_flags & M_EXT, "m_flags 0x%x", m->m_flags);
115		/* NB: we only coalesce into a cluster or larger */
116		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
117		    m->m_len <= M_TRAILINGSPACE(mprev)) {
118			/* XXX: this ignores mbuf types */
119			memcpy(mtod(mprev, char *) + mprev->m_len,
120			       mtod(m, char *), m->m_len);
121			mprev->m_len += m->m_len;
122			mprev->m_next = m->m_next;	/* unlink from chain */
123			m_free(m);			/* reclaim mbuf */
124			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
125			continue;
126		}
127
128		/*
129		 * Allocate new space to hold the copy...
130		 */
131		/* XXX why can M_PKTHDR be set past the first mbuf? */
132		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
133			/*
134			 * NB: if a packet header is present we must
135			 * allocate the mbuf separately from any cluster
136			 * because M_MOVE_PKTHDR will smash the data
137			 * pointer and drop the M_EXT marker.
138			 */
139			MGETHDR(n, M_DONTWAIT, m->m_type);
140			if (n == NULL) {
141				m_freem(m0);
142				return (NULL);
143			}
144			M_MOVE_PKTHDR(n, m);
145			MCLGET(n, M_DONTWAIT);
146			if ((n->m_flags & M_EXT) == 0) {
147				m_free(n);
148				m_freem(m0);
149				return (NULL);
150			}
151		} else {
152			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
153			if (n == NULL) {
154				m_freem(m0);
155				return (NULL);
156			}
157		}
158		/*
159		 * ... and copy the data.  We deal with jumbo mbufs
160		 * (i.e. m_len > MCLBYTES) by splitting them into
161		 * clusters.  We could just malloc a buffer and make
162		 * it external but too many device drivers don't know
163		 * how to break up the non-contiguous memory when
164		 * doing DMA.
165		 */
166		len = m->m_len;
167		off = 0;
168		mfirst = n;
169		mlast = NULL;
170		for (;;) {
171			int cc = min(len, MCLBYTES);
172			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
173			n->m_len = cc;
174			if (mlast != NULL)
175				mlast->m_next = n;
176			mlast = n;
177			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
178
179			len -= cc;
180			if (len <= 0)
181				break;
182			off += cc;
183
184			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
185			if (n == NULL) {
186				m_freem(mfirst);
187				m_freem(m0);
188				return (NULL);
189			}
190		}
191		n->m_next = m->m_next;
192		if (mprev == NULL)
193			m0 = mfirst;		/* new head of chain */
194		else
195			mprev->m_next = mfirst;	/* replace old mbuf */
196		m_free(m);			/* release old mbuf */
197		mprev = mfirst;
198	}
199	return (m0);
200}
201
202/*
203 * Make space for a new header of length hlen at skip bytes
204 * into the packet.  When doing this we allocate new mbufs only
205 * when absolutely necessary.  The mbuf where the new header
206 * is to go is returned together with an offset into the mbuf.
207 * If NULL is returned then the mbuf chain may have been modified;
208 * the caller is assumed to always free the chain.
209 */
210struct mbuf *
211m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
212{
213	struct mbuf *m;
214	unsigned remain;
215
216	KASSERT(m0 != NULL);
217	KASSERT(m0->m_flags & M_PKTHDR);
218	KASSERTMSG(hlen < MHLEN, "hlen too big: %u", hlen);
219
220	for (m = m0; m && skip > m->m_len; m = m->m_next)
221		skip -= m->m_len;
222	if (m == NULL)
223		return (NULL);
224	/*
225	 * At this point skip is the offset into the mbuf m
226	 * where the new header should be placed.  Figure out
227	 * if there's space to insert the new header.  If so,
228	 * and copying the remainder makes sense then do so.
229	 * Otherwise insert a new mbuf in the chain, splitting
230	 * the contents of m as needed.
231	 */
232	remain = m->m_len - skip;		/* data to move */
233	if (hlen > M_TRAILINGSPACE(m)) {
234		struct mbuf *n0, *n, **np;
235		int todo, len, done, alloc;
236
237		n0 = NULL;
238		np = &n0;
239		alloc = 0;
240		done = 0;
241		todo = remain;
242		while (todo > 0) {
243			if (todo > MHLEN) {
244				n = m_getcl(M_DONTWAIT, m->m_type, 0);
245				len = MCLBYTES;
246			} else {
247				n = m_get(M_DONTWAIT, m->m_type);
248				len = MHLEN;
249			}
250			if (n == NULL) {
251				m_freem(n0);
252				return NULL;
253			}
254			*np = n;
255			np = &n->m_next;
256			alloc++;
257			len = min(todo, len);
258			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
259			n->m_len = len;
260			done += len;
261			todo -= len;
262		}
263
264		if (hlen <= M_TRAILINGSPACE(m) + remain) {
265			m->m_len = skip + hlen;
266			*off = skip;
267			if (n0 != NULL) {
268				*np = m->m_next;
269				m->m_next = n0;
270			}
271		} else {
272			n = m_get(M_DONTWAIT, m->m_type);
273			if (n == NULL) {
274				m_freem(n0);
275				return NULL;
276			}
277			alloc++;
278
279			if ((n->m_next = n0) == NULL)
280				np = &n->m_next;
281			n0 = n;
282
283			*np = m->m_next;
284			m->m_next = n0;
285
286			n->m_len = hlen;
287			m->m_len = skip;
288
289			m = n;			/* header is at front ... */
290			*off = 0;		/* ... of new mbuf */
291		}
292
293		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
294	} else {
295		/*
296		 * Copy the remainder to the back of the mbuf
297		 * so there's space to write the new header.
298		 */
299		/* XXX can this be memcpy? does it handle overlap? */
300		memmove(mtod(m, char *) + skip + hlen,
301			mtod(m, char *) + skip, remain);
302		m->m_len += hlen;
303		*off = skip;
304	}
305	m0->m_pkthdr.len += hlen;		/* adjust packet length */
306	return m;
307}
308
309/*
310 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
311 * length is updated, and a pointer to the first byte of the padding
312 * (which is guaranteed to be all in one mbuf) is returned.
313 */
314void *
315m_pad(struct mbuf *m, int n)
316{
317	register struct mbuf *m0, *m1;
318	register int len, pad;
319	void *retval;
320
321	if (__predict_false(n > MLEN)) {
322		panic("%s: %d > MLEN", __func__, n);
323	}
324	KASSERT(m->m_flags & M_PKTHDR);
325
326	len = m->m_pkthdr.len;
327	pad = n;
328	m0 = m;
329
330	while (m0->m_len < len) {
331		KASSERTMSG(m0->m_next != NULL,
332		    "m0 null, len %u m_len %u", len, m0->m_len);
333		len -= m0->m_len;
334		m0 = m0->m_next;
335	}
336
337	if (m0->m_len != len) {
338		IPSECLOG(LOG_DEBUG,
339		    "length mismatch (should be %d instead of %d)\n",
340		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len);
341		m_freem(m);
342		return NULL;
343	}
344
345	/* Check for zero-length trailing mbufs, and find the last one. */
346	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
347		if (m1->m_next->m_len != 0) {
348			IPSECLOG(LOG_DEBUG,
349			    "length mismatch (should be %d instead of %d)\n",
350			    m->m_pkthdr.len,
351			    m->m_pkthdr.len + m1->m_next->m_len);
352			m_freem(m);
353			return NULL;
354		}
355
356		m0 = m1->m_next;
357	}
358
359	if (pad > M_TRAILINGSPACE(m0)) {
360		/* Add an mbuf to the chain. */
361		MGET(m1, M_DONTWAIT, MT_DATA);
362		if (m1 == NULL) {
363			m_freem(m);
364			IPSECLOG(LOG_DEBUG, "unable to get extra mbuf\n");
365			return NULL;
366		}
367
368		m0->m_next = m1;
369		m0 = m1;
370		m0->m_len = 0;
371	}
372
373	retval = m0->m_data + m0->m_len;
374	m0->m_len += pad;
375	m->m_pkthdr.len += pad;
376
377	return retval;
378}
379
380/*
381 * Remove hlen data at offset skip in the packet.  This is used by
382 * the protocols strip protocol headers and associated data (e.g. IV,
383 * authenticator) on input.
384 */
385int
386m_striphdr(struct mbuf *m, int skip, int hlen)
387{
388	struct mbuf *m1;
389	int roff;
390
391	KASSERT(m->m_flags & M_PKTHDR);
392
393	/* Find beginning of header */
394	m1 = m_getptr(m, skip, &roff);
395	if (m1 == NULL)
396		return (EINVAL);
397
398	/* Remove the header and associated data from the mbuf. */
399	if (roff == 0) {
400		/* The header was at the beginning of the mbuf */
401		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
402		m_adj(m1, hlen);
403		if ((m1->m_flags & M_PKTHDR) == 0)
404			m->m_pkthdr.len -= hlen;
405	} else if (roff + hlen >= m1->m_len) {
406		struct mbuf *mo;
407
408		/*
409		 * Part or all of the header is at the end of this mbuf,
410		 * so first let's remove the remainder of the header from
411		 * the beginning of the remainder of the mbuf chain, if any.
412		 */
413		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
414		if (roff + hlen > m1->m_len) {
415			/* Adjust the next mbuf by the remainder */
416			m_adj(m1->m_next, roff + hlen - m1->m_len);
417
418			/* The second mbuf is guaranteed not to have a pkthdr... */
419			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
420		}
421
422		/* Now, let's unlink the mbuf chain for a second...*/
423		mo = m1->m_next;
424		m1->m_next = NULL;
425
426		/* ...and trim the end of the first part of the chain...sick */
427		m_adj(m1, -(m1->m_len - roff));
428		if ((m1->m_flags & M_PKTHDR) == 0)
429			m->m_pkthdr.len -= (m1->m_len - roff);
430
431		/* Finally, let's relink */
432		m1->m_next = mo;
433	} else {
434		/*
435		 * The header lies in the "middle" of the mbuf; copy
436		 * the remainder of the mbuf down over the header.
437		 */
438		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
439		memmove(mtod(m1, u_char *) + roff,
440		      mtod(m1, u_char *) + roff + hlen,
441		      m1->m_len - (roff + hlen));
442		m1->m_len -= hlen;
443		m->m_pkthdr.len -= hlen;
444	}
445	return (0);
446}
447