ipsec_mbuf.c revision 1.10
1/* $NetBSD: ipsec_mbuf.c,v 1.10 2007/12/14 20:55:22 seanb Exp $ */ 2/*- 3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $ 28 */ 29 30#include <sys/cdefs.h> 31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.10 2007/12/14 20:55:22 seanb Exp $"); 32 33/* 34 * IPsec-specific mbuf routines. 35 */ 36 37#ifdef __FreeBSD__ 38#include "opt_param.h" 39#endif 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/mbuf.h> 44#include <sys/socket.h> 45 46#include <net/route.h> 47#include <netinet/in.h> 48 49#include <netipsec/ipsec.h> 50#include <netipsec/ipsec_var.h> 51 52#include <netipsec/ipsec_osdep.h> 53#include <net/net_osdep.h> 54 55extern struct mbuf *m_getptr(struct mbuf *, int, int *); 56 57/* 58 * Create a writable copy of the mbuf chain. While doing this 59 * we compact the chain with a goal of producing a chain with 60 * at most two mbufs. The second mbuf in this chain is likely 61 * to be a cluster. The primary purpose of this work is to create 62 * a writable packet for encryption, compression, etc. The 63 * secondary goal is to linearize the data so the data can be 64 * passed to crypto hardware in the most efficient manner possible. 65 */ 66struct mbuf * 67m_clone(struct mbuf *m0) 68{ 69 struct mbuf *m, *mprev; 70 struct mbuf *n, *mfirst, *mlast; 71 int len, off; 72 73 IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf")); 74 75 mprev = NULL; 76 for (m = m0; m != NULL; m = mprev->m_next) { 77 /* 78 * Regular mbufs are ignored unless there's a cluster 79 * in front of it that we can use to coalesce. We do 80 * the latter mainly so later clusters can be coalesced 81 * also w/o having to handle them specially (i.e. convert 82 * mbuf+cluster -> cluster). This optimization is heavily 83 * influenced by the assumption that we're running over 84 * Ethernet where MCLBYTES is large enough that the max 85 * packet size will permit lots of coalescing into a 86 * single cluster. This in turn permits efficient 87 * crypto operations, especially when using hardware. 88 */ 89 if ((m->m_flags & M_EXT) == 0) { 90 if (mprev && (mprev->m_flags & M_EXT) && 91 m->m_len <= M_TRAILINGSPACE(mprev)) { 92 /* XXX: this ignores mbuf types */ 93 memcpy(mtod(mprev, char *) + mprev->m_len, 94 mtod(m, char *), m->m_len); 95 mprev->m_len += m->m_len; 96 mprev->m_next = m->m_next; /* unlink from chain */ 97 m_free(m); /* reclaim mbuf */ 98 newipsecstat.ips_mbcoalesced++; 99 } else { 100 mprev = m; 101 } 102 continue; 103 } 104 /* 105 * Writable mbufs are left alone (for now). Note 106 * that for 4.x systems it's not possible to identify 107 * whether or not mbufs with external buffers are 108 * writable unless they use clusters. 109 */ 110 if (M_EXT_WRITABLE(m)) { 111 mprev = m; 112 continue; 113 } 114 115 /* 116 * Not writable, replace with a copy or coalesce with 117 * the previous mbuf if possible (since we have to copy 118 * it anyway, we try to reduce the number of mbufs and 119 * clusters so that future work is easier). 120 */ 121 IPSEC_ASSERT(m->m_flags & M_EXT, 122 ("m_clone: m_flags 0x%x", m->m_flags)); 123 /* NB: we only coalesce into a cluster or larger */ 124 if (mprev != NULL && (mprev->m_flags & M_EXT) && 125 m->m_len <= M_TRAILINGSPACE(mprev)) { 126 /* XXX: this ignores mbuf types */ 127 memcpy(mtod(mprev, char *) + mprev->m_len, 128 mtod(m, char *), m->m_len); 129 mprev->m_len += m->m_len; 130 mprev->m_next = m->m_next; /* unlink from chain */ 131 m_free(m); /* reclaim mbuf */ 132 newipsecstat.ips_clcoalesced++; 133 continue; 134 } 135 136 /* 137 * Allocate new space to hold the copy... 138 */ 139 /* XXX why can M_PKTHDR be set past the first mbuf? */ 140 if (mprev == NULL && (m->m_flags & M_PKTHDR)) { 141 /* 142 * NB: if a packet header is present we must 143 * allocate the mbuf separately from any cluster 144 * because M_MOVE_PKTHDR will smash the data 145 * pointer and drop the M_EXT marker. 146 */ 147 MGETHDR(n, M_DONTWAIT, m->m_type); 148 if (n == NULL) { 149 m_freem(m0); 150 return (NULL); 151 } 152 M_MOVE_PKTHDR(n, m); 153 MCLGET(n, M_DONTWAIT); 154 if ((n->m_flags & M_EXT) == 0) { 155 m_free(n); 156 m_freem(m0); 157 return (NULL); 158 } 159 } else { 160 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 161 if (n == NULL) { 162 m_freem(m0); 163 return (NULL); 164 } 165 } 166 /* 167 * ... and copy the data. We deal with jumbo mbufs 168 * (i.e. m_len > MCLBYTES) by splitting them into 169 * clusters. We could just malloc a buffer and make 170 * it external but too many device drivers don't know 171 * how to break up the non-contiguous memory when 172 * doing DMA. 173 */ 174 len = m->m_len; 175 off = 0; 176 mfirst = n; 177 mlast = NULL; 178 for (;;) { 179 int cc = min(len, MCLBYTES); 180 memcpy(mtod(n, char *), mtod(m, char *) + off, cc); 181 n->m_len = cc; 182 if (mlast != NULL) 183 mlast->m_next = n; 184 mlast = n; 185 newipsecstat.ips_clcopied++; 186 187 len -= cc; 188 if (len <= 0) 189 break; 190 off += cc; 191 192 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 193 if (n == NULL) { 194 m_freem(mfirst); 195 m_freem(m0); 196 return (NULL); 197 } 198 } 199 n->m_next = m->m_next; 200 if (mprev == NULL) 201 m0 = mfirst; /* new head of chain */ 202 else 203 mprev->m_next = mfirst; /* replace old mbuf */ 204 m_free(m); /* release old mbuf */ 205 mprev = mfirst; 206 } 207 return (m0); 208} 209 210/* 211 * Make space for a new header of length hlen at skip bytes 212 * into the packet. When doing this we allocate new mbufs only 213 * when absolutely necessary. The mbuf where the new header 214 * is to go is returned together with an offset into the mbuf. 215 * If NULL is returned then the mbuf chain may have been modified; 216 * the caller is assumed to always free the chain. 217 */ 218struct mbuf * 219m_makespace(struct mbuf *m0, int skip, int hlen, int *off) 220{ 221 struct mbuf *m; 222 unsigned remain; 223 224 IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); 225 IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); 226 227 for (m = m0; m && skip > m->m_len; m = m->m_next) 228 skip -= m->m_len; 229 if (m == NULL) 230 return (NULL); 231 /* 232 * At this point skip is the offset into the mbuf m 233 * where the new header should be placed. Figure out 234 * if there's space to insert the new header. If so, 235 * and copying the remainder makese sense then do so. 236 * Otherwise insert a new mbuf in the chain, splitting 237 * the contents of m as needed. 238 */ 239 remain = m->m_len - skip; /* data to move */ 240 if (hlen > M_TRAILINGSPACE(m)) { 241 struct mbuf *n0, *n, **np; 242 int todo, len, done, alloc; 243 244 n0 = NULL; 245 np = &n0; 246 alloc = 0; 247 done = 0; 248 todo = remain; 249 while (todo > 0) { 250 if (todo > MHLEN) { 251 n = m_getcl(M_DONTWAIT, m->m_type, 0); 252 len = MCLBYTES; 253 } 254 else { 255 n = m_get(M_DONTWAIT, m->m_type); 256 len = MHLEN; 257 } 258 if (n == NULL) { 259 m_freem(n0); 260 return NULL; 261 } 262 *np = n; 263 np = &n->m_next; 264 alloc++; 265 len = min(todo, len); 266 memcpy(n->m_data, mtod(m, char *) + skip + done, len); 267 n->m_len = len; 268 done += len; 269 todo -= len; 270 } 271 272 if (hlen <= M_TRAILINGSPACE(m) + remain) { 273 m->m_len = skip + hlen; 274 *off = skip; 275 if (n0 != NULL) { 276 *np = m->m_next; 277 m->m_next = n0; 278 } 279 } 280 else { 281 n = m_get(M_DONTWAIT, m->m_type); 282 if (n == NULL) { 283 m_freem(n0); 284 return NULL; 285 } 286 alloc++; 287 288 if ((n->m_next = n0) == NULL) 289 np = &n->m_next; 290 n0 = n; 291 292 *np = m->m_next; 293 m->m_next = n0; 294 295 n->m_len = hlen; 296 m->m_len = skip; 297 298 m = n; /* header is at front ... */ 299 *off = 0; /* ... of new mbuf */ 300 } 301 302 newipsecstat.ips_mbinserted += alloc; 303 } else { 304 /* 305 * Copy the remainder to the back of the mbuf 306 * so there's space to write the new header. 307 */ 308 /* XXX can this be memcpy? does it handle overlap? */ 309 ovbcopy(mtod(m, char *) + skip, 310 mtod(m, char *) + skip + hlen, remain); 311 m->m_len += hlen; 312 *off = skip; 313 } 314 m0->m_pkthdr.len += hlen; /* adjust packet length */ 315 return m; 316} 317 318/* 319 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header 320 * length is updated, and a pointer to the first byte of the padding 321 * (which is guaranteed to be all in one mbuf) is returned. 322 */ 323void * 324m_pad(struct mbuf *m, int n) 325{ 326 register struct mbuf *m0, *m1; 327 register int len, pad; 328 void *retval; 329 330 if (n <= 0) { /* No stupid arguments. */ 331 DPRINTF(("m_pad: pad length invalid (%d)\n", n)); 332 m_freem(m); 333 return NULL; 334 } 335 336 len = m->m_pkthdr.len; 337 pad = n; 338 m0 = m; 339 340 while (m0->m_len < len) { 341IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ 342 len -= m0->m_len; 343 m0 = m0->m_next; 344 } 345 346 if (m0->m_len != len) { 347 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", 348 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); 349 350 m_freem(m); 351 return NULL; 352 } 353 354 /* Check for zero-length trailing mbufs, and find the last one. */ 355 for (m1 = m0; m1->m_next; m1 = m1->m_next) { 356 if (m1->m_next->m_len != 0) { 357 DPRINTF(("m_pad: length mismatch (should be %d " 358 "instead of %d)\n", 359 m->m_pkthdr.len, 360 m->m_pkthdr.len + m1->m_next->m_len)); 361 362 m_freem(m); 363 return NULL; 364 } 365 366 m0 = m1->m_next; 367 } 368 369 if (pad > M_TRAILINGSPACE(m0)) { 370 /* Add an mbuf to the chain. */ 371 MGET(m1, M_DONTWAIT, MT_DATA); 372 if (m1 == 0) { 373 m_freem(m0); 374 DPRINTF(("m_pad: unable to get extra mbuf\n")); 375 return NULL; 376 } 377 378 m0->m_next = m1; 379 m0 = m1; 380 m0->m_len = 0; 381 } 382 383 retval = m0->m_data + m0->m_len; 384 m0->m_len += pad; 385 m->m_pkthdr.len += pad; 386 387 return retval; 388} 389 390/* 391 * Remove hlen data at offset skip in the packet. This is used by 392 * the protocols strip protocol headers and associated data (e.g. IV, 393 * authenticator) on input. 394 */ 395int 396m_striphdr(struct mbuf *m, int skip, int hlen) 397{ 398 struct mbuf *m1; 399 int roff; 400 401 /* Find beginning of header */ 402 m1 = m_getptr(m, skip, &roff); 403 if (m1 == NULL) 404 return (EINVAL); 405 406 /* Remove the header and associated data from the mbuf. */ 407 if (roff == 0) { 408 /* The header was at the beginning of the mbuf */ 409 newipsecstat.ips_input_front++; 410 m_adj(m1, hlen); 411 if ((m1->m_flags & M_PKTHDR) == 0) 412 m->m_pkthdr.len -= hlen; 413 } else if (roff + hlen >= m1->m_len) { 414 struct mbuf *mo; 415 416 /* 417 * Part or all of the header is at the end of this mbuf, 418 * so first let's remove the remainder of the header from 419 * the beginning of the remainder of the mbuf chain, if any. 420 */ 421 newipsecstat.ips_input_end++; 422 if (roff + hlen > m1->m_len) { 423 /* Adjust the next mbuf by the remainder */ 424 m_adj(m1->m_next, roff + hlen - m1->m_len); 425 426 /* The second mbuf is guaranteed not to have a pkthdr... */ 427 m->m_pkthdr.len -= (roff + hlen - m1->m_len); 428 } 429 430 /* Now, let's unlink the mbuf chain for a second...*/ 431 mo = m1->m_next; 432 m1->m_next = NULL; 433 434 /* ...and trim the end of the first part of the chain...sick */ 435 m_adj(m1, -(m1->m_len - roff)); 436 if ((m1->m_flags & M_PKTHDR) == 0) 437 m->m_pkthdr.len -= (m1->m_len - roff); 438 439 /* Finally, let's relink */ 440 m1->m_next = mo; 441 } else { 442 /* 443 * The header lies in the "middle" of the mbuf; copy 444 * the remainder of the mbuf down over the header. 445 */ 446 newipsecstat.ips_input_middle++; 447 ovbcopy(mtod(m1, u_char *) + roff + hlen, 448 mtod(m1, u_char *) + roff, 449 m1->m_len - (roff + hlen)); 450 m1->m_len -= hlen; 451 m->m_pkthdr.len -= hlen; 452 } 453 return (0); 454} 455 456/* 457 * Diagnostic routine to check mbuf alignment as required by the 458 * crypto device drivers (that use DMA). 459 */ 460void 461m_checkalignment(const char* where, struct mbuf *m0, int off, int len) 462{ 463 int roff; 464 struct mbuf *m = m_getptr(m0, off, &roff); 465 void *addr; 466 467 if (m == NULL) 468 return; 469 printf("%s (off %u len %u): ", where, off, len); 470 addr = mtod(m, char *) + roff; 471 do { 472 int mlen; 473 474 if (((uintptr_t) addr) & 3) { 475 printf("addr misaligned %p,", addr); 476 break; 477 } 478 mlen = m->m_len; 479 if (mlen > len) 480 mlen = len; 481 len -= mlen; 482 if (len && (mlen & 3)) { 483 printf("len mismatch %u,", mlen); 484 break; 485 } 486 m = m->m_next; 487 addr = m ? mtod(m, void *) : NULL; 488 } while (m && len > 0); 489 for (m = m0; m; m = m->m_next) 490 printf(" [%p:%u]", mtod(m, void *), m->m_len); 491 printf("\n"); 492} 493