ipsec_mbuf.c revision 1.12
1/* $NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $ */ 2/*- 3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $ 28 */ 29 30#include <sys/cdefs.h> 31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $"); 32 33/* 34 * IPsec-specific mbuf routines. 35 */ 36 37#ifdef __FreeBSD__ 38#include "opt_param.h" 39#endif 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/mbuf.h> 44#include <sys/socket.h> 45 46#include <net/route.h> 47#include <netinet/in.h> 48 49#include <netipsec/ipsec.h> 50#include <netipsec/ipsec_var.h> 51#include <netipsec/ipsec_private.h> 52 53#include <netipsec/ipsec_osdep.h> 54#include <net/net_osdep.h> 55 56/* 57 * Create a writable copy of the mbuf chain. While doing this 58 * we compact the chain with a goal of producing a chain with 59 * at most two mbufs. The second mbuf in this chain is likely 60 * to be a cluster. The primary purpose of this work is to create 61 * a writable packet for encryption, compression, etc. The 62 * secondary goal is to linearize the data so the data can be 63 * passed to crypto hardware in the most efficient manner possible. 64 */ 65struct mbuf * 66m_clone(struct mbuf *m0) 67{ 68 struct mbuf *m, *mprev; 69 struct mbuf *n, *mfirst, *mlast; 70 int len, off; 71 72 IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf")); 73 74 mprev = NULL; 75 for (m = m0; m != NULL; m = mprev->m_next) { 76 /* 77 * Regular mbufs are ignored unless there's a cluster 78 * in front of it that we can use to coalesce. We do 79 * the latter mainly so later clusters can be coalesced 80 * also w/o having to handle them specially (i.e. convert 81 * mbuf+cluster -> cluster). This optimization is heavily 82 * influenced by the assumption that we're running over 83 * Ethernet where MCLBYTES is large enough that the max 84 * packet size will permit lots of coalescing into a 85 * single cluster. This in turn permits efficient 86 * crypto operations, especially when using hardware. 87 */ 88 if ((m->m_flags & M_EXT) == 0) { 89 if (mprev && (mprev->m_flags & M_EXT) && 90 m->m_len <= M_TRAILINGSPACE(mprev)) { 91 /* XXX: this ignores mbuf types */ 92 memcpy(mtod(mprev, char *) + mprev->m_len, 93 mtod(m, char *), m->m_len); 94 mprev->m_len += m->m_len; 95 mprev->m_next = m->m_next; /* unlink from chain */ 96 m_free(m); /* reclaim mbuf */ 97 IPSEC_STATINC(IPSEC_STAT_MBCOALESCED); 98 } else { 99 mprev = m; 100 } 101 continue; 102 } 103 /* 104 * Writable mbufs are left alone (for now). Note 105 * that for 4.x systems it's not possible to identify 106 * whether or not mbufs with external buffers are 107 * writable unless they use clusters. 108 */ 109 if (M_EXT_WRITABLE(m)) { 110 mprev = m; 111 continue; 112 } 113 114 /* 115 * Not writable, replace with a copy or coalesce with 116 * the previous mbuf if possible (since we have to copy 117 * it anyway, we try to reduce the number of mbufs and 118 * clusters so that future work is easier). 119 */ 120 IPSEC_ASSERT(m->m_flags & M_EXT, 121 ("m_clone: m_flags 0x%x", m->m_flags)); 122 /* NB: we only coalesce into a cluster or larger */ 123 if (mprev != NULL && (mprev->m_flags & M_EXT) && 124 m->m_len <= M_TRAILINGSPACE(mprev)) { 125 /* XXX: this ignores mbuf types */ 126 memcpy(mtod(mprev, char *) + mprev->m_len, 127 mtod(m, char *), m->m_len); 128 mprev->m_len += m->m_len; 129 mprev->m_next = m->m_next; /* unlink from chain */ 130 m_free(m); /* reclaim mbuf */ 131 IPSEC_STATINC(IPSEC_STAT_CLCOALESCED); 132 continue; 133 } 134 135 /* 136 * Allocate new space to hold the copy... 137 */ 138 /* XXX why can M_PKTHDR be set past the first mbuf? */ 139 if (mprev == NULL && (m->m_flags & M_PKTHDR)) { 140 /* 141 * NB: if a packet header is present we must 142 * allocate the mbuf separately from any cluster 143 * because M_MOVE_PKTHDR will smash the data 144 * pointer and drop the M_EXT marker. 145 */ 146 MGETHDR(n, M_DONTWAIT, m->m_type); 147 if (n == NULL) { 148 m_freem(m0); 149 return (NULL); 150 } 151 M_MOVE_PKTHDR(n, m); 152 MCLGET(n, M_DONTWAIT); 153 if ((n->m_flags & M_EXT) == 0) { 154 m_free(n); 155 m_freem(m0); 156 return (NULL); 157 } 158 } else { 159 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 160 if (n == NULL) { 161 m_freem(m0); 162 return (NULL); 163 } 164 } 165 /* 166 * ... and copy the data. We deal with jumbo mbufs 167 * (i.e. m_len > MCLBYTES) by splitting them into 168 * clusters. We could just malloc a buffer and make 169 * it external but too many device drivers don't know 170 * how to break up the non-contiguous memory when 171 * doing DMA. 172 */ 173 len = m->m_len; 174 off = 0; 175 mfirst = n; 176 mlast = NULL; 177 for (;;) { 178 int cc = min(len, MCLBYTES); 179 memcpy(mtod(n, char *), mtod(m, char *) + off, cc); 180 n->m_len = cc; 181 if (mlast != NULL) 182 mlast->m_next = n; 183 mlast = n; 184 IPSEC_STATINC(IPSEC_STAT_CLCOPIED); 185 186 len -= cc; 187 if (len <= 0) 188 break; 189 off += cc; 190 191 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 192 if (n == NULL) { 193 m_freem(mfirst); 194 m_freem(m0); 195 return (NULL); 196 } 197 } 198 n->m_next = m->m_next; 199 if (mprev == NULL) 200 m0 = mfirst; /* new head of chain */ 201 else 202 mprev->m_next = mfirst; /* replace old mbuf */ 203 m_free(m); /* release old mbuf */ 204 mprev = mfirst; 205 } 206 return (m0); 207} 208 209/* 210 * Make space for a new header of length hlen at skip bytes 211 * into the packet. When doing this we allocate new mbufs only 212 * when absolutely necessary. The mbuf where the new header 213 * is to go is returned together with an offset into the mbuf. 214 * If NULL is returned then the mbuf chain may have been modified; 215 * the caller is assumed to always free the chain. 216 */ 217struct mbuf * 218m_makespace(struct mbuf *m0, int skip, int hlen, int *off) 219{ 220 struct mbuf *m; 221 unsigned remain; 222 223 IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); 224 IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); 225 226 for (m = m0; m && skip > m->m_len; m = m->m_next) 227 skip -= m->m_len; 228 if (m == NULL) 229 return (NULL); 230 /* 231 * At this point skip is the offset into the mbuf m 232 * where the new header should be placed. Figure out 233 * if there's space to insert the new header. If so, 234 * and copying the remainder makese sense then do so. 235 * Otherwise insert a new mbuf in the chain, splitting 236 * the contents of m as needed. 237 */ 238 remain = m->m_len - skip; /* data to move */ 239 if (hlen > M_TRAILINGSPACE(m)) { 240 struct mbuf *n0, *n, **np; 241 int todo, len, done, alloc; 242 243 n0 = NULL; 244 np = &n0; 245 alloc = 0; 246 done = 0; 247 todo = remain; 248 while (todo > 0) { 249 if (todo > MHLEN) { 250 n = m_getcl(M_DONTWAIT, m->m_type, 0); 251 len = MCLBYTES; 252 } 253 else { 254 n = m_get(M_DONTWAIT, m->m_type); 255 len = MHLEN; 256 } 257 if (n == NULL) { 258 m_freem(n0); 259 return NULL; 260 } 261 *np = n; 262 np = &n->m_next; 263 alloc++; 264 len = min(todo, len); 265 memcpy(n->m_data, mtod(m, char *) + skip + done, len); 266 n->m_len = len; 267 done += len; 268 todo -= len; 269 } 270 271 if (hlen <= M_TRAILINGSPACE(m) + remain) { 272 m->m_len = skip + hlen; 273 *off = skip; 274 if (n0 != NULL) { 275 *np = m->m_next; 276 m->m_next = n0; 277 } 278 } 279 else { 280 n = m_get(M_DONTWAIT, m->m_type); 281 if (n == NULL) { 282 m_freem(n0); 283 return NULL; 284 } 285 alloc++; 286 287 if ((n->m_next = n0) == NULL) 288 np = &n->m_next; 289 n0 = n; 290 291 *np = m->m_next; 292 m->m_next = n0; 293 294 n->m_len = hlen; 295 m->m_len = skip; 296 297 m = n; /* header is at front ... */ 298 *off = 0; /* ... of new mbuf */ 299 } 300 301 IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc); 302 } else { 303 /* 304 * Copy the remainder to the back of the mbuf 305 * so there's space to write the new header. 306 */ 307 /* XXX can this be memcpy? does it handle overlap? */ 308 ovbcopy(mtod(m, char *) + skip, 309 mtod(m, char *) + skip + hlen, remain); 310 m->m_len += hlen; 311 *off = skip; 312 } 313 m0->m_pkthdr.len += hlen; /* adjust packet length */ 314 return m; 315} 316 317/* 318 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header 319 * length is updated, and a pointer to the first byte of the padding 320 * (which is guaranteed to be all in one mbuf) is returned. 321 */ 322void * 323m_pad(struct mbuf *m, int n) 324{ 325 register struct mbuf *m0, *m1; 326 register int len, pad; 327 void *retval; 328 329 if (n <= 0) { /* No stupid arguments. */ 330 DPRINTF(("m_pad: pad length invalid (%d)\n", n)); 331 m_freem(m); 332 return NULL; 333 } 334 335 len = m->m_pkthdr.len; 336 pad = n; 337 m0 = m; 338 339 while (m0->m_len < len) { 340IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ 341 len -= m0->m_len; 342 m0 = m0->m_next; 343 } 344 345 if (m0->m_len != len) { 346 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", 347 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); 348 349 m_freem(m); 350 return NULL; 351 } 352 353 /* Check for zero-length trailing mbufs, and find the last one. */ 354 for (m1 = m0; m1->m_next; m1 = m1->m_next) { 355 if (m1->m_next->m_len != 0) { 356 DPRINTF(("m_pad: length mismatch (should be %d " 357 "instead of %d)\n", 358 m->m_pkthdr.len, 359 m->m_pkthdr.len + m1->m_next->m_len)); 360 361 m_freem(m); 362 return NULL; 363 } 364 365 m0 = m1->m_next; 366 } 367 368 if (pad > M_TRAILINGSPACE(m0)) { 369 /* Add an mbuf to the chain. */ 370 MGET(m1, M_DONTWAIT, MT_DATA); 371 if (m1 == 0) { 372 m_freem(m0); 373 DPRINTF(("m_pad: unable to get extra mbuf\n")); 374 return NULL; 375 } 376 377 m0->m_next = m1; 378 m0 = m1; 379 m0->m_len = 0; 380 } 381 382 retval = m0->m_data + m0->m_len; 383 m0->m_len += pad; 384 m->m_pkthdr.len += pad; 385 386 return retval; 387} 388 389/* 390 * Remove hlen data at offset skip in the packet. This is used by 391 * the protocols strip protocol headers and associated data (e.g. IV, 392 * authenticator) on input. 393 */ 394int 395m_striphdr(struct mbuf *m, int skip, int hlen) 396{ 397 struct mbuf *m1; 398 int roff; 399 400 /* Find beginning of header */ 401 m1 = m_getptr(m, skip, &roff); 402 if (m1 == NULL) 403 return (EINVAL); 404 405 /* Remove the header and associated data from the mbuf. */ 406 if (roff == 0) { 407 /* The header was at the beginning of the mbuf */ 408 IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT); 409 m_adj(m1, hlen); 410 if ((m1->m_flags & M_PKTHDR) == 0) 411 m->m_pkthdr.len -= hlen; 412 } else if (roff + hlen >= m1->m_len) { 413 struct mbuf *mo; 414 415 /* 416 * Part or all of the header is at the end of this mbuf, 417 * so first let's remove the remainder of the header from 418 * the beginning of the remainder of the mbuf chain, if any. 419 */ 420 IPSEC_STATINC(IPSEC_STAT_INPUT_END); 421 if (roff + hlen > m1->m_len) { 422 /* Adjust the next mbuf by the remainder */ 423 m_adj(m1->m_next, roff + hlen - m1->m_len); 424 425 /* The second mbuf is guaranteed not to have a pkthdr... */ 426 m->m_pkthdr.len -= (roff + hlen - m1->m_len); 427 } 428 429 /* Now, let's unlink the mbuf chain for a second...*/ 430 mo = m1->m_next; 431 m1->m_next = NULL; 432 433 /* ...and trim the end of the first part of the chain...sick */ 434 m_adj(m1, -(m1->m_len - roff)); 435 if ((m1->m_flags & M_PKTHDR) == 0) 436 m->m_pkthdr.len -= (m1->m_len - roff); 437 438 /* Finally, let's relink */ 439 m1->m_next = mo; 440 } else { 441 /* 442 * The header lies in the "middle" of the mbuf; copy 443 * the remainder of the mbuf down over the header. 444 */ 445 IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE); 446 ovbcopy(mtod(m1, u_char *) + roff + hlen, 447 mtod(m1, u_char *) + roff, 448 m1->m_len - (roff + hlen)); 449 m1->m_len -= hlen; 450 m->m_pkthdr.len -= hlen; 451 } 452 return (0); 453} 454 455/* 456 * Diagnostic routine to check mbuf alignment as required by the 457 * crypto device drivers (that use DMA). 458 */ 459void 460m_checkalignment(const char* where, struct mbuf *m0, int off, int len) 461{ 462 int roff; 463 struct mbuf *m = m_getptr(m0, off, &roff); 464 void *addr; 465 466 if (m == NULL) 467 return; 468 printf("%s (off %u len %u): ", where, off, len); 469 addr = mtod(m, char *) + roff; 470 do { 471 int mlen; 472 473 if (((uintptr_t) addr) & 3) { 474 printf("addr misaligned %p,", addr); 475 break; 476 } 477 mlen = m->m_len; 478 if (mlen > len) 479 mlen = len; 480 len -= mlen; 481 if (len && (mlen & 3)) { 482 printf("len mismatch %u,", mlen); 483 break; 484 } 485 m = m->m_next; 486 addr = m ? mtod(m, void *) : NULL; 487 } while (m && len > 0); 488 for (m = m0; m; m = m->m_next) 489 printf(" [%p:%u]", mtod(m, void *), m->m_len); 490 printf("\n"); 491} 492