ipsec_mbuf.c revision 1.13
1/* $NetBSD: ipsec_mbuf.c,v 1.13 2017/04/18 05:25:32 ozaki-r Exp $ */ 2/*- 3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $ 28 */ 29 30#include <sys/cdefs.h> 31__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.13 2017/04/18 05:25:32 ozaki-r Exp $"); 32 33/* 34 * IPsec-specific mbuf routines. 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/mbuf.h> 40#include <sys/socket.h> 41 42#include <net/route.h> 43#include <netinet/in.h> 44 45#include <netipsec/ipsec.h> 46#include <netipsec/ipsec_var.h> 47#include <netipsec/ipsec_private.h> 48 49#include <netipsec/ipsec_osdep.h> 50#include <net/net_osdep.h> 51 52/* 53 * Create a writable copy of the mbuf chain. While doing this 54 * we compact the chain with a goal of producing a chain with 55 * at most two mbufs. The second mbuf in this chain is likely 56 * to be a cluster. The primary purpose of this work is to create 57 * a writable packet for encryption, compression, etc. The 58 * secondary goal is to linearize the data so the data can be 59 * passed to crypto hardware in the most efficient manner possible. 60 */ 61struct mbuf * 62m_clone(struct mbuf *m0) 63{ 64 struct mbuf *m, *mprev; 65 struct mbuf *n, *mfirst, *mlast; 66 int len, off; 67 68 IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf")); 69 70 mprev = NULL; 71 for (m = m0; m != NULL; m = mprev->m_next) { 72 /* 73 * Regular mbufs are ignored unless there's a cluster 74 * in front of it that we can use to coalesce. We do 75 * the latter mainly so later clusters can be coalesced 76 * also w/o having to handle them specially (i.e. convert 77 * mbuf+cluster -> cluster). This optimization is heavily 78 * influenced by the assumption that we're running over 79 * Ethernet where MCLBYTES is large enough that the max 80 * packet size will permit lots of coalescing into a 81 * single cluster. This in turn permits efficient 82 * crypto operations, especially when using hardware. 83 */ 84 if ((m->m_flags & M_EXT) == 0) { 85 if (mprev && (mprev->m_flags & M_EXT) && 86 m->m_len <= M_TRAILINGSPACE(mprev)) { 87 /* XXX: this ignores mbuf types */ 88 memcpy(mtod(mprev, char *) + mprev->m_len, 89 mtod(m, char *), m->m_len); 90 mprev->m_len += m->m_len; 91 mprev->m_next = m->m_next; /* unlink from chain */ 92 m_free(m); /* reclaim mbuf */ 93 IPSEC_STATINC(IPSEC_STAT_MBCOALESCED); 94 } else { 95 mprev = m; 96 } 97 continue; 98 } 99 /* 100 * Writable mbufs are left alone (for now). Note 101 * that for 4.x systems it's not possible to identify 102 * whether or not mbufs with external buffers are 103 * writable unless they use clusters. 104 */ 105 if (M_EXT_WRITABLE(m)) { 106 mprev = m; 107 continue; 108 } 109 110 /* 111 * Not writable, replace with a copy or coalesce with 112 * the previous mbuf if possible (since we have to copy 113 * it anyway, we try to reduce the number of mbufs and 114 * clusters so that future work is easier). 115 */ 116 IPSEC_ASSERT(m->m_flags & M_EXT, 117 ("m_clone: m_flags 0x%x", m->m_flags)); 118 /* NB: we only coalesce into a cluster or larger */ 119 if (mprev != NULL && (mprev->m_flags & M_EXT) && 120 m->m_len <= M_TRAILINGSPACE(mprev)) { 121 /* XXX: this ignores mbuf types */ 122 memcpy(mtod(mprev, char *) + mprev->m_len, 123 mtod(m, char *), m->m_len); 124 mprev->m_len += m->m_len; 125 mprev->m_next = m->m_next; /* unlink from chain */ 126 m_free(m); /* reclaim mbuf */ 127 IPSEC_STATINC(IPSEC_STAT_CLCOALESCED); 128 continue; 129 } 130 131 /* 132 * Allocate new space to hold the copy... 133 */ 134 /* XXX why can M_PKTHDR be set past the first mbuf? */ 135 if (mprev == NULL && (m->m_flags & M_PKTHDR)) { 136 /* 137 * NB: if a packet header is present we must 138 * allocate the mbuf separately from any cluster 139 * because M_MOVE_PKTHDR will smash the data 140 * pointer and drop the M_EXT marker. 141 */ 142 MGETHDR(n, M_DONTWAIT, m->m_type); 143 if (n == NULL) { 144 m_freem(m0); 145 return (NULL); 146 } 147 M_MOVE_PKTHDR(n, m); 148 MCLGET(n, M_DONTWAIT); 149 if ((n->m_flags & M_EXT) == 0) { 150 m_free(n); 151 m_freem(m0); 152 return (NULL); 153 } 154 } else { 155 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 156 if (n == NULL) { 157 m_freem(m0); 158 return (NULL); 159 } 160 } 161 /* 162 * ... and copy the data. We deal with jumbo mbufs 163 * (i.e. m_len > MCLBYTES) by splitting them into 164 * clusters. We could just malloc a buffer and make 165 * it external but too many device drivers don't know 166 * how to break up the non-contiguous memory when 167 * doing DMA. 168 */ 169 len = m->m_len; 170 off = 0; 171 mfirst = n; 172 mlast = NULL; 173 for (;;) { 174 int cc = min(len, MCLBYTES); 175 memcpy(mtod(n, char *), mtod(m, char *) + off, cc); 176 n->m_len = cc; 177 if (mlast != NULL) 178 mlast->m_next = n; 179 mlast = n; 180 IPSEC_STATINC(IPSEC_STAT_CLCOPIED); 181 182 len -= cc; 183 if (len <= 0) 184 break; 185 off += cc; 186 187 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 188 if (n == NULL) { 189 m_freem(mfirst); 190 m_freem(m0); 191 return (NULL); 192 } 193 } 194 n->m_next = m->m_next; 195 if (mprev == NULL) 196 m0 = mfirst; /* new head of chain */ 197 else 198 mprev->m_next = mfirst; /* replace old mbuf */ 199 m_free(m); /* release old mbuf */ 200 mprev = mfirst; 201 } 202 return (m0); 203} 204 205/* 206 * Make space for a new header of length hlen at skip bytes 207 * into the packet. When doing this we allocate new mbufs only 208 * when absolutely necessary. The mbuf where the new header 209 * is to go is returned together with an offset into the mbuf. 210 * If NULL is returned then the mbuf chain may have been modified; 211 * the caller is assumed to always free the chain. 212 */ 213struct mbuf * 214m_makespace(struct mbuf *m0, int skip, int hlen, int *off) 215{ 216 struct mbuf *m; 217 unsigned remain; 218 219 IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); 220 IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); 221 222 for (m = m0; m && skip > m->m_len; m = m->m_next) 223 skip -= m->m_len; 224 if (m == NULL) 225 return (NULL); 226 /* 227 * At this point skip is the offset into the mbuf m 228 * where the new header should be placed. Figure out 229 * if there's space to insert the new header. If so, 230 * and copying the remainder makese sense then do so. 231 * Otherwise insert a new mbuf in the chain, splitting 232 * the contents of m as needed. 233 */ 234 remain = m->m_len - skip; /* data to move */ 235 if (hlen > M_TRAILINGSPACE(m)) { 236 struct mbuf *n0, *n, **np; 237 int todo, len, done, alloc; 238 239 n0 = NULL; 240 np = &n0; 241 alloc = 0; 242 done = 0; 243 todo = remain; 244 while (todo > 0) { 245 if (todo > MHLEN) { 246 n = m_getcl(M_DONTWAIT, m->m_type, 0); 247 len = MCLBYTES; 248 } 249 else { 250 n = m_get(M_DONTWAIT, m->m_type); 251 len = MHLEN; 252 } 253 if (n == NULL) { 254 m_freem(n0); 255 return NULL; 256 } 257 *np = n; 258 np = &n->m_next; 259 alloc++; 260 len = min(todo, len); 261 memcpy(n->m_data, mtod(m, char *) + skip + done, len); 262 n->m_len = len; 263 done += len; 264 todo -= len; 265 } 266 267 if (hlen <= M_TRAILINGSPACE(m) + remain) { 268 m->m_len = skip + hlen; 269 *off = skip; 270 if (n0 != NULL) { 271 *np = m->m_next; 272 m->m_next = n0; 273 } 274 } 275 else { 276 n = m_get(M_DONTWAIT, m->m_type); 277 if (n == NULL) { 278 m_freem(n0); 279 return NULL; 280 } 281 alloc++; 282 283 if ((n->m_next = n0) == NULL) 284 np = &n->m_next; 285 n0 = n; 286 287 *np = m->m_next; 288 m->m_next = n0; 289 290 n->m_len = hlen; 291 m->m_len = skip; 292 293 m = n; /* header is at front ... */ 294 *off = 0; /* ... of new mbuf */ 295 } 296 297 IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc); 298 } else { 299 /* 300 * Copy the remainder to the back of the mbuf 301 * so there's space to write the new header. 302 */ 303 /* XXX can this be memcpy? does it handle overlap? */ 304 ovbcopy(mtod(m, char *) + skip, 305 mtod(m, char *) + skip + hlen, remain); 306 m->m_len += hlen; 307 *off = skip; 308 } 309 m0->m_pkthdr.len += hlen; /* adjust packet length */ 310 return m; 311} 312 313/* 314 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header 315 * length is updated, and a pointer to the first byte of the padding 316 * (which is guaranteed to be all in one mbuf) is returned. 317 */ 318void * 319m_pad(struct mbuf *m, int n) 320{ 321 register struct mbuf *m0, *m1; 322 register int len, pad; 323 void *retval; 324 325 if (n <= 0) { /* No stupid arguments. */ 326 DPRINTF(("m_pad: pad length invalid (%d)\n", n)); 327 m_freem(m); 328 return NULL; 329 } 330 331 len = m->m_pkthdr.len; 332 pad = n; 333 m0 = m; 334 335 while (m0->m_len < len) { 336IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ 337 len -= m0->m_len; 338 m0 = m0->m_next; 339 } 340 341 if (m0->m_len != len) { 342 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", 343 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); 344 345 m_freem(m); 346 return NULL; 347 } 348 349 /* Check for zero-length trailing mbufs, and find the last one. */ 350 for (m1 = m0; m1->m_next; m1 = m1->m_next) { 351 if (m1->m_next->m_len != 0) { 352 DPRINTF(("m_pad: length mismatch (should be %d " 353 "instead of %d)\n", 354 m->m_pkthdr.len, 355 m->m_pkthdr.len + m1->m_next->m_len)); 356 357 m_freem(m); 358 return NULL; 359 } 360 361 m0 = m1->m_next; 362 } 363 364 if (pad > M_TRAILINGSPACE(m0)) { 365 /* Add an mbuf to the chain. */ 366 MGET(m1, M_DONTWAIT, MT_DATA); 367 if (m1 == 0) { 368 m_freem(m0); 369 DPRINTF(("m_pad: unable to get extra mbuf\n")); 370 return NULL; 371 } 372 373 m0->m_next = m1; 374 m0 = m1; 375 m0->m_len = 0; 376 } 377 378 retval = m0->m_data + m0->m_len; 379 m0->m_len += pad; 380 m->m_pkthdr.len += pad; 381 382 return retval; 383} 384 385/* 386 * Remove hlen data at offset skip in the packet. This is used by 387 * the protocols strip protocol headers and associated data (e.g. IV, 388 * authenticator) on input. 389 */ 390int 391m_striphdr(struct mbuf *m, int skip, int hlen) 392{ 393 struct mbuf *m1; 394 int roff; 395 396 /* Find beginning of header */ 397 m1 = m_getptr(m, skip, &roff); 398 if (m1 == NULL) 399 return (EINVAL); 400 401 /* Remove the header and associated data from the mbuf. */ 402 if (roff == 0) { 403 /* The header was at the beginning of the mbuf */ 404 IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT); 405 m_adj(m1, hlen); 406 if ((m1->m_flags & M_PKTHDR) == 0) 407 m->m_pkthdr.len -= hlen; 408 } else if (roff + hlen >= m1->m_len) { 409 struct mbuf *mo; 410 411 /* 412 * Part or all of the header is at the end of this mbuf, 413 * so first let's remove the remainder of the header from 414 * the beginning of the remainder of the mbuf chain, if any. 415 */ 416 IPSEC_STATINC(IPSEC_STAT_INPUT_END); 417 if (roff + hlen > m1->m_len) { 418 /* Adjust the next mbuf by the remainder */ 419 m_adj(m1->m_next, roff + hlen - m1->m_len); 420 421 /* The second mbuf is guaranteed not to have a pkthdr... */ 422 m->m_pkthdr.len -= (roff + hlen - m1->m_len); 423 } 424 425 /* Now, let's unlink the mbuf chain for a second...*/ 426 mo = m1->m_next; 427 m1->m_next = NULL; 428 429 /* ...and trim the end of the first part of the chain...sick */ 430 m_adj(m1, -(m1->m_len - roff)); 431 if ((m1->m_flags & M_PKTHDR) == 0) 432 m->m_pkthdr.len -= (m1->m_len - roff); 433 434 /* Finally, let's relink */ 435 m1->m_next = mo; 436 } else { 437 /* 438 * The header lies in the "middle" of the mbuf; copy 439 * the remainder of the mbuf down over the header. 440 */ 441 IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE); 442 ovbcopy(mtod(m1, u_char *) + roff + hlen, 443 mtod(m1, u_char *) + roff, 444 m1->m_len - (roff + hlen)); 445 m1->m_len -= hlen; 446 m->m_pkthdr.len -= hlen; 447 } 448 return (0); 449} 450 451/* 452 * Diagnostic routine to check mbuf alignment as required by the 453 * crypto device drivers (that use DMA). 454 */ 455void 456m_checkalignment(const char* where, struct mbuf *m0, int off, int len) 457{ 458 int roff; 459 struct mbuf *m = m_getptr(m0, off, &roff); 460 void *addr; 461 462 if (m == NULL) 463 return; 464 printf("%s (off %u len %u): ", where, off, len); 465 addr = mtod(m, char *) + roff; 466 do { 467 int mlen; 468 469 if (((uintptr_t) addr) & 3) { 470 printf("addr misaligned %p,", addr); 471 break; 472 } 473 mlen = m->m_len; 474 if (mlen > len) 475 mlen = len; 476 len -= mlen; 477 if (len && (mlen & 3)) { 478 printf("len mismatch %u,", mlen); 479 break; 480 } 481 m = m->m_next; 482 addr = m ? mtod(m, void *) : NULL; 483 } while (m && len > 0); 484 for (m = m0; m; m = m->m_next) 485 printf(" [%p:%u]", mtod(m, void *), m->m_len); 486 printf("\n"); 487} 488