1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */ 30/* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */ 31 32/* 33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the project nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61#include <sys/param.h> 62#include <sys/systm.h> 63#include <sys/malloc.h> 64#include <sys/mcache.h> 65#include <sys/mbuf.h> 66#include <sys/domain.h> 67#include <sys/protosw.h> 68#include <sys/socket.h> 69#include <sys/errno.h> 70#include <sys/time.h> 71#include <sys/kernel.h> 72#include <sys/syslog.h> 73#include <kern/queue.h> 74#include <kern/locks.h> 75 76#include <net/if.h> 77#include <net/route.h> 78 79#include <netinet/in.h> 80#include <netinet/in_var.h> 81#include <netinet/ip.h> 82#include <netinet/ip6.h> 83#include <netinet6/ip6_var.h> 84#include <netinet/icmp6.h> 85 86#include <net/net_osdep.h> 87#include <dev/random/randomdev.h> 88 89/* 90 * Define it to get a correct behavior on per-interface statistics. 
 */
#define IN6_IFSTAT_STRICT

/* Declares the mbuf queue head type used below for deferred frees/errors */
MBUFQ_HEAD(fq6_head);

/* Per-mbuf scratch value carried in m_pkthdr.pkt_hdr for deferred errors */
static void frag6_save_context(struct mbuf *, int);
static void frag6_scrub_context(struct mbuf *);
static int frag6_restore_context(struct mbuf *);

/* Deferred ICMPv6 error emitters; both assert that ip6qlock is not held */
static void frag6_icmp6_paramprob_error(struct fq6_head *);
static void frag6_icmp6_timeex_error(struct fq6_head *);

/* Doubly-linked list primitives for fragments and reassembly queues */
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
static void frag6_deq(struct ip6asfrag *);
static void frag6_insque(struct ip6q *, struct ip6q *);
static void frag6_remque(struct ip6q *);
static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *);

static int frag6_timeout_run;		/* frag6 timer is scheduled to run */
static void frag6_timeout(void *);
static void frag6_sched_timeout(void);

/* Allocators for queue heads and fragment descriptors */
static struct ip6q *ip6q_alloc(int);
static void ip6q_free(struct ip6q *);
static void ip6q_updateparams(void);
static struct ip6asfrag *ip6af_alloc(int);
static void ip6af_free(struct ip6asfrag *);

/* Mutex protecting the reassembly queues, with its attribute/group objects */
decl_lck_mtx_data(static, ip6qlock);
static lck_attr_t	*ip6qlock_attr;
static lck_grp_t	*ip6qlock_grp;
static lck_grp_attr_t	*ip6qlock_grp_attr;

/* IPv6 fragment reassembly queues (protected by ip6qlock) */
static struct ip6q ip6q;		/* ip6 reassembly queues */
static int ip6_maxfragpackets;		/* max packets in reass queues */
static u_int32_t frag6_nfragpackets;	/* # of packets in reass queues */
static int ip6_maxfrags;		/* max fragments in reass queues */
static u_int32_t frag6_nfrags;		/* # of fragments in reass queues */
static u_int32_t ip6q_limit;		/* ip6q allocation limit */
static u_int32_t ip6q_count;		/* current # of allocated ip6q's */
static u_int32_t ip6af_limit;		/* ip6asfrag allocation limit */
static u_int32_t ip6af_count;		/* current # of allocated ip6asfrag's */

/* sysctl handlers that validate new limits before applying them */
static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS;
static int sysctl_maxfrags SYSCTL_HANDLER_ARGS;

SYSCTL_DECL(_net_inet6_ip6);
139 140SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, 141 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, 142 sysctl_maxfragpackets, "I", 143 "Maximum number of IPv6 fragment reassembly queue entries"); 144 145SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, 146 CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, 147 "Current number of IPv6 fragment reassembly queue entries"); 148 149SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, 150 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, 151 sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed"); 152 153/* 154 * Initialise reassembly queue and fragment identifier. 155 */ 156void 157frag6_init(void) 158{ 159 /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */ 160 _CASSERT(sizeof (struct ip6q) <= _MLEN); 161 /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */ 162 _CASSERT(sizeof (struct ip6asfrag) <= _MLEN); 163 164 /* IPv6 fragment reassembly queue lock */ 165 ip6qlock_grp_attr = lck_grp_attr_alloc_init(); 166 ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr); 167 ip6qlock_attr = lck_attr_alloc_init(); 168 lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr); 169 170 lck_mtx_lock(&ip6qlock); 171 /* Initialize IPv6 reassembly queue. 
*/ 172 ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; 173 174 /* same limits as IPv4 */ 175 ip6_maxfragpackets = nmbclusters / 32; 176 ip6_maxfrags = ip6_maxfragpackets * 2; 177 ip6q_updateparams(); 178 lck_mtx_unlock(&ip6qlock); 179} 180 181static void 182frag6_save_context(struct mbuf *m, int val) 183{ 184 m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val; 185} 186 187static void 188frag6_scrub_context(struct mbuf *m) 189{ 190 m->m_pkthdr.pkt_hdr = NULL; 191} 192 193static int 194frag6_restore_context(struct mbuf *m) 195{ 196 return ((int)m->m_pkthdr.pkt_hdr); 197} 198 199/* 200 * Send any deferred ICMP param problem error messages; caller must not be 201 * holding ip6qlock and is expected to have saved the per-packet parameter 202 * value via frag6_save_context(). 203 */ 204static void 205frag6_icmp6_paramprob_error(struct fq6_head *diq6) 206{ 207 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); 208 209 if (!MBUFQ_EMPTY(diq6)) { 210 struct mbuf *merr, *merr_tmp; 211 int param; 212 MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { 213 MBUFQ_REMOVE(diq6, merr); 214 MBUFQ_NEXT(merr) = NULL; 215 param = frag6_restore_context(merr); 216 frag6_scrub_context(merr); 217 icmp6_error(merr, ICMP6_PARAM_PROB, 218 ICMP6_PARAMPROB_HEADER, param); 219 } 220 } 221} 222 223/* 224 * Send any deferred ICMP time exceeded error messages; 225 * caller must not be holding ip6qlock. 226 */ 227static void 228frag6_icmp6_timeex_error(struct fq6_head *diq6) 229{ 230 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); 231 232 if (!MBUFQ_EMPTY(diq6)) { 233 struct mbuf *m, *m_tmp; 234 MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { 235 MBUFQ_REMOVE(diq6, m); 236 MBUFQ_NEXT(m) = NULL; 237 icmp6_error(m, ICMP6_TIME_EXCEEDED, 238 ICMP6_TIME_EXCEED_REASSEMBLY, 0); 239 } 240 } 241} 242 243/* 244 * In RFC2460, fragment and reassembly rule do not agree with each other, 245 * in terms of next header field handling in fragment header. 
 * While the sender will use the same value for all of the fragmented packets,
 * the receiver is advised not to check for consistency.
 *
 * fragment rule (p20):
 *	(2) A Fragment header containing:
 *	The Next Header value that identifies the first header of
 *	the Fragmentable Part of the original packet.
 *		-> next header field is same for all fragments
 *
 * reassembly rule (p21):
 *	The Next Header field of the last header of the Unfragmentable
 *	Part is obtained from the Next Header field of the first
 *	fragment's Fragment header.
 *		-> should grab it from the first fragment only
 *
 * The following note also contradicts the fragment rule - no one is going
 * to send different fragments with different next header fields.
 *
 * additional note (p22):
 *	The Next Header values in the Fragment headers of different
 *	fragments of the same original packet may differ.  Only the value
 *	from the Offset zero fragment packet is used for reassembly.
 *		-> should grab it from the first fragment only
 *
 * There is no explicit reason given in the RFC.  Perhaps it is historical?
271 */ 272/* 273 * Fragment input 274 */ 275int 276frag6_input(struct mbuf **mp, int *offp, int proto) 277{ 278#pragma unused(proto) 279 struct mbuf *m = *mp, *t; 280 struct ip6_hdr *ip6; 281 struct ip6_frag *ip6f; 282 struct ip6q *q6; 283 struct ip6asfrag *af6, *ip6af, *af6dwn; 284 int offset = *offp, nxt, i, next; 285 int first_frag = 0; 286 int fragoff, frgpartlen; /* must be larger than u_int16_t */ 287 struct ifnet *dstifp = NULL; 288 u_int8_t ecn, ecn0; 289 uint32_t csum, csum_flags; 290 struct fq6_head diq6; 291 int locked = 0; 292 293 VERIFY(m->m_flags & M_PKTHDR); 294 295 MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */ 296 297 /* Expect 32-bit aligned data pointer on strict-align platforms */ 298 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); 299 300 ip6 = mtod(m, struct ip6_hdr *); 301 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); 302 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); 303 304#ifdef IN6_IFSTAT_STRICT 305 /* find the destination interface of the packet. */ 306 if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { 307 uint32_t idx; 308 309 if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) { 310 if (idx > 0 && idx <= if_index) { 311 ifnet_head_lock_shared(); 312 dstifp = ifindex2ifnet[idx]; 313 ifnet_head_done(); 314 } 315 } 316 } 317#endif /* IN6_IFSTAT_STRICT */ 318 319 /* we are violating the spec, this may not be the dst interface */ 320 if (dstifp == NULL) 321 dstifp = m->m_pkthdr.rcvif; 322 323 /* jumbo payload can't contain a fragment header */ 324 if (ip6->ip6_plen == 0) { 325 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); 326 in6_ifstat_inc(dstifp, ifs6_reass_fail); 327 m = NULL; 328 goto done; 329 } 330 331 /* 332 * check whether fragment packet's fragment length is 333 * multiple of 8 octets. 
334 * sizeof(struct ip6_frag) == 8 335 * sizeof(struct ip6_hdr) = 40 336 */ 337 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && 338 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { 339 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 340 offsetof(struct ip6_hdr, ip6_plen)); 341 in6_ifstat_inc(dstifp, ifs6_reass_fail); 342 m = NULL; 343 goto done; 344 } 345 346 /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */ 347 if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) { 348 ip6stat.ip6s_fragments++; 349 ip6stat.ip6s_fragdropped++; 350 in6_ifstat_inc(dstifp, ifs6_reass_fail); 351 m_freem(m); 352 m = NULL; 353 goto done; 354 } 355 356 /* offset now points to data portion */ 357 offset += sizeof(struct ip6_frag); 358 359 /* 360 * Leverage partial checksum offload for simple UDP/IP fragments, 361 * as that is the most common case. 362 * 363 * Perform 1's complement adjustment of octets that got included/ 364 * excluded in the hardware-calculated checksum value. 365 */ 366 if (ip6f->ip6f_nxt == IPPROTO_UDP && 367 offset == (sizeof (*ip6) + sizeof (*ip6f)) && 368 (m->m_pkthdr.csum_flags & 369 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == 370 (CSUM_DATA_VALID | CSUM_PARTIAL)) { 371 uint32_t start; 372 373 start = m->m_pkthdr.csum_rx_start; 374 csum = m->m_pkthdr.csum_rx_val; 375 376 if (start != offset) { 377 uint16_t s, d; 378 379 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { 380 s = ip6->ip6_src.s6_addr16[1]; 381 ip6->ip6_src.s6_addr16[1] = 0 ; 382 } 383 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { 384 d = ip6->ip6_dst.s6_addr16[1]; 385 ip6->ip6_dst.s6_addr16[1] = 0; 386 } 387 388 /* callee folds in sum */ 389 csum = m_adj_sum16(m, start, offset, csum); 390 391 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 392 ip6->ip6_src.s6_addr16[1] = s; 393 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 394 ip6->ip6_dst.s6_addr16[1] = d; 395 396 } 397 csum_flags = m->m_pkthdr.csum_flags; 398 } else { 399 csum = 0; 400 csum_flags = 0; 401 } 402 403 /* Invalidate checksum */ 404 
m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; 405 406 ip6stat.ip6s_fragments++; 407 in6_ifstat_inc(dstifp, ifs6_reass_reqd); 408 409 lck_mtx_lock(&ip6qlock); 410 locked = 1; 411 412 for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) 413 if (ip6f->ip6f_ident == q6->ip6q_ident && 414 IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && 415 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)) 416 break; 417 418 if (q6 == &ip6q) { 419 /* 420 * the first fragment to arrive, create a reassembly queue. 421 */ 422 first_frag = 1; 423 424 q6 = ip6q_alloc(M_DONTWAIT); 425 if (q6 == NULL) 426 goto dropfrag; 427 428 frag6_insque(q6, &ip6q); 429 frag6_nfragpackets++; 430 431 /* ip6q_nxt will be filled afterwards, from 1st fragment */ 432 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; 433#ifdef notyet 434 q6->ip6q_nxtp = (u_char *)nxtp; 435#endif 436 q6->ip6q_ident = ip6f->ip6f_ident; 437 q6->ip6q_ttl = IPV6_FRAGTTL; 438 q6->ip6q_src = ip6->ip6_src; 439 q6->ip6q_dst = ip6->ip6_dst; 440 q6->ip6q_ecn = 441 (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; 442 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ 443 444 q6->ip6q_nfrag = 0; 445 446 /* 447 * If the first fragment has valid checksum offload 448 * info, the rest of fragments are eligible as well. 449 */ 450 if (csum_flags != 0) { 451 q6->ip6q_csum = csum; 452 q6->ip6q_csum_flags = csum_flags; 453 } 454 } 455 456 /* 457 * If it's the 1st fragment, record the length of the 458 * unfragmentable part and the next header of the fragment header. 459 */ 460 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); 461 if (fragoff == 0) { 462 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - 463 sizeof(struct ip6_frag); 464 q6->ip6q_nxt = ip6f->ip6f_nxt; 465 } 466 467 /* 468 * Check that the reassembled packet would not exceed 65535 bytes 469 * in size. 470 * If it would exceed, discard the fragment and return an ICMP error. 
471 */ 472 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; 473 if (q6->ip6q_unfrglen >= 0) { 474 /* The 1st fragment has already arrived. */ 475 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { 476 lck_mtx_unlock(&ip6qlock); 477 locked = 0; 478 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 479 offset - sizeof(struct ip6_frag) + 480 offsetof(struct ip6_frag, ip6f_offlg)); 481 m = NULL; 482 goto done; 483 } 484 } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { 485 lck_mtx_unlock(&ip6qlock); 486 locked = 0; 487 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 488 offset - sizeof(struct ip6_frag) + 489 offsetof(struct ip6_frag, ip6f_offlg)); 490 m = NULL; 491 goto done; 492 } 493 /* 494 * If it's the first fragment, do the above check for each 495 * fragment already stored in the reassembly queue. 496 */ 497 if (fragoff == 0) { 498 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 499 af6 = af6dwn) { 500 af6dwn = af6->ip6af_down; 501 502 if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > 503 IPV6_MAXPACKET) { 504 struct mbuf *merr = IP6_REASS_MBUF(af6); 505 struct ip6_hdr *ip6err; 506 int erroff = af6->ip6af_offset; 507 508 /* dequeue the fragment. */ 509 frag6_deq(af6); 510 ip6af_free(af6); 511 512 /* adjust pointer. */ 513 ip6err = mtod(merr, struct ip6_hdr *); 514 515 /* 516 * Restore source and destination addresses 517 * in the erroneous IPv6 header. 
518 */ 519 ip6err->ip6_src = q6->ip6q_src; 520 ip6err->ip6_dst = q6->ip6q_dst; 521 522 frag6_save_context(merr, 523 erroff - sizeof (struct ip6_frag) + 524 offsetof(struct ip6_frag, ip6f_offlg)); 525 526 MBUFQ_ENQUEUE(&diq6, merr); 527 } 528 } 529 } 530 531 ip6af = ip6af_alloc(M_DONTWAIT); 532 if (ip6af == NULL) 533 goto dropfrag; 534 535 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; 536 ip6af->ip6af_off = fragoff; 537 ip6af->ip6af_frglen = frgpartlen; 538 ip6af->ip6af_offset = offset; 539 IP6_REASS_MBUF(ip6af) = m; 540 541 if (first_frag) { 542 af6 = (struct ip6asfrag *)q6; 543 goto insert; 544 } 545 546 /* 547 * Handle ECN by comparing this segment with the first one; 548 * if CE is set, do not lose CE. 549 * drop if CE and not-ECT are mixed for the same packet. 550 */ 551 ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; 552 ecn0 = q6->ip6q_ecn; 553 if (ecn == IPTOS_ECN_CE) { 554 if (ecn0 == IPTOS_ECN_NOTECT) { 555 ip6af_free(ip6af); 556 goto dropfrag; 557 } 558 if (ecn0 != IPTOS_ECN_CE) 559 q6->ip6q_ecn = IPTOS_ECN_CE; 560 } 561 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { 562 ip6af_free(ip6af); 563 goto dropfrag; 564 } 565 566 /* 567 * Find a segment which begins after this one does. 568 */ 569 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 570 af6 = af6->ip6af_down) 571 if (af6->ip6af_off > ip6af->ip6af_off) 572 break; 573 574#if 0 575 /* 576 * If there is a preceding segment, it may provide some of 577 * our data already. If so, drop the data from the incoming 578 * segment. If it provides all of our data, drop us. 579 * 580 * If some of the data is dropped from the preceding 581 * segment, then it's checksum is invalidated. 
582 */ 583 if (af6->ip6af_up != (struct ip6asfrag *)q6) { 584 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen 585 - ip6af->ip6af_off; 586 if (i > 0) { 587 if (i >= ip6af->ip6af_frglen) 588 goto dropfrag; 589 m_adj(IP6_REASS_MBUF(ip6af), i); 590 q6->ip6q_csum_flags = 0; 591 ip6af->ip6af_off += i; 592 ip6af->ip6af_frglen -= i; 593 } 594 } 595 596 /* 597 * While we overlap succeeding segments trim them or, 598 * if they are completely covered, dequeue them. 599 */ 600 while (af6 != (struct ip6asfrag *)q6 && 601 ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) { 602 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; 603 if (i < af6->ip6af_frglen) { 604 af6->ip6af_frglen -= i; 605 af6->ip6af_off += i; 606 m_adj(IP6_REASS_MBUF(af6), i); 607 q6->ip6q_csum_flags = 0; 608 break; 609 } 610 af6 = af6->ip6af_down; 611 m_freem(IP6_REASS_MBUF(af6->ip6af_up)); 612 frag6_deq(af6->ip6af_up); 613 } 614#else 615 /* 616 * If the incoming framgent overlaps some existing fragments in 617 * the reassembly queue, drop it, since it is dangerous to override 618 * existing fragments from a security point of view. 619 * We don't know which fragment is the bad guy - here we trust 620 * fragment that came in earlier, with no real reason. 621 * 622 * Note: due to changes after disabling this part, mbuf passed to 623 * m_adj() below now does not meet the requirement. 
624 */ 625 if (af6->ip6af_up != (struct ip6asfrag *)q6) { 626 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen 627 - ip6af->ip6af_off; 628 if (i > 0) { 629#if 0 /* suppress the noisy log */ 630 log(LOG_ERR, "%d bytes of a fragment from %s " 631 "overlaps the previous fragment\n", 632 i, ip6_sprintf(&q6->ip6q_src)); 633#endif 634 ip6af_free(ip6af); 635 goto dropfrag; 636 } 637 } 638 if (af6 != (struct ip6asfrag *)q6) { 639 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; 640 if (i > 0) { 641#if 0 /* suppress the noisy log */ 642 log(LOG_ERR, "%d bytes of a fragment from %s " 643 "overlaps the succeeding fragment", 644 i, ip6_sprintf(&q6->ip6q_src)); 645#endif 646 ip6af_free(ip6af); 647 goto dropfrag; 648 } 649 } 650#endif 651 652 /* 653 * If this fragment contains similar checksum offload info 654 * as that of the existing ones, accumulate checksum. Otherwise, 655 * invalidate checksum offload info for the entire datagram. 656 */ 657 if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags) 658 q6->ip6q_csum += csum; 659 else if (q6->ip6q_csum_flags != 0) 660 q6->ip6q_csum_flags = 0; 661 662insert: 663 664 /* 665 * Stick new segment in its place; 666 * check for complete reassembly. 667 * Move to front of packet queue, as we are 668 * the most recently active fragmented packet. 669 */ 670 frag6_enq(ip6af, af6->ip6af_up); 671 frag6_nfrags++; 672 q6->ip6q_nfrag++; 673#if 0 /* xxx */ 674 if (q6 != ip6q.ip6q_next) { 675 frag6_remque(q6); 676 frag6_insque(q6, &ip6q); 677 } 678#endif 679 next = 0; 680 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 681 af6 = af6->ip6af_down) { 682 if (af6->ip6af_off != next) { 683 lck_mtx_unlock(&ip6qlock); 684 locked = 0; 685 m = NULL; 686 goto done; 687 } 688 next += af6->ip6af_frglen; 689 } 690 if (af6->ip6af_up->ip6af_mff) { 691 lck_mtx_unlock(&ip6qlock); 692 locked = 0; 693 m = NULL; 694 goto done; 695 } 696 697 /* 698 * Reassembly is complete; concatenate fragments. 
699 */ 700 ip6af = q6->ip6q_down; 701 t = m = IP6_REASS_MBUF(ip6af); 702 af6 = ip6af->ip6af_down; 703 frag6_deq(ip6af); 704 while (af6 != (struct ip6asfrag *)q6) { 705 af6dwn = af6->ip6af_down; 706 frag6_deq(af6); 707 while (t->m_next) 708 t = t->m_next; 709 t->m_next = IP6_REASS_MBUF(af6); 710 m_adj(t->m_next, af6->ip6af_offset); 711 ip6af_free(af6); 712 af6 = af6dwn; 713 } 714 715 /* 716 * Store partial hardware checksum info from the fragment queue; 717 * the receive start offset is set to 40 bytes (see code at the 718 * top of this routine.) 719 */ 720 if (q6->ip6q_csum_flags != 0) { 721 csum = q6->ip6q_csum; 722 723 ADDCARRY(csum); 724 725 m->m_pkthdr.csum_rx_val = csum; 726 m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr); 727 m->m_pkthdr.csum_flags = q6->ip6q_csum_flags; 728 } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || 729 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { 730 /* loopback checksums are always OK */ 731 m->m_pkthdr.csum_data = 0xffff; 732 m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; 733 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 734 } 735 736 /* adjust offset to point where the original next header starts */ 737 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); 738 ip6af_free(ip6af); 739 ip6 = mtod(m, struct ip6_hdr *); 740 ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); 741 ip6->ip6_src = q6->ip6q_src; 742 ip6->ip6_dst = q6->ip6q_dst; 743 if (q6->ip6q_ecn == IPTOS_ECN_CE) 744 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); 745 746 nxt = q6->ip6q_nxt; 747#ifdef notyet 748 *q6->ip6q_nxtp = (u_char)(nxt & 0xff); 749#endif 750 751 /* Delete frag6 header */ 752 if (m->m_len >= offset + sizeof(struct ip6_frag)) { 753 /* This is the only possible case with !PULLDOWN_TEST */ 754 ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), 755 offset); 756 m->m_data += sizeof(struct ip6_frag); 757 m->m_len -= sizeof(struct ip6_frag); 758 } else { 759 /* this comes with no copy if the boundary is on cluster 
*/ 760 if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { 761 frag6_remque(q6); 762 frag6_nfragpackets--; 763 frag6_nfrags -= q6->ip6q_nfrag; 764 ip6q_free(q6); 765 goto dropfrag; 766 } 767 m_adj(t, sizeof(struct ip6_frag)); 768 m_cat(m, t); 769 } 770 771 /* 772 * Store NXT to the original. 773 */ 774 { 775 char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */ 776 *prvnxtp = nxt; 777 } 778 779 frag6_remque(q6); 780 frag6_nfragpackets--; 781 frag6_nfrags -= q6->ip6q_nfrag; 782 ip6q_free(q6); 783 784 if (m->m_flags & M_PKTHDR) /* Isn't it always true? */ 785 m_fixhdr(m); 786 787 ip6stat.ip6s_reassembled++; 788 789 /* 790 * Tell launch routine the next header 791 */ 792 *mp = m; 793 *offp = offset; 794 795 /* arm the purge timer if not already and if there's work to do */ 796 frag6_sched_timeout(); 797 lck_mtx_unlock(&ip6qlock); 798 in6_ifstat_inc(dstifp, ifs6_reass_ok); 799 frag6_icmp6_paramprob_error(&diq6); 800 VERIFY(MBUFQ_EMPTY(&diq6)); 801 return (nxt); 802 803done: 804 VERIFY(m == NULL); 805 if (!locked) { 806 if (frag6_nfragpackets == 0) { 807 frag6_icmp6_paramprob_error(&diq6); 808 VERIFY(MBUFQ_EMPTY(&diq6)); 809 return (IPPROTO_DONE); 810 } 811 lck_mtx_lock(&ip6qlock); 812 } 813 /* arm the purge timer if not already and if there's work to do */ 814 frag6_sched_timeout(); 815 lck_mtx_unlock(&ip6qlock); 816 frag6_icmp6_paramprob_error(&diq6); 817 VERIFY(MBUFQ_EMPTY(&diq6)); 818 return (IPPROTO_DONE); 819 820dropfrag: 821 ip6stat.ip6s_fragdropped++; 822 /* arm the purge timer if not already and if there's work to do */ 823 frag6_sched_timeout(); 824 lck_mtx_unlock(&ip6qlock); 825 in6_ifstat_inc(dstifp, ifs6_reass_fail); 826 m_freem(m); 827 frag6_icmp6_paramprob_error(&diq6); 828 VERIFY(MBUFQ_EMPTY(&diq6)); 829 return (IPPROTO_DONE); 830} 831 832/* 833 * Free a fragment reassembly header and all 834 * associated datagrams. 
835 */ 836void 837frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) 838{ 839 struct ip6asfrag *af6, *down6; 840 841 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 842 843 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 844 af6 = down6) { 845 struct mbuf *m = IP6_REASS_MBUF(af6); 846 847 down6 = af6->ip6af_down; 848 frag6_deq(af6); 849 850 /* 851 * Return ICMP time exceeded error for the 1st fragment. 852 * Just free other fragments. 853 */ 854 if (af6->ip6af_off == 0) { 855 struct ip6_hdr *ip6; 856 857 /* adjust pointer */ 858 ip6 = mtod(m, struct ip6_hdr *); 859 860 /* restore source and destination addresses */ 861 ip6->ip6_src = q6->ip6q_src; 862 ip6->ip6_dst = q6->ip6q_dst; 863 864 MBUFQ_ENQUEUE(diq6, m); 865 } else { 866 MBUFQ_ENQUEUE(dfq6, m); 867 } 868 ip6af_free(af6); 869 870 } 871 frag6_remque(q6); 872 frag6_nfragpackets--; 873 frag6_nfrags -= q6->ip6q_nfrag; 874 ip6q_free(q6); 875} 876 877/* 878 * Put an ip fragment on a reassembly chain. 879 * Like insque, but pointers in middle of structure. 880 */ 881void 882frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) 883{ 884 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 885 886 af6->ip6af_up = up6; 887 af6->ip6af_down = up6->ip6af_down; 888 up6->ip6af_down->ip6af_up = af6; 889 up6->ip6af_down = af6; 890} 891 892/* 893 * To frag6_enq as remque is to insque. 
894 */ 895void 896frag6_deq(struct ip6asfrag *af6) 897{ 898 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 899 900 af6->ip6af_up->ip6af_down = af6->ip6af_down; 901 af6->ip6af_down->ip6af_up = af6->ip6af_up; 902} 903 904void 905frag6_insque(struct ip6q *new, struct ip6q *old) 906{ 907 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 908 909 new->ip6q_prev = old; 910 new->ip6q_next = old->ip6q_next; 911 old->ip6q_next->ip6q_prev= new; 912 old->ip6q_next = new; 913} 914 915void 916frag6_remque(struct ip6q *p6) 917{ 918 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 919 920 p6->ip6q_prev->ip6q_next = p6->ip6q_next; 921 p6->ip6q_next->ip6q_prev = p6->ip6q_prev; 922} 923 924/* 925 * IPv6 reassembling timer processing; 926 * if a timer expires on a reassembly 927 * queue, discard it. 928 */ 929static void 930frag6_timeout(void *arg) 931{ 932#pragma unused(arg) 933 struct fq6_head dfq6, diq6; 934 struct ip6q *q6; 935 936 MBUFQ_INIT(&dfq6); /* for deferred frees */ 937 MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ 938 939 /* 940 * Update coarse-grained networking timestamp (in sec.); the idea 941 * is to piggy-back on the timeout callout to update the counter 942 * returnable via net_uptime(). 943 */ 944 net_update_uptime(); 945 946 lck_mtx_lock(&ip6qlock); 947 q6 = ip6q.ip6q_next; 948 if (q6) 949 while (q6 != &ip6q) { 950 --q6->ip6q_ttl; 951 q6 = q6->ip6q_next; 952 if (q6->ip6q_prev->ip6q_ttl == 0) { 953 ip6stat.ip6s_fragtimeout++; 954 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 955 frag6_freef(q6->ip6q_prev, &dfq6, &diq6); 956 } 957 } 958 /* 959 * If we are over the maximum number of fragments 960 * (due to the limit being lowered), drain off 961 * enough to get down to the new limit. 
962 */ 963 if (ip6_maxfragpackets >= 0) { 964 while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets && 965 ip6q.ip6q_prev) { 966 ip6stat.ip6s_fragoverflow++; 967 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 968 frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6); 969 } 970 } 971 /* re-arm the purge timer if there's work to do */ 972 frag6_timeout_run = 0; 973 frag6_sched_timeout(); 974 lck_mtx_unlock(&ip6qlock); 975 976 /* free fragments that need to be freed */ 977 if (!MBUFQ_EMPTY(&dfq6)) 978 MBUFQ_DRAIN(&dfq6); 979 980 frag6_icmp6_timeex_error(&diq6); 981 982 VERIFY(MBUFQ_EMPTY(&dfq6)); 983 VERIFY(MBUFQ_EMPTY(&diq6)); 984} 985 986static void 987frag6_sched_timeout(void) 988{ 989 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 990 991 if (!frag6_timeout_run && frag6_nfragpackets > 0) { 992 frag6_timeout_run = 1; 993 timeout(frag6_timeout, NULL, hz); 994 } 995} 996 997/* 998 * Drain off all datagram fragments. 999 */ 1000void 1001frag6_drain(void) 1002{ 1003 struct fq6_head dfq6, diq6; 1004 1005 MBUFQ_INIT(&dfq6); /* for deferred frees */ 1006 MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ 1007 1008 lck_mtx_lock(&ip6qlock); 1009 while (ip6q.ip6q_next != &ip6q) { 1010 ip6stat.ip6s_fragdropped++; 1011 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 1012 frag6_freef(ip6q.ip6q_next, &dfq6, &diq6); 1013 } 1014 lck_mtx_unlock(&ip6qlock); 1015 1016 /* free fragments that need to be freed */ 1017 if (!MBUFQ_EMPTY(&dfq6)) 1018 MBUFQ_DRAIN(&dfq6); 1019 1020 frag6_icmp6_timeex_error(&diq6); 1021 1022 VERIFY(MBUFQ_EMPTY(&dfq6)); 1023 VERIFY(MBUFQ_EMPTY(&diq6)); 1024} 1025 1026static struct ip6q * 1027ip6q_alloc(int how) 1028{ 1029 struct mbuf *t; 1030 struct ip6q *q6; 1031 1032 /* 1033 * See comments in ip6q_updateparams(). Keep the count separate 1034 * from frag6_nfragpackets since the latter represents the elements 1035 * already in the reassembly queues. 
1036 */ 1037 if (ip6q_limit > 0 && ip6q_count > ip6q_limit) 1038 return (NULL); 1039 1040 t = m_get(how, MT_FTABLE); 1041 if (t != NULL) { 1042 atomic_add_32(&ip6q_count, 1); 1043 q6 = mtod(t, struct ip6q *); 1044 bzero(q6, sizeof (*q6)); 1045 } else { 1046 q6 = NULL; 1047 } 1048 return (q6); 1049} 1050 1051static void 1052ip6q_free(struct ip6q *q6) 1053{ 1054 (void) m_free(dtom(q6)); 1055 atomic_add_32(&ip6q_count, -1); 1056} 1057 1058static struct ip6asfrag * 1059ip6af_alloc(int how) 1060{ 1061 struct mbuf *t; 1062 struct ip6asfrag *af6; 1063 1064 /* 1065 * See comments in ip6q_updateparams(). Keep the count separate 1066 * from frag6_nfrags since the latter represents the elements 1067 * already in the reassembly queues. 1068 */ 1069 if (ip6af_limit > 0 && ip6af_count > ip6af_limit) 1070 return (NULL); 1071 1072 t = m_get(how, MT_FTABLE); 1073 if (t != NULL) { 1074 atomic_add_32(&ip6af_count, 1); 1075 af6 = mtod(t, struct ip6asfrag *); 1076 bzero(af6, sizeof (*af6)); 1077 } else { 1078 af6 = NULL; 1079 } 1080 return (af6); 1081} 1082 1083static void 1084ip6af_free(struct ip6asfrag *af6) 1085{ 1086 (void) m_free(dtom(af6)); 1087 atomic_add_32(&ip6af_count, -1); 1088} 1089 1090static void 1091ip6q_updateparams(void) 1092{ 1093 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); 1094 /* 1095 * -1 for unlimited allocation. 1096 */ 1097 if (ip6_maxfragpackets < 0) 1098 ip6q_limit = 0; 1099 if (ip6_maxfrags < 0) 1100 ip6af_limit = 0; 1101 /* 1102 * Positive number for specific bound. 1103 */ 1104 if (ip6_maxfragpackets > 0) 1105 ip6q_limit = ip6_maxfragpackets; 1106 if (ip6_maxfrags > 0) 1107 ip6af_limit = ip6_maxfrags; 1108 /* 1109 * Zero specifies no further fragment queue allocation -- set the 1110 * bound very low, but rely on implementation elsewhere to actually 1111 * prevent allocation and reclaim current queues. 
1112 */ 1113 if (ip6_maxfragpackets == 0) 1114 ip6q_limit = 1; 1115 if (ip6_maxfrags == 0) 1116 ip6af_limit = 1; 1117 /* 1118 * Arm the purge timer if not already and if there's work to do 1119 */ 1120 frag6_sched_timeout(); 1121} 1122 1123static int 1124sysctl_maxfragpackets SYSCTL_HANDLER_ARGS 1125{ 1126#pragma unused(arg1, arg2) 1127 int error, i; 1128 1129 lck_mtx_lock(&ip6qlock); 1130 i = ip6_maxfragpackets; 1131 error = sysctl_handle_int(oidp, &i, 0, req); 1132 if (error || req->newptr == USER_ADDR_NULL) 1133 goto done; 1134 /* impose bounds */ 1135 if (i < -1 || i > (nmbclusters / 4)) { 1136 error = EINVAL; 1137 goto done; 1138 } 1139 ip6_maxfragpackets = i; 1140 ip6q_updateparams(); 1141done: 1142 lck_mtx_unlock(&ip6qlock); 1143 return (error); 1144} 1145 1146static int 1147sysctl_maxfrags SYSCTL_HANDLER_ARGS 1148{ 1149#pragma unused(arg1, arg2) 1150 int error, i; 1151 1152 lck_mtx_lock(&ip6qlock); 1153 i = ip6_maxfrags; 1154 error = sysctl_handle_int(oidp, &i, 0, req); 1155 if (error || req->newptr == USER_ADDR_NULL) 1156 goto done; 1157 /* impose bounds */ 1158 if (i < -1 || i > (nmbclusters / 4)) { 1159 error = EINVAL; 1160 goto done; 1161 } 1162 ip6_maxfrags= i; 1163 ip6q_updateparams(); /* see if we need to arm timer */ 1164done: 1165 lck_mtx_unlock(&ip6qlock); 1166 return (error); 1167} 1168