1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */ 30/* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */ 31 32/* 33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the project nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61#include <sys/param.h> 62#include <sys/systm.h> 63#include <sys/malloc.h> 64#include <sys/mcache.h> 65#include <sys/mbuf.h> 66#include <sys/domain.h> 67#include <sys/protosw.h> 68#include <sys/socket.h> 69#include <sys/errno.h> 70#include <sys/time.h> 71#include <sys/kernel.h> 72#include <sys/syslog.h> 73#include <kern/queue.h> 74#include <kern/locks.h> 75 76#include <net/if.h> 77#include <net/route.h> 78 79#include <netinet/in.h> 80#include <netinet/in_var.h> 81#include <netinet/ip.h> 82#include <netinet/ip6.h> 83#include <netinet6/ip6_var.h> 84#include <netinet/icmp6.h> 85 86#include <net/net_osdep.h> 87#include <dev/random/randomdev.h> 88 89/* 90 * Define it to get a correct behavior on per-interface statistics. 
91 */ 92#define IN6_IFSTAT_STRICT 93 94MBUFQ_HEAD(fq6_head); 95 96static void frag6_save_context(struct mbuf *, int); 97static void frag6_scrub_context(struct mbuf *); 98static int frag6_restore_context(struct mbuf *); 99 100static void frag6_icmp6_paramprob_error(struct fq6_head *); 101static void frag6_icmp6_timeex_error(struct fq6_head *); 102 103static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); 104static void frag6_deq(struct ip6asfrag *); 105static void frag6_insque(struct ip6q *, struct ip6q *); 106static void frag6_remque(struct ip6q *); 107static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *); 108 109static int frag6_timeout_run; /* frag6 timer is scheduled to run */ 110static void frag6_timeout(void *); 111static void frag6_sched_timeout(void); 112 113static struct ip6q *ip6q_alloc(int); 114static void ip6q_free(struct ip6q *); 115static void ip6q_updateparams(void); 116static struct ip6asfrag *ip6af_alloc(int); 117static void ip6af_free(struct ip6asfrag *); 118 119decl_lck_mtx_data(static, ip6qlock); 120static lck_attr_t *ip6qlock_attr; 121static lck_grp_t *ip6qlock_grp; 122static lck_grp_attr_t *ip6qlock_grp_attr; 123 124/* IPv6 fragment reassembly queues (protected by ip6qlock) */ 125static struct ip6q ip6q; /* ip6 reassembly queues */ 126static int ip6_maxfragpackets; /* max packets in reass queues */ 127static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */ 128static int ip6_maxfrags; /* max fragments in reass queues */ 129static u_int32_t frag6_nfrags; /* # of fragments in reass queues */ 130static u_int32_t ip6q_limit; /* ip6q allocation limit */ 131static u_int32_t ip6q_count; /* current # of allocated ip6q's */ 132static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */ 133static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */ 134 135static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS; 136static int sysctl_maxfrags SYSCTL_HANDLER_ARGS; 137 138SYSCTL_DECL(_net_inet6_ip6); 
139 140SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, 141 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, 142 sysctl_maxfragpackets, "I", 143 "Maximum number of IPv6 fragment reassembly queue entries"); 144 145SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, 146 CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, 147 "Current number of IPv6 fragment reassembly queue entries"); 148 149SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, 150 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, 151 sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed"); 152 153/* 154 * Initialise reassembly queue and fragment identifier. 155 */ 156void 157frag6_init(void) 158{ 159 /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */ 160 _CASSERT(sizeof (struct ip6q) <= _MLEN); 161 /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */ 162 _CASSERT(sizeof (struct ip6asfrag) <= _MLEN); 163 164 /* IPv6 fragment reassembly queue lock */ 165 ip6qlock_grp_attr = lck_grp_attr_alloc_init(); 166 ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr); 167 ip6qlock_attr = lck_attr_alloc_init(); 168 lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr); 169 170 lck_mtx_lock(&ip6qlock); 171 /* Initialize IPv6 reassembly queue. 
*/ 172 ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; 173 174 /* same limits as IPv4 */ 175 ip6_maxfragpackets = nmbclusters / 32; 176 ip6_maxfrags = ip6_maxfragpackets * 2; 177 ip6q_updateparams(); 178 lck_mtx_unlock(&ip6qlock); 179} 180 181static void 182frag6_save_context(struct mbuf *m, int val) 183{ 184 m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val; 185} 186 187static void 188frag6_scrub_context(struct mbuf *m) 189{ 190 m->m_pkthdr.pkt_hdr = NULL; 191} 192 193static int 194frag6_restore_context(struct mbuf *m) 195{ 196 return ((int)m->m_pkthdr.pkt_hdr); 197} 198 199/* 200 * Send any deferred ICMP param problem error messages; caller must not be 201 * holding ip6qlock and is expected to have saved the per-packet parameter 202 * value via frag6_save_context(). 203 */ 204static void 205frag6_icmp6_paramprob_error(struct fq6_head *diq6) 206{ 207 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); 208 209 if (!MBUFQ_EMPTY(diq6)) { 210 struct mbuf *merr, *merr_tmp; 211 int param; 212 MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { 213 MBUFQ_REMOVE(diq6, merr); 214 MBUFQ_NEXT(merr) = NULL; 215 param = frag6_restore_context(merr); 216 frag6_scrub_context(merr); 217 icmp6_error(merr, ICMP6_PARAM_PROB, 218 ICMP6_PARAMPROB_HEADER, param); 219 } 220 } 221} 222 223/* 224 * Send any deferred ICMP time exceeded error messages; 225 * caller must not be holding ip6qlock. 226 */ 227static void 228frag6_icmp6_timeex_error(struct fq6_head *diq6) 229{ 230 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); 231 232 if (!MBUFQ_EMPTY(diq6)) { 233 struct mbuf *m, *m_tmp; 234 MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { 235 MBUFQ_REMOVE(diq6, m); 236 MBUFQ_NEXT(m) = NULL; 237 icmp6_error(m, ICMP6_TIME_EXCEEDED, 238 ICMP6_TIME_EXCEED_REASSEMBLY, 0); 239 } 240 } 241} 242 243/* 244 * In RFC2460, fragment and reassembly rule do not agree with each other, 245 * in terms of next header field handling in fragment header. 
246 * While the sender will use the same value for all of the fragmented packets, 247 * receiver is suggested not to check the consistency. 248 * 249 * fragment rule (p20): 250 * (2) A Fragment header containing: 251 * The Next Header value that identifies the first header of 252 * the Fragmentable Part of the original packet. 253 * -> next header field is same for all fragments 254 * 255 * reassembly rule (p21): 256 * The Next Header field of the last header of the Unfragmentable 257 * Part is obtained from the Next Header field of the first 258 * fragment's Fragment header. 259 * -> should grab it from the first fragment only 260 * 261 * The following note also contradicts with fragment rule - noone is going to 262 * send different fragment with different next header field. 263 * 264 * additional note (p22): 265 * The Next Header values in the Fragment headers of different 266 * fragments of the same original packet may differ. Only the value 267 * from the Offset zero fragment packet is used for reassembly. 268 * -> should grab it from the first fragment only 269 * 270 * There is no explicit reason given in the RFC. Historical reason maybe? 
 */
/*
 * Fragment input.
 *
 * Protocol-switch entry: *mp is the received fragment (M_PKTHDR),
 * *offp the offset of the fragment header within it.  On complete
 * reassembly, *mp/*offp are rewritten to the reassembled datagram and
 * the next-header value is returned; otherwise the fragment is queued
 * (or dropped) and IPPROTO_DONE is returned.
 *
 * All queue manipulation happens under ip6qlock.  ICMPv6 param-problem
 * errors discovered while the lock is held are deferred onto diq6 and
 * sent only after the lock is dropped (see frag6_icmp6_paramprob_error).
 */
int
frag6_input(struct mbuf **mp, int *offp, int proto)
{
#pragma unused(proto)
	struct mbuf *m = *mp, *t;
	struct ip6_hdr *ip6;
	struct ip6_frag *ip6f;
	struct ip6q *q6;
	struct ip6asfrag *af6, *ip6af, *af6dwn;
	int offset = *offp, nxt, i, next;
	int first_frag = 0;
	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
	struct ifnet *dstifp = NULL;
	u_int8_t ecn, ecn0;
	uint32_t csum, csum_flags;
	struct fq6_head diq6;
	int locked = 0;

	VERIFY(m->m_flags & M_PKTHDR);

	MBUFQ_INIT(&diq6);	/* for deferred ICMP param problem errors */

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done);
	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
#else
	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
	if (ip6f == NULL)
		goto done;
#endif

#ifdef IN6_IFSTAT_STRICT
	/* find the destination interface of the packet. */
	if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) {
		uint32_t idx;

		if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) {
			if (idx > 0 && idx <= if_index) {
				ifnet_head_lock_shared();
				dstifp = ifindex2ifnet[idx];
				ifnet_head_done();
			}
		}
	}
#endif /* IN6_IFSTAT_STRICT */

	/* we are violating the spec, this may not be the dst interface */
	if (dstifp == NULL)
		dstifp = m->m_pkthdr.rcvif;

	/* jumbo payload can't contain a fragment header */
	if (ip6->ip6_plen == 0) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		m = NULL;
		goto done;
	}

	/*
	 * check whether fragment packet's fragment length is
	 * multiple of 8 octets.
	 * sizeof(struct ip6_frag) == 8
	 * sizeof(struct ip6_hdr) = 40
	 */
	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offsetof(struct ip6_hdr, ip6_plen));
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		m = NULL;
		goto done;
	}

	/* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */
	if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) {
		ip6stat.ip6s_fragments++;
		ip6stat.ip6s_fragdropped++;
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		m_freem(m);
		m = NULL;
		goto done;
	}

	/* offset now points to data portion */
	offset += sizeof(struct ip6_frag);

	/*
	 * Leverage partial checksum offload for simple UDP/IP fragments,
	 * as that is the most common case.
	 *
	 * Perform 1's complement adjustment of octets that got included/
	 * excluded in the hardware-calculated checksum value.
	 */
	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
	    offset == (sizeof (*ip6) + sizeof (*ip6f)) &&
	    (m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t start;

		start = m->m_pkthdr.csum_rx_start;
		csum = m->m_pkthdr.csum_rx_val;

		if (start != offset) {
			uint16_t s, d;

			/*
			 * Temporarily zero out embedded scope-ids so they
			 * don't perturb the checksum adjustment; restored
			 * below after m_adj_sum16().
			 */
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
				s = ip6->ip6_src.s6_addr16[1];
				ip6->ip6_src.s6_addr16[1] = 0;
			}
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
				d = ip6->ip6_dst.s6_addr16[1];
				ip6->ip6_dst.s6_addr16[1] = 0;
			}

			/* callee folds in sum */
			csum = m_adj_sum16(m, start, offset, csum);

			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
				ip6->ip6_src.s6_addr16[1] = s;
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
				ip6->ip6_dst.s6_addr16[1] = d;

		}
		csum_flags = m->m_pkthdr.csum_flags;
	} else {
		csum = 0;
		csum_flags = 0;
	}

	/* Invalidate checksum */
	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;

	ip6stat.ip6s_fragments++;
	in6_ifstat_inc(dstifp, ifs6_reass_reqd);

	lck_mtx_lock(&ip6qlock);
	locked = 1;

	/* look up an existing reassembly queue by (ident, src, dst) */
	for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
		if (ip6f->ip6f_ident == q6->ip6q_ident &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
			break;

	if (q6 == &ip6q) {
		/*
		 * the first fragment to arrive, create a reassembly queue.
		 */
		first_frag = 1;

		q6 = ip6q_alloc(M_DONTWAIT);
		if (q6 == NULL)
			goto dropfrag;

		frag6_insque(q6, &ip6q);
		frag6_nfragpackets++;

		/* ip6q_nxt will be filled afterwards, from 1st fragment */
		q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
#ifdef notyet
		q6->ip6q_nxtp = (u_char *)nxtp;
#endif
		q6->ip6q_ident = ip6f->ip6f_ident;
		q6->ip6q_ttl = IPV6_FRAGTTL;
		q6->ip6q_src = ip6->ip6_src;
		q6->ip6q_dst = ip6->ip6_dst;
		q6->ip6q_ecn =
		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */

		q6->ip6q_nfrag = 0;

		/*
		 * If the first fragment has valid checksum offload
		 * info, the rest of fragments are eligible as well.
		 */
		if (csum_flags != 0) {
			q6->ip6q_csum = csum;
			q6->ip6q_csum_flags = csum_flags;
		}
	}

	/*
	 * If it's the 1st fragment, record the length of the
	 * unfragmentable part and the next header of the fragment header.
	 */
	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff == 0) {
		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
		    sizeof(struct ip6_frag);
		q6->ip6q_nxt = ip6f->ip6f_nxt;
	}

	/*
	 * Check that the reassembled packet would not exceed 65535 bytes
	 * in size.
	 * If it would exceed, discard the fragment and return an ICMP error.
	 */
	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
	if (q6->ip6q_unfrglen >= 0) {
		/* The 1st fragment has already arrived. */
		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
			lck_mtx_unlock(&ip6qlock);
			locked = 0;
			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
			    offset - sizeof(struct ip6_frag) +
			    offsetof(struct ip6_frag, ip6f_offlg));
			m = NULL;
			goto done;
		}
	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
		lck_mtx_unlock(&ip6qlock);
		locked = 0;
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offset - sizeof(struct ip6_frag) +
		    offsetof(struct ip6_frag, ip6f_offlg));
		m = NULL;
		goto done;
	}
	/*
	 * If it's the first fragment, do the above check for each
	 * fragment already stored in the reassembly queue.
	 * Offenders are queued on diq6 and errored out after the
	 * lock is dropped (icmp6_error() can't be called under ip6qlock).
	 */
	if (fragoff == 0) {
		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
		    af6 = af6dwn) {
			af6dwn = af6->ip6af_down;

			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
			    IPV6_MAXPACKET) {
				struct mbuf *merr = IP6_REASS_MBUF(af6);
				struct ip6_hdr *ip6err;
				int erroff = af6->ip6af_offset;

				/* dequeue the fragment. */
				frag6_deq(af6);
				ip6af_free(af6);

				/* adjust pointer. */
				ip6err = mtod(merr, struct ip6_hdr *);

				/*
				 * Restore source and destination addresses
				 * in the erroneous IPv6 header.
				 */
				ip6err->ip6_src = q6->ip6q_src;
				ip6err->ip6_dst = q6->ip6q_dst;

				frag6_save_context(merr,
				    erroff - sizeof (struct ip6_frag) +
				    offsetof(struct ip6_frag, ip6f_offlg));

				MBUFQ_ENQUEUE(&diq6, merr);
			}
		}
	}

	ip6af = ip6af_alloc(M_DONTWAIT);
	if (ip6af == NULL)
		goto dropfrag;

	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
	ip6af->ip6af_off = fragoff;
	ip6af->ip6af_frglen = frgpartlen;
	ip6af->ip6af_offset = offset;
	IP6_REASS_MBUF(ip6af) = m;

	if (first_frag) {
		af6 = (struct ip6asfrag *)q6;
		goto insert;
	}

	/*
	 * Handle ECN by comparing this segment with the first one;
	 * if CE is set, do not lose CE.
	 * drop if CE and not-ECT are mixed for the same packet.
	 */
	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
	ecn0 = q6->ip6q_ecn;
	if (ecn == IPTOS_ECN_CE) {
		if (ecn0 == IPTOS_ECN_NOTECT) {
			ip6af_free(ip6af);
			goto dropfrag;
		}
		if (ecn0 != IPTOS_ECN_CE)
			q6->ip6q_ecn = IPTOS_ECN_CE;
	}
	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
		ip6af_free(ip6af);
		goto dropfrag;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	    af6 = af6->ip6af_down)
		if (af6->ip6af_off > ip6af->ip6af_off)
			break;

#if 0
	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 *
	 * If some of the data is dropped from the preceding
	 * segment, then it's checksum is invalidated.
	 */
	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
			- ip6af->ip6af_off;
		if (i > 0) {
			if (i >= ip6af->ip6af_frglen)
				goto dropfrag;
			m_adj(IP6_REASS_MBUF(ip6af), i);
			q6->ip6q_csum_flags = 0;
			ip6af->ip6af_off += i;
			ip6af->ip6af_frglen -= i;
		}
	}

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (af6 != (struct ip6asfrag *)q6 &&
	       ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
		if (i < af6->ip6af_frglen) {
			af6->ip6af_frglen -= i;
			af6->ip6af_off += i;
			m_adj(IP6_REASS_MBUF(af6), i);
			q6->ip6q_csum_flags = 0;
			break;
		}
		af6 = af6->ip6af_down;
		m_freem(IP6_REASS_MBUF(af6->ip6af_up));
		frag6_deq(af6->ip6af_up);
	}
#else
	/*
	 * If the incoming framgent overlaps some existing fragments in
	 * the reassembly queue, drop it, since it is dangerous to override
	 * existing fragments from a security point of view.
	 * We don't know which fragment is the bad guy - here we trust
	 * fragment that came in earlier, with no real reason.
	 *
	 * Note: due to changes after disabling this part, mbuf passed to
	 * m_adj() below now does not meet the requirement.
	 */
	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
			- ip6af->ip6af_off;
		if (i > 0) {
#if 0				/* suppress the noisy log */
			log(LOG_ERR, "%d bytes of a fragment from %s "
			    "overlaps the previous fragment\n",
			    i, ip6_sprintf(&q6->ip6q_src));
#endif
			ip6af_free(ip6af);
			goto dropfrag;
		}
	}
	if (af6 != (struct ip6asfrag *)q6) {
		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
		if (i > 0) {
#if 0				/* suppress the noisy log */
			log(LOG_ERR, "%d bytes of a fragment from %s "
			    "overlaps the succeeding fragment",
			    i, ip6_sprintf(&q6->ip6q_src));
#endif
			ip6af_free(ip6af);
			goto dropfrag;
		}
	}
#endif

	/*
	 * If this fragment contains similar checksum offload info
	 * as that of the existing ones, accumulate checksum.  Otherwise,
	 * invalidate checksum offload info for the entire datagram.
	 */
	if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags)
		q6->ip6q_csum += csum;
	else if (q6->ip6q_csum_flags != 0)
		q6->ip6q_csum_flags = 0;

insert:

	/*
	 * Stick new segment in its place;
	 * check for complete reassembly.
	 * Move to front of packet queue, as we are
	 * the most recently active fragmented packet.
	 */
	frag6_enq(ip6af, af6->ip6af_up);
	frag6_nfrags++;
	q6->ip6q_nfrag++;
#if 0 /* xxx */
	if (q6 != ip6q.ip6q_next) {
		frag6_remque(q6);
		frag6_insque(q6, &ip6q);
	}
#endif
	/* walk the chain; any gap means reassembly is not yet complete */
	next = 0;
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	    af6 = af6->ip6af_down) {
		if (af6->ip6af_off != next) {
			lck_mtx_unlock(&ip6qlock);
			locked = 0;
			m = NULL;
			goto done;
		}
		next += af6->ip6af_frglen;
	}
	/* last fragment still has More-Fragments set: not complete yet */
	if (af6->ip6af_up->ip6af_mff) {
		lck_mtx_unlock(&ip6qlock);
		locked = 0;
		m = NULL;
		goto done;
	}

	/*
	 * Reassembly is complete; concatenate fragments.
	 */
	ip6af = q6->ip6q_down;
	t = m = IP6_REASS_MBUF(ip6af);
	af6 = ip6af->ip6af_down;
	frag6_deq(ip6af);
	while (af6 != (struct ip6asfrag *)q6) {
		af6dwn = af6->ip6af_down;
		frag6_deq(af6);
		while (t->m_next)
			t = t->m_next;
		t->m_next = IP6_REASS_MBUF(af6);
		/* strip this fragment's headers before chaining its payload */
		m_adj(t->m_next, af6->ip6af_offset);
		ip6af_free(af6);
		af6 = af6dwn;
	}

	/*
	 * Store partial hardware checksum info from the fragment queue;
	 * the receive start offset is set to 40 bytes (see code at the
	 * top of this routine.)
	 */
	if (q6->ip6q_csum_flags != 0) {
		csum = q6->ip6q_csum;

		ADDCARRY(csum);

		m->m_pkthdr.csum_rx_val = csum;
		m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr);
		m->m_pkthdr.csum_flags = q6->ip6q_csum_flags;
	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		/* loopback checksums are always OK */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
		m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	/* adjust offset to point where the original next header starts */
	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
	ip6af_free(ip6af);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
	ip6->ip6_src = q6->ip6q_src;
	ip6->ip6_dst = q6->ip6q_dst;
	if (q6->ip6q_ecn == IPTOS_ECN_CE)
		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);

	nxt = q6->ip6q_nxt;
#ifdef notyet
	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif

	/* Delete frag6 header */
	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
		/* This is the only possible case with !PULLDOWN_TEST */
		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
		    offset);
		m->m_data += sizeof(struct ip6_frag);
		m->m_len -= sizeof(struct ip6_frag);
	} else {
		/* this comes with no copy if the boundary is on cluster */
		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
			frag6_remque(q6);
			frag6_nfragpackets--;
			frag6_nfrags -= q6->ip6q_nfrag;
			ip6q_free(q6);
			goto dropfrag;
		}
		m_adj(t, sizeof(struct ip6_frag));
		m_cat(m, t);
	}

	/*
	 * Store NXT to the original.
	 */
	{
		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
		*prvnxtp = nxt;
	}

	/* retire the (now empty) reassembly queue */
	frag6_remque(q6);
	frag6_nfragpackets--;
	frag6_nfrags -= q6->ip6q_nfrag;
	ip6q_free(q6);

	if (m->m_flags & M_PKTHDR)	/* Isn't it always true? */
		m_fixhdr(m);

	ip6stat.ip6s_reassembled++;

	/*
	 * Tell launch routine the next header
	 */
	*mp = m;
	*offp = offset;

	/* arm the purge timer if not already and if there's work to do */
	frag6_sched_timeout();
	lck_mtx_unlock(&ip6qlock);
	in6_ifstat_inc(dstifp, ifs6_reass_ok);
	frag6_icmp6_paramprob_error(&diq6);
	VERIFY(MBUFQ_EMPTY(&diq6));
	return (nxt);

done:
	VERIFY(m == NULL);
	if (!locked) {
		if (frag6_nfragpackets == 0) {
			frag6_icmp6_paramprob_error(&diq6);
			VERIFY(MBUFQ_EMPTY(&diq6));
			return (IPPROTO_DONE);
		}
		lck_mtx_lock(&ip6qlock);
	}
	/* arm the purge timer if not already and if there's work to do */
	frag6_sched_timeout();
	lck_mtx_unlock(&ip6qlock);
	frag6_icmp6_paramprob_error(&diq6);
	VERIFY(MBUFQ_EMPTY(&diq6));
	return (IPPROTO_DONE);

dropfrag:
	ip6stat.ip6s_fragdropped++;
	/* arm the purge timer if not already and if there's work to do */
	frag6_sched_timeout();
	lck_mtx_unlock(&ip6qlock);
	in6_ifstat_inc(dstifp, ifs6_reass_fail);
	m_freem(m);
	frag6_icmp6_paramprob_error(&diq6);
	VERIFY(MBUFQ_EMPTY(&diq6));
	return (IPPROTO_DONE);
}

/*
 * Free a fragment reassembly header and all
 * associated datagrams.
 *
 * Called with ip6qlock held.  The fragments themselves are not freed
 * here; they are moved onto the caller-supplied queues (dfq6 for plain
 * frees, diq6 for deferred ICMP time-exceeded errors) to be drained
 * after the lock is dropped — see frag6_timeout() and frag6_drain().
 */
void
frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6)
{
	struct ip6asfrag *af6, *down6;

	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);

	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	    af6 = down6) {
		struct mbuf *m = IP6_REASS_MBUF(af6);

		down6 = af6->ip6af_down;
		frag6_deq(af6);

		/*
		 * Return ICMP time exceeded error for the 1st fragment.
		 * Just free other fragments.
		 */
		if (af6->ip6af_off == 0) {
			struct ip6_hdr *ip6;

			/* adjust pointer */
			ip6 = mtod(m, struct ip6_hdr *);

			/* restore source and destination addresses */
			ip6->ip6_src = q6->ip6q_src;
			ip6->ip6_dst = q6->ip6q_dst;

			MBUFQ_ENQUEUE(diq6, m);
		} else {
			MBUFQ_ENQUEUE(dfq6, m);
		}
		ip6af_free(af6);

	}
	frag6_remque(q6);
	frag6_nfragpackets--;
	frag6_nfrags -= q6->ip6q_nfrag;
	ip6q_free(q6);
}

/*
 * Put an ip fragment on a reassembly chain.
 * Like insque, but pointers in middle of structure.
 */
void
frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
{
	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);

	af6->ip6af_up = up6;
	af6->ip6af_down = up6->ip6af_down;
	up6->ip6af_down->ip6af_up = af6;
	up6->ip6af_down = af6;
}

/*
 * To frag6_enq as remque is to insque.
 */
void
frag6_deq(struct ip6asfrag *af6)
{
	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);

	af6->ip6af_up->ip6af_down = af6->ip6af_down;
	af6->ip6af_down->ip6af_up = af6->ip6af_up;
}

/* Insert reassembly queue 'new' right after 'old' in the circular list. */
void
frag6_insque(struct ip6q *new, struct ip6q *old)
{
	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);

	new->ip6q_prev = old;
	new->ip6q_next = old->ip6q_next;
	old->ip6q_next->ip6q_prev= new;
	old->ip6q_next = new;
}

/* Unlink reassembly queue 'p6' from the circular list. */
void
frag6_remque(struct ip6q *p6)
{
	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);

	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
}

/*
 * IPv6 reassembling timer processing;
 * if a timer expires on a reassembly
 * queue, discard it.
 */
static void
frag6_timeout(void *arg)
{
#pragma unused(arg)
	struct fq6_head dfq6, diq6;
	struct ip6q *q6;

	MBUFQ_INIT(&dfq6);	/* for deferred frees */
	MBUFQ_INIT(&diq6);	/* for deferred ICMP time exceeded errors */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	lck_mtx_lock(&ip6qlock);
	q6 = ip6q.ip6q_next;
	if (q6)
		while (q6 != &ip6q) {
			--q6->ip6q_ttl;
			/*
			 * Advance before checking/freeing: frag6_freef()
			 * unlinks the entry, so the expired node is
			 * reached via ip6q_prev of the already-advanced
			 * iterator.
			 */
			q6 = q6->ip6q_next;
			if (q6->ip6q_prev->ip6q_ttl == 0) {
				ip6stat.ip6s_fragtimeout++;
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(q6->ip6q_prev, &dfq6, &diq6);
			}
		}
	/*
	 * If we are over the maximum number of fragments
	 * (due to the limit being lowered), drain off
	 * enough to get down to the new limit.
	 */
	if (ip6_maxfragpackets >= 0) {
		while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets &&
		    ip6q.ip6q_prev) {
			ip6stat.ip6s_fragoverflow++;
			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
			frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6);
		}
	}
	/* re-arm the purge timer if there's work to do */
	frag6_timeout_run = 0;
	frag6_sched_timeout();
	lck_mtx_unlock(&ip6qlock);

	/* free fragments that need to be freed */
	if (!MBUFQ_EMPTY(&dfq6))
		MBUFQ_DRAIN(&dfq6);

	/* sent only after ip6qlock has been dropped */
	frag6_icmp6_timeex_error(&diq6);

	VERIFY(MBUFQ_EMPTY(&dfq6));
	VERIFY(MBUFQ_EMPTY(&diq6));
}

/*
 * Arm the one-shot purge timer (1 Hz tick) if it is not already
 * scheduled and there are queued reassembly packets to age out.
 * Called with ip6qlock held.
 */
static void
frag6_sched_timeout(void)
{
	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);

	if (!frag6_timeout_run && frag6_nfragpackets > 0) {
		frag6_timeout_run = 1;
		timeout(frag6_timeout, NULL, hz);
	}
}

/*
 * Drain off all datagram fragments.
 */
void
frag6_drain(void)
{
	struct fq6_head dfq6, diq6;

	MBUFQ_INIT(&dfq6);	/* for deferred frees */
	MBUFQ_INIT(&diq6);	/* for deferred ICMP time exceeded errors */

	lck_mtx_lock(&ip6qlock);
	while (ip6q.ip6q_next != &ip6q) {
		ip6stat.ip6s_fragdropped++;
		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
		frag6_freef(ip6q.ip6q_next, &dfq6, &diq6);
	}
	lck_mtx_unlock(&ip6qlock);

	/* free fragments that need to be freed */
	if (!MBUFQ_EMPTY(&dfq6))
		MBUFQ_DRAIN(&dfq6);

	/* sent only after ip6qlock has been dropped */
	frag6_icmp6_timeex_error(&diq6);

	VERIFY(MBUFQ_EMPTY(&dfq6));
	VERIFY(MBUFQ_EMPTY(&diq6));
}

/*
 * Allocate an ip6q reassembly-queue header, backed by a plain mbuf
 * (sizeof(struct ip6q) <= _MLEN is asserted in frag6_init()).
 * Returns NULL when the allocation limit is hit or m_get() fails.
 */
static struct ip6q *
ip6q_alloc(int how)
{
	struct mbuf *t;
	struct ip6q *q6;

	/*
	 * See comments in ip6q_updateparams().  Keep the count separate
	 * from frag6_nfragpackets since the latter represents the elements
	 * already in the reassembly queues.
	 */
	if (ip6q_limit > 0 && ip6q_count > ip6q_limit)
		return (NULL);

	t = m_get(how, MT_FTABLE);
	if (t != NULL) {
		atomic_add_32(&ip6q_count, 1);
		q6 = mtod(t, struct ip6q *);
		bzero(q6, sizeof (*q6));
	} else {
		q6 = NULL;
	}
	return (q6);
}

/* Release an ip6q header back to the mbuf pool and drop the count. */
static void
ip6q_free(struct ip6q *q6)
{
	(void) m_free(dtom(q6));
	atomic_add_32(&ip6q_count, -1);
}

/*
 * Allocate an ip6asfrag per-fragment descriptor, backed by a plain
 * mbuf (sizeof(struct ip6asfrag) <= _MLEN is asserted in frag6_init()).
 * Returns NULL when the allocation limit is hit or m_get() fails.
 */
static struct ip6asfrag *
ip6af_alloc(int how)
{
	struct mbuf *t;
	struct ip6asfrag *af6;

	/*
	 * See comments in ip6q_updateparams().  Keep the count separate
	 * from frag6_nfrags since the latter represents the elements
	 * already in the reassembly queues.
	 */
	if (ip6af_limit > 0 && ip6af_count > ip6af_limit)
		return (NULL);

	t = m_get(how, MT_FTABLE);
	if (t != NULL) {
		atomic_add_32(&ip6af_count, 1);
		af6 = mtod(t, struct ip6asfrag *);
		bzero(af6, sizeof (*af6));
	} else {
		af6 = NULL;
	}
	return (af6);
}

/* Release an ip6asfrag descriptor back to the mbuf pool and drop the count. */
static void
ip6af_free(struct ip6asfrag *af6)
{
	(void) m_free(dtom(af6));
	atomic_add_32(&ip6af_count, -1);
}

/*
 * Recompute the ip6q/ip6asfrag allocation limits from the sysctl
 * knobs ip6_maxfragpackets and ip6_maxfrags.  Called with ip6qlock
 * held (from frag6_init() and the sysctl handlers).
 */
static void
ip6q_updateparams(void)
{
	lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
	/*
	 * -1 for unlimited allocation.
	 */
	if (ip6_maxfragpackets < 0)
		ip6q_limit = 0;
	if (ip6_maxfrags < 0)
		ip6af_limit = 0;
	/*
	 * Positive number for specific bound.
	 */
	if (ip6_maxfragpackets > 0)
		ip6q_limit = ip6_maxfragpackets;
	if (ip6_maxfrags > 0)
		ip6af_limit = ip6_maxfrags;
	/*
	 * Zero specifies no further fragment queue allocation -- set the
	 * bound very low, but rely on implementation elsewhere to actually
	 * prevent allocation and reclaim current queues.
	 */
	if (ip6_maxfragpackets == 0)
		ip6q_limit = 1;
	if (ip6_maxfrags == 0)
		ip6af_limit = 1;
	/*
	 * Arm the purge timer if not already and if there's work to do
	 */
	frag6_sched_timeout();
}

/*
 * sysctl handler for net.inet6.ip6.maxfragpackets; validates the new
 * value (range [-1, nmbclusters/4]) and refreshes the derived limits.
 */
static int
sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, i;

	lck_mtx_lock(&ip6qlock);
	i = ip6_maxfragpackets;
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || req->newptr == USER_ADDR_NULL)
		goto done;
	/* impose bounds */
	if (i < -1 || i > (nmbclusters / 4)) {
		error = EINVAL;
		goto done;
	}
	ip6_maxfragpackets = i;
	ip6q_updateparams();
done:
	lck_mtx_unlock(&ip6qlock);
	return (error);
}

/*
 * sysctl handler for net.inet6.ip6.maxfrags; validates the new value
 * (range [-1, nmbclusters/4]) and refreshes the derived limits.
 */
static int
sysctl_maxfrags SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, i;

	lck_mtx_lock(&ip6qlock);
	i = ip6_maxfrags;
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || req->newptr == USER_ADDR_NULL)
		goto done;
	/* impose bounds */
	if (i < -1 || i > (nmbclusters / 4)) {
		error = EINVAL;
		goto done;
	}
	ip6_maxfrags= i;
	ip6q_updateparams();	/* see if we need to arm timer */
done:
	lck_mtx_unlock(&ip6qlock);
	return (error);
}