1/* 2 * Copyright (c) 2012-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/zalloc.h>
#include <kern/locks.h>

#include <mach/thread_act.h>
#include <mach/sdt.h>

#include <dev/random/randomdev.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_timer.h>

/*
 * Tunables exported under net.inet.mptcp.*; all are plain ints read
 * without locking (CTLFLAG_LOCKED means no Giant, not per-var locking).
 */

/* Master switch: nonzero enables MPTCP negotiation on new connections */
int mptcp_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_enable, 0, "Enable Multipath TCP Support");

/* Debug verbosity level; compared against MP_VERBOSE_DEBUG_* thresholds */
int mptcp_dbg = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg, 0, "Enable Multipath TCP Debugging");

/* Number of times to try negotiating MPTCP on SYN retransmissions */
int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");

/*
 * By default, DSS checksum is turned off, revisit if we ever do
 * MPTCP for non SSL Traffic.
 */
int mptcp_dss_csum = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dss_csum, 0, "Enable DSS checksum");

/*
 * When mptcp_fail_thresh number of retransmissions are sent, subflow failover
 * is attempted on a different path.
 */
int mptcp_fail_thresh = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_fail_thresh, 0, "Failover threshold");


/*
 * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime
 * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
 * Some carrier networks have a timeout of 10 or 15 minutes.
 */
int mptcp_subflow_keeptime = 60*14;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_subflow_keeptime, 0, "Keepalive in seconds");

/*
 * MP_PRIO option.
 */
int mptcp_mpprio_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mpprio, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_mpprio_enable, 0, "Enable MP_PRIO option");

/*
 * REMOVE_ADDR option.
 */
int mptcp_remaddr_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, remaddr, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_remaddr_enable, 0, "Enable REMOVE_ADDR option");

/*
 * FastJoin Option
 */
int mptcp_fastjoin = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fastjoin, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_fastjoin, 0, "Enable FastJoin Option");

/* Allow data to be written on a fast-join subflow before any RTT completes */
int mptcp_zerortt_fastjoin = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, zerortt_fastjoin, CTLFLAG_RW |
    CTLFLAG_LOCKED, &mptcp_zerortt_fastjoin, 0,
    "Enable Zero RTT Fast Join");

/*
 * R/W Notification on resume
 */
int mptcp_rwnotify = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rwnotify, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_rwnotify, 0, "Enable RW notify on resume");

/*
 * MPTCP input, called when data has been read from a subflow socket.
135 */ 136void 137mptcp_input(struct mptses *mpte, struct mbuf *m) 138{ 139 struct socket *mp_so; 140 struct mptcb *mp_tp = NULL; 141 u_int64_t mb_dsn; 142 u_int32_t mb_datalen; 143 int count = 0; 144 struct mbuf *save = NULL, *prev = NULL; 145 struct mbuf *freelist = NULL, *tail = NULL; 146 boolean_t in_fallback = FALSE; 147 148 VERIFY(m->m_flags & M_PKTHDR); 149 150 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ 151 mp_so = mpte->mpte_mppcb->mpp_socket; 152 153 DTRACE_MPTCP(input); 154 155 /* 156 * Each mbuf contains MPTCP Data Sequence Map 157 * Process the data for reassembly, delivery to MPTCP socket 158 * client, etc. 159 * 160 */ 161 count = mp_so->so_rcv.sb_cc; 162 163 VERIFY(m != NULL); 164 mp_tp = mpte->mpte_mptcb; 165 VERIFY(mp_tp != NULL); 166 167 /* Ok to check for this flag without lock as its set in this thread */ 168 in_fallback = (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP); 169 170 /* 171 * In the degraded fallback case, data is accepted without DSS map 172 */ 173 if (in_fallback) { 174fallback: 175 /* 176 * assume degraded flow as this may be the first packet 177 * without DSS, and the subflow state is not updated yet. 178 */ 179 if (sbappendstream(&mp_so->so_rcv, m)) 180 sorwakeup(mp_so); 181 DTRACE_MPTCP5(receive__degraded, struct mbuf *, m, 182 struct socket *, mp_so, 183 struct sockbuf *, &mp_so->so_rcv, 184 struct sockbuf *, &mp_so->so_snd, 185 struct mptses *, mpte); 186 count = mp_so->so_rcv.sb_cc - count; 187 mptcplog3((LOG_DEBUG, "%s: fread %d bytes\n", __func__, count)); 188 return; 189 } 190 191 MPT_LOCK(mp_tp); 192 do { 193 /* If fallback occurs, mbufs will not have PKTF_MPTCP set */ 194 if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { 195 MPT_UNLOCK(mp_tp); 196 goto fallback; 197 } 198 199 save = m->m_next; 200 /* 201 * A single TCP packet formed of multiple mbufs 202 * holds DSS mapping in the first mbuf of the chain. 
203 * Other mbufs in the chain may have M_PKTHDR set 204 * even though they belong to the same TCP packet 205 * and therefore use the DSS mapping stored in the 206 * first mbuf of the mbuf chain. mptcp_input() can 207 * get an mbuf chain with multiple TCP packets. 208 */ 209 while (save && (!(save->m_flags & M_PKTHDR) || 210 !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) { 211 prev = save; 212 save = save->m_next; 213 } 214 if (prev) 215 prev->m_next = NULL; 216 else 217 m->m_next = NULL; 218 219 mb_dsn = m->m_pkthdr.mp_dsn; 220 mb_datalen = m->m_pkthdr.mp_rlen; 221 222 if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvatmark)) { 223 tcpstat.tcps_mp_oodata++; 224 MPT_UNLOCK(mp_tp); 225 m_freem(m); 226 return; 227 /* 228 * Reassembly queue support here in future. Per spec, 229 * senders must implement retransmission timer to 230 * retransmit unacked data. Dropping out of order 231 * gives a slight hit on performance but allows us to 232 * deploy MPTCP and protects us against in-window DoS 233 * attacks that attempt to use up memory by sending 234 * out of order data. When doing load sharing across 235 * subflows, out of order support is a must. 
236 */ 237 } 238 239 if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvatmark)) { 240 if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), 241 mp_tp->mpt_rcvatmark)) { 242 if (freelist == NULL) 243 freelist = m; 244 else 245 tail->m_next = m; 246 247 if (prev != NULL) 248 tail = prev; 249 else 250 tail = m; 251 252 m = save; 253 prev = save = NULL; 254 continue; 255 } else { 256 m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn)); 257 } 258 mptcplog((LOG_INFO, "%s: %llu %d 2 \n", __func__, 259 mp_tp->mpt_rcvatmark, m->m_pkthdr.len)); 260 } 261 262 MPT_UNLOCK(mp_tp); 263 if (sbappendstream(&mp_so->so_rcv, m)) { 264 sorwakeup(mp_so); 265 } 266 DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so, 267 struct sockbuf *, &mp_so->so_rcv, 268 struct sockbuf *, &mp_so->so_snd, 269 struct mptses *, mpte, 270 struct mptcb *, mp_tp); 271 MPT_LOCK(mp_tp); 272 count = mp_so->so_rcv.sb_cc - count; 273 tcpstat.tcps_mp_rcvtotal++; 274 tcpstat.tcps_mp_rcvbytes += count; 275 mptcplog3((LOG_DEBUG, "%s: read %d bytes\n", __func__, count)); 276 /* 277 * The data received at the MPTCP layer will never exceed the 278 * receive window because anything to the right of the 279 * receive window will be trimmed at the subflow level. 280 */ 281 mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp); 282 mp_tp->mpt_rcvatmark += count; 283 m = save; 284 prev = save = NULL; 285 count = mp_so->so_rcv.sb_cc; 286 } while (m); 287 MPT_UNLOCK(mp_tp); 288 289 if (freelist) 290 m_freem(freelist); 291} 292 293/* 294 * MPTCP output. 
 */
/*
 * Pick the best subflow and push pending data out on it.
 * Called with the MPTCP-session (MP socket) lock held.  Always returns 0
 * except when the MP socket can no longer send (EPIPE): subflow-level
 * errors trigger failover to another subflow instead of propagating up.
 */
int
mptcp_output(struct mptses *mpte)
{
	struct mptsub *mpts;
	struct mptsub *mpts_tried = NULL;	/* subflow that just failed */
	struct socket *mp_so;
	int error = 0;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
	mp_so = mpte->mpte_mppcb->mpp_socket;
	if (mp_so->so_state & SS_CANTSENDMORE) {
		return (EPIPE);
	}

try_again:
	/* get the "best" subflow to be used for transmission */
	mpts = mptcp_get_subflow(mpte, NULL);
	if (mpts == NULL) {
		mptcplog((LOG_ERR, "%s: mp_so 0x%llx has no usable subflow\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
		goto out;
	}

	mptcplog3((LOG_INFO, "%s: mp_so 0x%llx cid %d \n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));

	/* In case there's just one flow, we reattempt later */
	MPTS_LOCK(mpts);
	if ((mpts_tried != NULL) && ((mpts == mpts_tried) ||
	    (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
		/*
		 * The chooser handed back the same (or another failing)
		 * subflow: restore the tried subflow to active, arm the
		 * rexmt timer and retry from there later.
		 */
		MPTS_UNLOCK(mpts);
		MPTS_LOCK(mpts_tried);
		mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
		mpts_tried->mpts_flags |= MPTSF_ACTIVE;
		MPTS_UNLOCK(mpts_tried);
		MPT_LOCK(mpte->mpte_mptcb);
		mptcp_start_timer(mpte->mpte_mptcb, MPTT_REXMT);
		MPT_UNLOCK(mpte->mpte_mptcb);
		mptcplog((LOG_INFO, "%s: mp_so 0x%llx retry later\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
		goto out;
	}

	DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts,
	    struct socket *, mp_so);
	error = mptcp_subflow_output(mpte, mpts);
	if (error) {
		/* can be a temporary loss of source address or other error */
		mpts->mpts_flags |= MPTSF_FAILINGOVER;
		mpts->mpts_flags &= ~MPTSF_ACTIVE;
		mpts_tried = mpts;
		MPTS_UNLOCK(mpts);
		mptcplog((LOG_INFO, "%s: error = %d \n", __func__, error));
		goto try_again;
	}
	/* The model is to have only one active flow at a time */
	mpts->mpts_flags |= MPTSF_ACTIVE;
	MPTS_UNLOCK(mpts);
	if (mpte->mpte_active_sub == NULL) {
		mpte->mpte_active_sub = mpts;
	} else if (mpte->mpte_active_sub != mpts) {
		/* Demote the previously active subflow */
		MPTS_LOCK(mpte->mpte_active_sub);
		mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
		MPTS_UNLOCK(mpte->mpte_active_sub);
		mpte->mpte_active_sub = mpts;
	}
out:
	/* subflow errors should not be percolated back up */
	return (0);
}

/*
 * Return the most eligible subflow to be used for sending data.
 * This function also serves to check if any alternate subflow is available
 * or not.
 */
struct mptsub *
mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore)
{
	struct mptsub *mpts;
	struct mptsub *fallback = NULL;	/* best non-preferred candidate */
	struct socket *so = NULL;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		MPTS_LOCK(mpts);

		/* Caller may exclude one subflow (e.g. the one that failed) */
		if ((ignore) && (mpts == ignore)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		/* There can only be one subflow in degraded state */
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
			MPTS_UNLOCK(mpts);
			break;
		}

		/*
		 * Subflows with Fastjoin allow data to be written before
		 * the subflow is mp capable.
		 */
		if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) &&
		    !(mpts->mpts_flags & MPTSF_FASTJ_REQD)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if (mpts->mpts_flags & MPTSF_SUSPENDED) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
		    (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		/*
		 * A failing-over subflow may be rehabilitated if its send
		 * buffer drained and its RTO did not spike; otherwise it is
		 * remembered only as a last-resort fallback.
		 */
		if (mpts->mpts_flags & MPTSF_FAILINGOVER) {
			so = mpts->mpts_socket;
			if ((so) && (!(so->so_flags & SOF_PCBCLEARING))) {
				socket_lock(so, 1);
				if ((so->so_snd.sb_cc == 0) &&
				    (mptcp_no_rto_spike(so))) {
					mpts->mpts_flags &= ~MPTSF_FAILINGOVER;
					so->so_flags &= ~SOF_MP_TRYFAILOVER;
					fallback = mpts;
					socket_unlock(so, 1);
				} else {
					fallback = mpts;
					socket_unlock(so, 1);
					MPTS_UNLOCK(mpts);
					continue;
				}
			} else {
				MPTS_UNLOCK(mpts);
				continue;
			}
		}

		if (mpts->mpts_flags & MPTSF_PREFERRED) {
			MPTS_UNLOCK(mpts);
			break;
		}

		/* When there are no preferred flows, use first one in list */
		fallback = mpts;

		MPTS_UNLOCK(mpts);
	}
	/*
	 * If there is no preferred or backup subflow, and there is no active
	 * subflow use the last usable subflow.
	 * (mpts is NULL when TAILQ_FOREACH ran to completion without a break.)
	 */
	if (mpts == NULL) {
		return (fallback);
	}

	return (mpts);
}

/*
 * Return the first subflow (other than 'ignore') still waiting to connect,
 * i.e. with MPTSF_CONNECT_PENDING set, or NULL when the list is exhausted.
 * Called with the MPTCP-session lock held.
 */
struct mptsub *
mptcp_get_pending_subflow(struct mptses *mpte, struct mptsub *ignore)
{
	struct mptsub *mpts = NULL;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		MPTS_LOCK(mpts);

		if ((ignore) && (mpts == ignore)) {
			MPTS_UNLOCK(mpts);
			continue;
		}

		if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
			MPTS_UNLOCK(mpts);
			break;
		}

		MPTS_UNLOCK(mpts);
	}
	return (mpts);
}

/*
 * MPTCP connection-level close state machine, driven by MPCE_* events
 * (local close, remote Data FIN, remote Data ACK).  Caller must hold the
 * mptcb lock.  Data FINs occupy one unit of data-sequence space, hence the
 * sndmax/rcvnxt adjustments below.
 */
void
mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
{
	MPT_LOCK_ASSERT_HELD(mp_tp);

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, event);

	switch (mp_tp->mpt_state) {
	case MPTCPS_CLOSED:
	case MPTCPS_LISTEN:
		mp_tp->mpt_state = MPTCPS_CLOSED;
		break;

	case MPTCPS_ESTABLISHED:
		if (event == MPCE_CLOSE) {
			mp_tp->mpt_state = MPTCPS_FIN_WAIT_1;
			mp_tp->mpt_sndmax += 1;	/* adjust for Data FIN */
		}
		else if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1;	/* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_CLOSE_WAIT;
		}
		break;

	case MPTCPS_CLOSE_WAIT:
		if (event == MPCE_CLOSE) {
			mp_tp->mpt_state = MPTCPS_LAST_ACK;
			mp_tp->mpt_sndmax += 1;	/* adjust for Data FIN */
		}
		break;

	case MPTCPS_FIN_WAIT_1:
		if (event == MPCE_RECV_DATA_ACK)
			mp_tp->mpt_state = MPTCPS_FIN_WAIT_2;
		else if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1;	/* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_CLOSING;
		}
		break;

	case MPTCPS_CLOSING:
		if (event == MPCE_RECV_DATA_ACK)
			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
		break;

	case MPTCPS_LAST_ACK:
		if (event == MPCE_RECV_DATA_ACK)
			mp_tp->mpt_state = MPTCPS_TERMINATE;
		break;

	case MPTCPS_FIN_WAIT_2:
		if (event ==
MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1;	/* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
		}
		break;

	case MPTCPS_TIME_WAIT:
		break;

	case MPTCPS_FASTCLOSE_WAIT:
		if (event == MPCE_CLOSE) {
			/* no need to adjust for data FIN */
			mp_tp->mpt_state = MPTCPS_TERMINATE;
		}
		break;
	case MPTCPS_TERMINATE:
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}
	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, event);
	mptcplog((LOG_INFO, "%s: state = %d\n",
	    __func__, mp_tp->mpt_state));
}

/*
 * Update the mptcb send state variables, but the actual sbdrop occurs
 * in MPTCP layer
 */
void
mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack)
{
	u_int64_t acked = 0;

	/* full_dack is the 64-bit data ACK already extended by the caller */
	acked = full_dack - mp_tp->mpt_snduna;

	if (acked) {
		mp_tp->mpt_snduna += acked;
		/* In degraded mode, we may get some Data ACKs */
		if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
		    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
		    MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
			/* bring back sndnxt to retransmit MPTCP data */
			mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail;
			mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
			tp->t_inpcb->inp_socket->so_flags1 |=
			    SOF1_POST_FALLBACK_SYNC;
		}
	}
	/* Everything (incl. the Data FIN) acked: advance the close FSM */
	if ((full_dack == mp_tp->mpt_sndmax) &&
	    (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1)) {
		mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK);
		tp->t_mpflags &= ~TMPF_SEND_DFIN;
	}
}

/* If you change this function, match up mptcp_update_rcv_state_f */
/*
 * Process a received 32-bit-DSN DSS option: byte-swap the fields in place,
 * extend the DSN to 64 bits against rcvnxt and hand off to the common
 * _meat helper.
 * NOTE(review): mp_tp from tptomptp() is used without a NULL check here,
 * unlike mptcp_input_csum() — presumably callers only invoke this on
 * sockets known to be MPTCP; confirm.
 */
void
mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
    uint16_t csum)
{
	struct mptcb *mp_tp = tptomptp(tp);
	u_int64_t full_dsn = 0;

	NTOHL(dss_info->mdss_dsn);
	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);

	/* XXX for autosndbuf grow sb here */
	MPT_LOCK(mp_tp);
	MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
	MPT_UNLOCK(mp_tp);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
	    csum);

}

/*
 * Common receive-side DSS-map processing (fields already host order and
 * DSN already 64-bit extended).  Detects infinite mappings (len == 0) and
 * Data FIN-only maps; otherwise stashes the map in tp->t_rcv_map and marks
 * TMPF_EMBED_DSN for the input path to consume.
 */
void
mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
    u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len,
    uint16_t csum)
{
	if (mdss_data_len == 0) {
		/* Infinite mapping: peer is falling back to regular TCP */
		mptcplog((LOG_INFO, "%s: Received infinite mapping.",
		    __func__));
		if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
			mptcplog((LOG_ERR, "%s: Bad checksum value %x \n",
			    __func__, csum));
		}
		mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
		return;
	}
	MPT_LOCK(mp_tp);
	if (mptcp_dbg >= MP_VERBOSE_DEBUG_1)
		printf("%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n",
		    __func__, seqn, mdss_data_len, full_dsn,
		    mp_tp->mpt_rcvnxt);

	/* Process a Data FIN packet , handled in mptcp_do_fin_opt */
	if ((seqn == 0) && (mdss_data_len == 1)) {
		mptcplog((LOG_INFO, "%s: Data FIN DSS opt state = %d \n",
		    __func__, mp_tp->mpt_state));
		MPT_UNLOCK(mp_tp);
		return;
	}
	MPT_UNLOCK(mp_tp);
	mptcp_notify_mpready(tp->t_inpcb->inp_socket);
	/* Record the mapping for the subflow input path */
	tp->t_rcv_map.mpt_dsn = full_dsn;
	tp->t_rcv_map.mpt_sseq = seqn;
	tp->t_rcv_map.mpt_len = mdss_data_len;
	tp->t_rcv_map.mpt_csum = csum;
	tp->t_mpflags |= TMPF_EMBED_DSN;
}


/*
 * Variant of mptcp_update_dss_rcv_state() for the DSS+ACK option layout
 * (struct mptcp_dss_ack_opt); keep the two in sync.
 */
void
mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *dss_info, struct tcpcb *tp,
    uint16_t csum)
{
	u_int64_t full_dsn = 0;
	struct mptcb *mp_tp = tptomptp(tp);

	NTOHL(dss_info->mdss_dsn);
	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);
	MPT_LOCK(mp_tp);
	MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
	MPT_UNLOCK(mp_tp);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    full_dsn,
	    dss_info->mdss_subflow_seqn,
	    dss_info->mdss_data_len,
	    csum);
}

/*
 * Variant for the 64-bit-DSN / 32-bit-ACK DSS option layout: the DSN is
 * already full width, so no MPTCP_EXTEND_DSN step is needed.
 */
void
mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info,
    struct tcpcb *tp, uint16_t csum)
{
	u_int64_t dsn = mptcp_ntoh64(dss_info->mdss_dsn);
	struct mptcb *mp_tp = tptomptp(tp);

	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    dsn,
	    dss_info->mdss_subflow_seqn,
	    dss_info->mdss_data_len,
	    csum);
}

/*
 * MPTCP Checksum support
 * The checksum is calculated whenever the MPTCP DSS option is included
 * in the TCP packet. The checksum includes the sum of the MPTCP psuedo
 * header and the actual data indicated by the length specified in the
 * DSS option.
 */

/*
 * Verify the DSS checksum of a received mapping.  Returns 0 when there is
 * nothing to check (no mptcb, checksums disabled, no embedded DSN, or the
 * connection fell back to TCP), 0xffff when the packet is shorter than the
 * claimed mapping, otherwise the one's-complement residual — 0 on success,
 * nonzero on checksum failure.
 */
uint16_t
mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off)
{
	struct mptcb *mp_tp = tptomptp(tp);
	uint32_t sum = 0;
	uint64_t dsn;
	uint32_t sseq;
	uint16_t len;
	uint16_t csum;

	if (mp_tp == NULL)
		return (0);

	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
		return (0);

	if (!(tp->t_mpflags & TMPF_EMBED_DSN))
		return (0);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK)
		return (0);

	/*
	 * The remote side may send a packet with fewer bytes than the
	 * claimed DSS checksum length.
	 */
	if ((int)m_length2(m, NULL) < (off + tp->t_rcv_map.mpt_len))
		return (0xffff);

	if (tp->t_rcv_map.mpt_len != 0)
		sum = m_sum16(m, off, tp->t_rcv_map.mpt_len);

	/* Fold in the network-order pseudo header: DSN, SSN, len, csum */
	dsn = mptcp_hton64(tp->t_rcv_map.mpt_dsn);
	sseq = htonl(tp->t_rcv_map.mpt_sseq);
	len = htons(tp->t_rcv_map.mpt_len);
	csum = tp->t_rcv_map.mpt_csum;
	sum += in_pseudo64(dsn, sseq, (len + csum));
	ADDCARRY(sum);
	DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
	    uint32_t, sum);
	mptcplog((LOG_INFO, "%s: sum = %x \n", __func__, sum));
	return (~sum & 0xffff);
}

/*
 * Compute and store the DSS checksum for an outgoing mapping.  sseqp
 * points at the subflow-sequence field inside the DSS option in the
 * packet; the 16-bit length follows it and the checksum slot follows the
 * length, which is where the result is written.  No-op when checksums are
 * disabled or sseqp is NULL.
 */
void
mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len,
    unsigned hdrlen, u_int64_t dss_val, u_int32_t *sseqp)
{
	struct mptcb *mp_tp = tptomptp(tp);
	u_int32_t sum = 0;
	uint32_t sseq;
	uint16_t dss_len;
	uint16_t csum = 0;	/* checksum field itself sums as zero */
	uint16_t *csump = NULL;

	if (mp_tp == NULL)
		return;

	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
		return;

	if (sseqp == NULL)
		return;

	if (len)
		sum = m_sum16(m, hdrlen, len);

	/* Pseudo header over the option fields, all in network order */
	dss_val = mptcp_hton64(dss_val);
	sseq = *sseqp;
	dss_len = *(uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t));
	sum += in_pseudo64(dss_val, sseq, (dss_len + csum));

	ADDCARRY(sum);
	sum = ~sum & 0xffff;
	/* Checksum slot sits right after the 16-bit length field */
	csump = (uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t) +
	    sizeof (uint16_t));
	DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
	    uint32_t, sum);
	*csump = sum;
	mptcplog3((LOG_INFO, "%s: sum = %x \n", __func__, sum));
}