tcp_usrreq.c revision 307906
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006-2007 Robert N. M. Watson 5 * Copyright (c) 2010-2011 Juniper Networks, Inc. 6 * All rights reserved. 7 * 8 * Portions of this software were developed by Robert N. M. Watson under 9 * contract to Juniper Networks, Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 36 */ 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_usrreq.c 307906 2016-10-25 12:58:36Z jch $"); 40 41#include "opt_ddb.h" 42#include "opt_inet.h" 43#include "opt_inet6.h" 44#include "opt_tcpdebug.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/limits.h> 49#include <sys/malloc.h> 50#include <sys/kernel.h> 51#include <sys/sysctl.h> 52#include <sys/mbuf.h> 53#ifdef INET6 54#include <sys/domain.h> 55#endif /* INET6 */ 56#include <sys/socket.h> 57#include <sys/socketvar.h> 58#include <sys/protosw.h> 59#include <sys/proc.h> 60#include <sys/jail.h> 61#include <sys/syslog.h> 62 63#ifdef DDB 64#include <ddb/ddb.h> 65#endif 66 67#include <net/if.h> 68#include <net/route.h> 69#include <net/vnet.h> 70 71#include <netinet/cc.h> 72#include <netinet/in.h> 73#include <netinet/in_pcb.h> 74#include <netinet/in_systm.h> 75#include <netinet/in_var.h> 76#include <netinet/ip_var.h> 77#ifdef INET6 78#include <netinet/ip6.h> 79#include <netinet6/in6_pcb.h> 80#include <netinet6/ip6_var.h> 81#include <netinet6/scope6_var.h> 82#endif 83#ifdef TCP_RFC7413 84#include <netinet/tcp_fastopen.h> 85#endif 86#include <netinet/tcp_fsm.h> 87#include <netinet/tcp_seq.h> 88#include <netinet/tcp_timer.h> 89#include <netinet/tcp_var.h> 90#include <netinet/tcpip.h> 91#ifdef TCPDEBUG 92#include <netinet/tcp_debug.h> 93#endif 94#ifdef TCP_OFFLOAD 95#include <netinet/tcp_offload.h> 96#endif 97 98/* 99 * TCP protocol interface to socket abstraction. 100 */ 101static int tcp_attach(struct socket *); 102#ifdef INET 103static int tcp_connect(struct tcpcb *, struct sockaddr *, 104 struct thread *td); 105#endif /* INET */ 106#ifdef INET6 107static int tcp6_connect(struct tcpcb *, struct sockaddr *, 108 struct thread *td); 109#endif /* INET6 */ 110static void tcp_disconnect(struct tcpcb *); 111static void tcp_usrclosed(struct tcpcb *); 112static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 113 114#ifdef TCPDEBUG 115#define TCPDEBUG0 int ostate = 0 116#define TCPDEBUG1() ostate = tp ? tp->t_state : 0 117#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 118 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 119#else 120#define TCPDEBUG0 121#define TCPDEBUG1() 122#define TCPDEBUG2(req) 123#endif 124 125/* 126 * TCP attaches to socket via pru_attach(), reserving space, 127 * and an internet control block. 128 */ 129static int 130tcp_usr_attach(struct socket *so, int proto, struct thread *td) 131{ 132 struct inpcb *inp; 133 struct tcpcb *tp = NULL; 134 int error; 135 TCPDEBUG0; 136 137 inp = sotoinpcb(so); 138 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 139 TCPDEBUG1(); 140 141 error = tcp_attach(so); 142 if (error) 143 goto out; 144 145 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 146 so->so_linger = TCP_LINGERTIME; 147 148 inp = sotoinpcb(so); 149 tp = intotcpcb(inp); 150out: 151 TCPDEBUG2(PRU_ATTACH); 152 return error; 153} 154 155/* 156 * tcp_detach is called when the socket layer loses its final reference 157 * to the socket, be it a file descriptor reference, a reference from TCP, 158 * etc. At this point, there is only one case in which we will keep around 159 * inpcb state: time wait. 160 * 161 * This function can probably be re-absorbed back into tcp_usr_detach() now 162 * that there is a single detach path. 163 */ 164static void 165tcp_detach(struct socket *so, struct inpcb *inp) 166{ 167 struct tcpcb *tp; 168 169 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 170 INP_WLOCK_ASSERT(inp); 171 172 KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp")); 173 KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so")); 174 175 tp = intotcpcb(inp); 176 177 if (inp->inp_flags & INP_TIMEWAIT) { 178 /* 179 * There are two cases to handle: one in which the time wait 180 * state is being discarded (INP_DROPPED), and one in which 181 * this connection will remain in timewait. In the former, 182 * it is time to discard all state (except tcptw, which has 183 * already been discarded by the timewait close code, which 184 * should be further up the call stack somewhere). In the 185 * latter case, we detach from the socket, but leave the pcb 186 * present until timewait ends. 187 * 188 * XXXRW: Would it be cleaner to free the tcptw here? 189 * 190 * Astute question indeed, from twtcp perspective there are 191 * three cases to consider: 192 * 193 * #1 tcp_detach is called at tcptw creation time by 194 * tcp_twstart, then do not discard the newly created tcptw 195 * and leave inpcb present until timewait ends 196 * #2 tcp_detach is called at timewait end (or reuse) by 197 * tcp_twclose, then the tcptw has already been discarded 198 * and inpcb is freed here 199 * #3 tcp_detach is called() after timewait ends (or reuse) 200 * (e.g. by soclose), then tcptw has already been discarded 201 * and inpcb is freed here 202 * 203 * In all three cases the tcptw should not be freed here. 204 */ 205 if (inp->inp_flags & INP_DROPPED) { 206 in_pcbdetach(inp); 207 if (__predict_true(tp == NULL)) { 208 in_pcbfree(inp); 209 } else { 210 /* 211 * This case should not happen as in TIMEWAIT 212 * state the inp should not be destroyed before 213 * its tcptw. If INVARIANTS is defined, panic. 214 */ 215#ifdef INVARIANTS 216 panic("%s: Panic before an inp double-free: " 217 "INP_TIMEWAIT && INP_DROPPED && tp != NULL" 218 , __func__); 219#else 220 log(LOG_ERR, "%s: Avoid an inp double-free: " 221 "INP_TIMEWAIT && INP_DROPPED && tp != NULL" 222 , __func__); 223#endif 224 INP_WUNLOCK(inp); 225 } 226 } else { 227 in_pcbdetach(inp); 228 INP_WUNLOCK(inp); 229 } 230 } else { 231 /* 232 * If the connection is not in timewait, we consider two 233 * two conditions: one in which no further processing is 234 * necessary (dropped || embryonic), and one in which TCP is 235 * not yet done, but no longer requires the socket, so the 236 * pcb will persist for the time being. 237 * 238 * XXXRW: Does the second case still occur? 239 */ 240 if (inp->inp_flags & INP_DROPPED || 241 tp->t_state < TCPS_SYN_SENT) { 242 tcp_discardcb(tp); 243 in_pcbdetach(inp); 244 in_pcbfree(inp); 245 } else { 246 in_pcbdetach(inp); 247 INP_WUNLOCK(inp); 248 } 249 } 250} 251 252/* 253 * pru_detach() detaches the TCP protocol from the socket. 254 * If the protocol state is non-embryonic, then can't 255 * do this directly: have to initiate a pru_disconnect(), 256 * which may finish later; embryonic TCB's can just 257 * be discarded here. 258 */ 259static void 260tcp_usr_detach(struct socket *so) 261{ 262 struct inpcb *inp; 263 264 inp = sotoinpcb(so); 265 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 266 INP_INFO_WLOCK(&V_tcbinfo); 267 INP_WLOCK(inp); 268 KASSERT(inp->inp_socket != NULL, 269 ("tcp_usr_detach: inp_socket == NULL")); 270 tcp_detach(so, inp); 271 INP_INFO_WUNLOCK(&V_tcbinfo); 272} 273 274#ifdef INET 275/* 276 * Give the socket an address. 277 */ 278static int 279tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 280{ 281 int error = 0; 282 struct inpcb *inp; 283 struct tcpcb *tp = NULL; 284 struct sockaddr_in *sinp; 285 286 sinp = (struct sockaddr_in *)nam; 287 if (nam->sa_len != sizeof (*sinp)) 288 return (EINVAL); 289 /* 290 * Must check for multicast addresses and disallow binding 291 * to them. 292 */ 293 if (sinp->sin_family == AF_INET && 294 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 295 return (EAFNOSUPPORT); 296 297 TCPDEBUG0; 298 inp = sotoinpcb(so); 299 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 300 INP_WLOCK(inp); 301 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 302 error = EINVAL; 303 goto out; 304 } 305 tp = intotcpcb(inp); 306 TCPDEBUG1(); 307 INP_HASH_WLOCK(&V_tcbinfo); 308 error = in_pcbbind(inp, nam, td->td_ucred); 309 INP_HASH_WUNLOCK(&V_tcbinfo); 310out: 311 TCPDEBUG2(PRU_BIND); 312 INP_WUNLOCK(inp); 313 314 return (error); 315} 316#endif /* INET */ 317 318#ifdef INET6 319static int 320tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 321{ 322 int error = 0; 323 struct inpcb *inp; 324 struct tcpcb *tp = NULL; 325 struct sockaddr_in6 *sin6p; 326 327 sin6p = (struct sockaddr_in6 *)nam; 328 if (nam->sa_len != sizeof (*sin6p)) 329 return (EINVAL); 330 /* 331 * Must check for multicast addresses and disallow binding 332 * to them. 333 */ 334 if (sin6p->sin6_family == AF_INET6 && 335 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 336 return (EAFNOSUPPORT); 337 338 TCPDEBUG0; 339 inp = sotoinpcb(so); 340 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 341 INP_WLOCK(inp); 342 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 343 error = EINVAL; 344 goto out; 345 } 346 tp = intotcpcb(inp); 347 TCPDEBUG1(); 348 INP_HASH_WLOCK(&V_tcbinfo); 349 inp->inp_vflag &= ~INP_IPV4; 350 inp->inp_vflag |= INP_IPV6; 351#ifdef INET 352 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 353 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 354 inp->inp_vflag |= INP_IPV4; 355 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 356 struct sockaddr_in sin; 357 358 in6_sin6_2_sin(&sin, sin6p); 359 inp->inp_vflag |= INP_IPV4; 360 inp->inp_vflag &= ~INP_IPV6; 361 error = in_pcbbind(inp, (struct sockaddr *)&sin, 362 td->td_ucred); 363 INP_HASH_WUNLOCK(&V_tcbinfo); 364 goto out; 365 } 366 } 367#endif 368 error = in6_pcbbind(inp, nam, td->td_ucred); 369 INP_HASH_WUNLOCK(&V_tcbinfo); 370out: 371 TCPDEBUG2(PRU_BIND); 372 INP_WUNLOCK(inp); 373 return (error); 374} 375#endif /* INET6 */ 376 377#ifdef INET 378/* 379 * Prepare to accept connections. 380 */ 381static int 382tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 383{ 384 int error = 0; 385 struct inpcb *inp; 386 struct tcpcb *tp = NULL; 387 388 TCPDEBUG0; 389 inp = sotoinpcb(so); 390 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 391 INP_WLOCK(inp); 392 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 393 error = EINVAL; 394 goto out; 395 } 396 tp = intotcpcb(inp); 397 TCPDEBUG1(); 398 SOCK_LOCK(so); 399 error = solisten_proto_check(so); 400 INP_HASH_WLOCK(&V_tcbinfo); 401 if (error == 0 && inp->inp_lport == 0) 402 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 403 INP_HASH_WUNLOCK(&V_tcbinfo); 404 if (error == 0) { 405 tcp_state_change(tp, TCPS_LISTEN); 406 solisten_proto(so, backlog); 407#ifdef TCP_OFFLOAD 408 if ((so->so_options & SO_NO_OFFLOAD) == 0) 409 tcp_offload_listen_start(tp); 410#endif 411 } 412 SOCK_UNLOCK(so); 413 414#ifdef TCP_RFC7413 415 if (tp->t_flags & TF_FASTOPEN) 416 tp->t_tfo_pending = tcp_fastopen_alloc_counter(); 417#endif 418out: 419 TCPDEBUG2(PRU_LISTEN); 420 INP_WUNLOCK(inp); 421 return (error); 422} 423#endif /* INET */ 424 425#ifdef INET6 426static int 427tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 428{ 429 int error = 0; 430 struct inpcb *inp; 431 struct tcpcb *tp = NULL; 432 433 TCPDEBUG0; 434 inp = sotoinpcb(so); 435 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 436 INP_WLOCK(inp); 437 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 438 error = EINVAL; 439 goto out; 440 } 441 tp = intotcpcb(inp); 442 TCPDEBUG1(); 443 SOCK_LOCK(so); 444 error = solisten_proto_check(so); 445 INP_HASH_WLOCK(&V_tcbinfo); 446 if (error == 0 && inp->inp_lport == 0) { 447 inp->inp_vflag &= ~INP_IPV4; 448 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 449 inp->inp_vflag |= INP_IPV4; 450 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 451 } 452 INP_HASH_WUNLOCK(&V_tcbinfo); 453 if (error == 0) { 454 tcp_state_change(tp, TCPS_LISTEN); 455 solisten_proto(so, backlog); 456#ifdef TCP_OFFLOAD 457 if ((so->so_options & SO_NO_OFFLOAD) == 0) 458 tcp_offload_listen_start(tp); 459#endif 460 } 461 SOCK_UNLOCK(so); 462 463#ifdef TCP_RFC7413 464 if (tp->t_flags & TF_FASTOPEN) 465 tp->t_tfo_pending = tcp_fastopen_alloc_counter(); 466#endif 467out: 468 TCPDEBUG2(PRU_LISTEN); 469 INP_WUNLOCK(inp); 470 return (error); 471} 472#endif /* INET6 */ 473 474#ifdef INET 475/* 476 * Initiate connection to peer. 477 * Create a template for use in transmissions on this connection. 478 * Enter SYN_SENT state, and mark socket as connecting. 479 * Start keep-alive timer, and seed output sequence space. 480 * Send initial segment on connection. 481 */ 482static int 483tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 484{ 485 int error = 0; 486 struct inpcb *inp; 487 struct tcpcb *tp = NULL; 488 struct sockaddr_in *sinp; 489 490 sinp = (struct sockaddr_in *)nam; 491 if (nam->sa_len != sizeof (*sinp)) 492 return (EINVAL); 493 /* 494 * Must disallow TCP ``connections'' to multicast addresses. 495 */ 496 if (sinp->sin_family == AF_INET 497 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 498 return (EAFNOSUPPORT); 499 if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) 500 return (error); 501 502 TCPDEBUG0; 503 inp = sotoinpcb(so); 504 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 505 INP_WLOCK(inp); 506 if (inp->inp_flags & INP_TIMEWAIT) { 507 error = EADDRINUSE; 508 goto out; 509 } 510 if (inp->inp_flags & INP_DROPPED) { 511 error = ECONNREFUSED; 512 goto out; 513 } 514 tp = intotcpcb(inp); 515 TCPDEBUG1(); 516 if ((error = tcp_connect(tp, nam, td)) != 0) 517 goto out; 518#ifdef TCP_OFFLOAD 519 if (registered_toedevs > 0 && 520 (so->so_options & SO_NO_OFFLOAD) == 0 && 521 (error = tcp_offload_connect(so, nam)) == 0) 522 goto out; 523#endif 524 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 525 error = tcp_output(tp); 526out: 527 TCPDEBUG2(PRU_CONNECT); 528 INP_WUNLOCK(inp); 529 return (error); 530} 531#endif /* INET */ 532 533#ifdef INET6 534static int 535tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 536{ 537 int error = 0; 538 struct inpcb *inp; 539 struct tcpcb *tp = NULL; 540 struct sockaddr_in6 *sin6p; 541 542 TCPDEBUG0; 543 544 sin6p = (struct sockaddr_in6 *)nam; 545 if (nam->sa_len != sizeof (*sin6p)) 546 return (EINVAL); 547 /* 548 * Must disallow TCP ``connections'' to multicast addresses. 549 */ 550 if (sin6p->sin6_family == AF_INET6 551 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 552 return (EAFNOSUPPORT); 553 554 inp = sotoinpcb(so); 555 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 556 INP_WLOCK(inp); 557 if (inp->inp_flags & INP_TIMEWAIT) { 558 error = EADDRINUSE; 559 goto out; 560 } 561 if (inp->inp_flags & INP_DROPPED) { 562 error = ECONNREFUSED; 563 goto out; 564 } 565 tp = intotcpcb(inp); 566 TCPDEBUG1(); 567#ifdef INET 568 /* 569 * XXXRW: Some confusion: V4/V6 flags relate to binding, and 570 * therefore probably require the hash lock, which isn't held here. 571 * Is this a significant problem? 572 */ 573 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 574 struct sockaddr_in sin; 575 576 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 577 error = EINVAL; 578 goto out; 579 } 580 581 in6_sin6_2_sin(&sin, sin6p); 582 inp->inp_vflag |= INP_IPV4; 583 inp->inp_vflag &= ~INP_IPV6; 584 if ((error = prison_remote_ip4(td->td_ucred, 585 &sin.sin_addr)) != 0) 586 goto out; 587 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 588 goto out; 589#ifdef TCP_OFFLOAD 590 if (registered_toedevs > 0 && 591 (so->so_options & SO_NO_OFFLOAD) == 0 && 592 (error = tcp_offload_connect(so, nam)) == 0) 593 goto out; 594#endif 595 error = tcp_output(tp); 596 goto out; 597 } 598#endif 599 inp->inp_vflag &= ~INP_IPV4; 600 inp->inp_vflag |= INP_IPV6; 601 inp->inp_inc.inc_flags |= INC_ISIPV6; 602 if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0) 603 goto out; 604 if ((error = tcp6_connect(tp, nam, td)) != 0) 605 goto out; 606#ifdef TCP_OFFLOAD 607 if (registered_toedevs > 0 && 608 (so->so_options & SO_NO_OFFLOAD) == 0 && 609 (error = tcp_offload_connect(so, nam)) == 0) 610 goto out; 611#endif 612 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 613 error = tcp_output(tp); 614 615out: 616 TCPDEBUG2(PRU_CONNECT); 617 INP_WUNLOCK(inp); 618 return (error); 619} 620#endif /* INET6 */ 621 622/* 623 * Initiate disconnect from peer. 624 * If connection never passed embryonic stage, just drop; 625 * else if don't need to let data drain, then can just drop anyways, 626 * else have to begin TCP shutdown process: mark socket disconnecting, 627 * drain unread data, state switch to reflect user close, and 628 * send segment (e.g. FIN) to peer. Socket will be really disconnected 629 * when peer sends FIN and acks ours. 630 * 631 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 632 */ 633static int 634tcp_usr_disconnect(struct socket *so) 635{ 636 struct inpcb *inp; 637 struct tcpcb *tp = NULL; 638 int error = 0; 639 640 TCPDEBUG0; 641 INP_INFO_WLOCK(&V_tcbinfo); 642 inp = sotoinpcb(so); 643 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 644 INP_WLOCK(inp); 645 if (inp->inp_flags & INP_TIMEWAIT) 646 goto out; 647 if (inp->inp_flags & INP_DROPPED) { 648 error = ECONNRESET; 649 goto out; 650 } 651 tp = intotcpcb(inp); 652 TCPDEBUG1(); 653 tcp_disconnect(tp); 654out: 655 TCPDEBUG2(PRU_DISCONNECT); 656 INP_WUNLOCK(inp); 657 INP_INFO_WUNLOCK(&V_tcbinfo); 658 return (error); 659} 660 661#ifdef INET 662/* 663 * Accept a connection. Essentially all the work is done at higher levels; 664 * just return the address of the peer, storing through addr. 665 */ 666static int 667tcp_usr_accept(struct socket *so, struct sockaddr **nam) 668{ 669 int error = 0; 670 struct inpcb *inp = NULL; 671 struct tcpcb *tp = NULL; 672 struct in_addr addr; 673 in_port_t port = 0; 674 TCPDEBUG0; 675 676 if (so->so_state & SS_ISDISCONNECTED) 677 return (ECONNABORTED); 678 679 inp = sotoinpcb(so); 680 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 681 INP_WLOCK(inp); 682 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 683 error = ECONNABORTED; 684 goto out; 685 } 686 tp = intotcpcb(inp); 687 TCPDEBUG1(); 688 689 /* 690 * We inline in_getpeeraddr and COMMON_END here, so that we can 691 * copy the data of interest and defer the malloc until after we 692 * release the lock. 693 */ 694 port = inp->inp_fport; 695 addr = inp->inp_faddr; 696 697out: 698 TCPDEBUG2(PRU_ACCEPT); 699 INP_WUNLOCK(inp); 700 if (error == 0) 701 *nam = in_sockaddr(port, &addr); 702 return error; 703} 704#endif /* INET */ 705 706#ifdef INET6 707static int 708tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 709{ 710 struct inpcb *inp = NULL; 711 int error = 0; 712 struct tcpcb *tp = NULL; 713 struct in_addr addr; 714 struct in6_addr addr6; 715 in_port_t port = 0; 716 int v4 = 0; 717 TCPDEBUG0; 718 719 if (so->so_state & SS_ISDISCONNECTED) 720 return (ECONNABORTED); 721 722 inp = sotoinpcb(so); 723 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 724 INP_INFO_RLOCK(&V_tcbinfo); 725 INP_WLOCK(inp); 726 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 727 error = ECONNABORTED; 728 goto out; 729 } 730 tp = intotcpcb(inp); 731 TCPDEBUG1(); 732 733 /* 734 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 735 * copy the data of interest and defer the malloc until after we 736 * release the lock. 737 */ 738 if (inp->inp_vflag & INP_IPV4) { 739 v4 = 1; 740 port = inp->inp_fport; 741 addr = inp->inp_faddr; 742 } else { 743 port = inp->inp_fport; 744 addr6 = inp->in6p_faddr; 745 } 746 747out: 748 TCPDEBUG2(PRU_ACCEPT); 749 INP_WUNLOCK(inp); 750 INP_INFO_RUNLOCK(&V_tcbinfo); 751 if (error == 0) { 752 if (v4) 753 *nam = in6_v4mapsin6_sockaddr(port, &addr); 754 else 755 *nam = in6_sockaddr(port, &addr6); 756 } 757 return error; 758} 759#endif /* INET6 */ 760 761/* 762 * Mark the connection as being incapable of further output. 763 */ 764static int 765tcp_usr_shutdown(struct socket *so) 766{ 767 int error = 0; 768 struct inpcb *inp; 769 struct tcpcb *tp = NULL; 770 771 TCPDEBUG0; 772 INP_INFO_WLOCK(&V_tcbinfo); 773 inp = sotoinpcb(so); 774 KASSERT(inp != NULL, ("inp == NULL")); 775 INP_WLOCK(inp); 776 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 777 error = ECONNRESET; 778 goto out; 779 } 780 tp = intotcpcb(inp); 781 TCPDEBUG1(); 782 socantsendmore(so); 783 tcp_usrclosed(tp); 784 if (!(inp->inp_flags & INP_DROPPED)) 785 error = tcp_output(tp); 786 787out: 788 TCPDEBUG2(PRU_SHUTDOWN); 789 INP_WUNLOCK(inp); 790 INP_INFO_WUNLOCK(&V_tcbinfo); 791 792 return (error); 793} 794 795/* 796 * After a receive, possibly send window update to peer. 797 */ 798static int 799tcp_usr_rcvd(struct socket *so, int flags) 800{ 801 struct inpcb *inp; 802 struct tcpcb *tp = NULL; 803 int error = 0; 804 805 TCPDEBUG0; 806 inp = sotoinpcb(so); 807 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 808 INP_WLOCK(inp); 809 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 810 error = ECONNRESET; 811 goto out; 812 } 813 tp = intotcpcb(inp); 814 TCPDEBUG1(); 815#ifdef TCP_RFC7413 816 /* 817 * For passively-created TFO connections, don't attempt a window 818 * update while still in SYN_RECEIVED as this may trigger an early 819 * SYN|ACK. It is preferable to have the SYN|ACK be sent along with 820 * application response data, or failing that, when the DELACK timer 821 * expires. 822 */ 823 if ((tp->t_flags & TF_FASTOPEN) && 824 (tp->t_state == TCPS_SYN_RECEIVED)) 825 goto out; 826#endif 827#ifdef TCP_OFFLOAD 828 if (tp->t_flags & TF_TOE) 829 tcp_offload_rcvd(tp); 830 else 831#endif 832 tcp_output(tp); 833 834out: 835 TCPDEBUG2(PRU_RCVD); 836 INP_WUNLOCK(inp); 837 return (error); 838} 839 840/* 841 * Do a send by putting data in output queue and updating urgent 842 * marker if URG set. Possibly send more data. Unlike the other 843 * pru_*() routines, the mbuf chains are our responsibility. We 844 * must either enqueue them or free them. The other pru_* routines 845 * generally are caller-frees. 846 */ 847static int 848tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 849 struct sockaddr *nam, struct mbuf *control, struct thread *td) 850{ 851 int error = 0; 852 struct inpcb *inp; 853 struct tcpcb *tp = NULL; 854#ifdef INET6 855 int isipv6; 856#endif 857 TCPDEBUG0; 858 859 /* 860 * We require the pcbinfo lock if we will close the socket as part of 861 * this call. 862 */ 863 if (flags & PRUS_EOF) 864 INP_INFO_WLOCK(&V_tcbinfo); 865 inp = sotoinpcb(so); 866 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 867 INP_WLOCK(inp); 868 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 869 if (control) 870 m_freem(control); 871 if (m) 872 m_freem(m); 873 error = ECONNRESET; 874 goto out; 875 } 876#ifdef INET6 877 isipv6 = nam && nam->sa_family == AF_INET6; 878#endif /* INET6 */ 879 tp = intotcpcb(inp); 880 TCPDEBUG1(); 881 if (control) { 882 /* TCP doesn't do control messages (rights, creds, etc) */ 883 if (control->m_len) { 884 m_freem(control); 885 if (m) 886 m_freem(m); 887 error = EINVAL; 888 goto out; 889 } 890 m_freem(control); /* empty control, just free it */ 891 } 892 if (!(flags & PRUS_OOB)) { 893 sbappendstream(&so->so_snd, m); 894 if (nam && tp->t_state < TCPS_SYN_SENT) { 895 /* 896 * Do implied connect if not yet connected, 897 * initialize window to default value, and 898 * initialize maxseg/maxopd using peer's cached 899 * MSS. 900 */ 901#ifdef INET6 902 if (isipv6) 903 error = tcp6_connect(tp, nam, td); 904#endif /* INET6 */ 905#if defined(INET6) && defined(INET) 906 else 907#endif 908#ifdef INET 909 error = tcp_connect(tp, nam, td); 910#endif 911 if (error) 912 goto out; 913 tp->snd_wnd = TTCP_CLIENT_SND_WND; 914 tcp_mss(tp, -1); 915 } 916 if (flags & PRUS_EOF) { 917 /* 918 * Close the send side of the connection after 919 * the data is sent. 920 */ 921 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 922 socantsendmore(so); 923 tcp_usrclosed(tp); 924 } 925 if (!(inp->inp_flags & INP_DROPPED)) { 926 if (flags & PRUS_MORETOCOME) 927 tp->t_flags |= TF_MORETOCOME; 928 error = tcp_output(tp); 929 if (flags & PRUS_MORETOCOME) 930 tp->t_flags &= ~TF_MORETOCOME; 931 } 932 } else { 933 /* 934 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 935 */ 936 SOCKBUF_LOCK(&so->so_snd); 937 if (sbspace(&so->so_snd) < -512) { 938 SOCKBUF_UNLOCK(&so->so_snd); 939 m_freem(m); 940 error = ENOBUFS; 941 goto out; 942 } 943 /* 944 * According to RFC961 (Assigned Protocols), 945 * the urgent pointer points to the last octet 946 * of urgent data. We continue, however, 947 * to consider it to indicate the first octet 948 * of data past the urgent section. 949 * Otherwise, snd_up should be one lower. 950 */ 951 sbappendstream_locked(&so->so_snd, m); 952 SOCKBUF_UNLOCK(&so->so_snd); 953 if (nam && tp->t_state < TCPS_SYN_SENT) { 954 /* 955 * Do implied connect if not yet connected, 956 * initialize window to default value, and 957 * initialize maxseg/maxopd using peer's cached 958 * MSS. 959 */ 960#ifdef INET6 961 if (isipv6) 962 error = tcp6_connect(tp, nam, td); 963#endif /* INET6 */ 964#if defined(INET6) && defined(INET) 965 else 966#endif 967#ifdef INET 968 error = tcp_connect(tp, nam, td); 969#endif 970 if (error) 971 goto out; 972 tp->snd_wnd = TTCP_CLIENT_SND_WND; 973 tcp_mss(tp, -1); 974 } 975 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 976 tp->t_flags |= TF_FORCEDATA; 977 error = tcp_output(tp); 978 tp->t_flags &= ~TF_FORCEDATA; 979 } 980out: 981 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 982 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 983 INP_WUNLOCK(inp); 984 if (flags & PRUS_EOF) 985 INP_INFO_WUNLOCK(&V_tcbinfo); 986 return (error); 987} 988 989/* 990 * Abort the TCP. Drop the connection abruptly. 991 */ 992static void 993tcp_usr_abort(struct socket *so) 994{ 995 struct inpcb *inp; 996 struct tcpcb *tp = NULL; 997 TCPDEBUG0; 998 999 inp = sotoinpcb(so); 1000 KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); 1001 1002 INP_INFO_WLOCK(&V_tcbinfo); 1003 INP_WLOCK(inp); 1004 KASSERT(inp->inp_socket != NULL, 1005 ("tcp_usr_abort: inp_socket == NULL")); 1006 1007 /* 1008 * If we still have full TCP state, and we're not dropped, drop. 1009 */ 1010 if (!(inp->inp_flags & INP_TIMEWAIT) && 1011 !(inp->inp_flags & INP_DROPPED)) { 1012 tp = intotcpcb(inp); 1013 TCPDEBUG1(); 1014 tcp_drop(tp, ECONNABORTED); 1015 TCPDEBUG2(PRU_ABORT); 1016 } 1017 if (!(inp->inp_flags & INP_DROPPED)) { 1018 SOCK_LOCK(so); 1019 so->so_state |= SS_PROTOREF; 1020 SOCK_UNLOCK(so); 1021 inp->inp_flags |= INP_SOCKREF; 1022 } 1023 INP_WUNLOCK(inp); 1024 INP_INFO_WUNLOCK(&V_tcbinfo); 1025} 1026 1027/* 1028 * TCP socket is closed. Start friendly disconnect. 1029 */ 1030static void 1031tcp_usr_close(struct socket *so) 1032{ 1033 struct inpcb *inp; 1034 struct tcpcb *tp = NULL; 1035 TCPDEBUG0; 1036 1037 inp = sotoinpcb(so); 1038 KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); 1039 1040 INP_INFO_WLOCK(&V_tcbinfo); 1041 INP_WLOCK(inp); 1042 KASSERT(inp->inp_socket != NULL, 1043 ("tcp_usr_close: inp_socket == NULL")); 1044 1045 /* 1046 * If we still have full TCP state, and we're not dropped, initiate 1047 * a disconnect. 1048 */ 1049 if (!(inp->inp_flags & INP_TIMEWAIT) && 1050 !(inp->inp_flags & INP_DROPPED)) { 1051 tp = intotcpcb(inp); 1052 TCPDEBUG1(); 1053 tcp_disconnect(tp); 1054 TCPDEBUG2(PRU_CLOSE); 1055 } 1056 if (!(inp->inp_flags & INP_DROPPED)) { 1057 SOCK_LOCK(so); 1058 so->so_state |= SS_PROTOREF; 1059 SOCK_UNLOCK(so); 1060 inp->inp_flags |= INP_SOCKREF; 1061 } 1062 INP_WUNLOCK(inp); 1063 INP_INFO_WUNLOCK(&V_tcbinfo); 1064} 1065 1066/* 1067 * Receive out-of-band data. 1068 */ 1069static int 1070tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 1071{ 1072 int error = 0; 1073 struct inpcb *inp; 1074 struct tcpcb *tp = NULL; 1075 1076 TCPDEBUG0; 1077 inp = sotoinpcb(so); 1078 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 1079 INP_WLOCK(inp); 1080 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 1081 error = ECONNRESET; 1082 goto out; 1083 } 1084 tp = intotcpcb(inp); 1085 TCPDEBUG1(); 1086 if ((so->so_oobmark == 0 && 1087 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 1088 so->so_options & SO_OOBINLINE || 1089 tp->t_oobflags & TCPOOB_HADDATA) { 1090 error = EINVAL; 1091 goto out; 1092 } 1093 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 1094 error = EWOULDBLOCK; 1095 goto out; 1096 } 1097 m->m_len = 1; 1098 *mtod(m, caddr_t) = tp->t_iobc; 1099 if ((flags & MSG_PEEK) == 0) 1100 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1101 1102out: 1103 TCPDEBUG2(PRU_RCVOOB); 1104 INP_WUNLOCK(inp); 1105 return (error); 1106} 1107 1108#ifdef INET 1109struct pr_usrreqs tcp_usrreqs = { 1110 .pru_abort = tcp_usr_abort, 1111 .pru_accept = tcp_usr_accept, 1112 .pru_attach = tcp_usr_attach, 1113 .pru_bind = tcp_usr_bind, 1114 .pru_connect = tcp_usr_connect, 1115 .pru_control = in_control, 1116 .pru_detach = tcp_usr_detach, 1117 .pru_disconnect = tcp_usr_disconnect, 1118 .pru_listen = tcp_usr_listen, 1119 .pru_peeraddr = in_getpeeraddr, 1120 .pru_rcvd = tcp_usr_rcvd, 1121 .pru_rcvoob = tcp_usr_rcvoob, 1122 .pru_send = tcp_usr_send, 1123 .pru_shutdown = tcp_usr_shutdown, 1124 .pru_sockaddr = in_getsockaddr, 1125 .pru_sosetlabel = in_pcbsosetlabel, 1126 .pru_close = tcp_usr_close, 1127}; 1128#endif /* INET */ 1129 1130#ifdef INET6 1131struct pr_usrreqs tcp6_usrreqs = { 1132 .pru_abort = tcp_usr_abort, 1133 .pru_accept = tcp6_usr_accept, 1134 .pru_attach = tcp_usr_attach, 1135 .pru_bind = tcp6_usr_bind, 1136 .pru_connect = tcp6_usr_connect, 1137 .pru_control = in6_control, 1138 .pru_detach = tcp_usr_detach, 1139 .pru_disconnect = tcp_usr_disconnect, 1140 .pru_listen = tcp6_usr_listen, 1141 .pru_peeraddr = in6_mapped_peeraddr, 1142 .pru_rcvd = tcp_usr_rcvd, 1143 .pru_rcvoob = tcp_usr_rcvoob, 1144 .pru_send = tcp_usr_send, 1145 .pru_shutdown = tcp_usr_shutdown, 1146 .pru_sockaddr = in6_mapped_sockaddr, 1147 .pru_sosetlabel = in_pcbsosetlabel, 1148 .pru_close = tcp_usr_close, 1149}; 1150#endif /* INET6 */ 1151 1152#ifdef INET 1153/* 1154 * Common subroutine to open a TCP connection to remote host specified 1155 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1156 * port number if needed. Call in_pcbconnect_setup to do the routing and 1157 * to choose a local host address (interface). If there is an existing 1158 * incarnation of the same connection in TIME-WAIT state and if the remote 1159 * host was sending CC options and if the connection duration was < MSL, then 1160 * truncate the previous TIME-WAIT state and proceed. 1161 * Initialize connection parameters and enter SYN-SENT state. 1162 */ 1163static int 1164tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) 1165{ 1166 struct inpcb *inp = tp->t_inpcb, *oinp; 1167 struct socket *so = inp->inp_socket; 1168 struct in_addr laddr; 1169 u_short lport; 1170 int error; 1171 1172 INP_WLOCK_ASSERT(inp); 1173 INP_HASH_WLOCK(&V_tcbinfo); 1174 1175 if (inp->inp_lport == 0) { 1176 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1177 if (error) 1178 goto out; 1179 } 1180 1181 /* 1182 * Cannot simply call in_pcbconnect, because there might be an 1183 * earlier incarnation of this same connection still in 1184 * TIME_WAIT state, creating an ADDRINUSE error. 1185 */ 1186 laddr = inp->inp_laddr; 1187 lport = inp->inp_lport; 1188 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1189 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1190 if (error && oinp == NULL) 1191 goto out; 1192 if (oinp) { 1193 error = EADDRINUSE; 1194 goto out; 1195 } 1196 inp->inp_laddr = laddr; 1197 in_pcbrehash(inp); 1198 INP_HASH_WUNLOCK(&V_tcbinfo); 1199 1200 /* 1201 * Compute window scaling to request: 1202 * Scale to fit into sweet spot. See tcp_syncache.c. 1203 * XXX: This should move to tcp_output(). 1204 */ 1205 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1206 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 1207 tp->request_r_scale++; 1208 1209 soisconnecting(so); 1210 TCPSTAT_INC(tcps_connattempt); 1211 tcp_state_change(tp, TCPS_SYN_SENT); 1212 tp->iss = tcp_new_isn(tp); 1213 tcp_sendseqinit(tp); 1214 1215 return 0; 1216 1217out: 1218 INP_HASH_WUNLOCK(&V_tcbinfo); 1219 return (error); 1220} 1221#endif /* INET */ 1222 1223#ifdef INET6 1224static int 1225tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) 1226{ 1227 struct inpcb *inp = tp->t_inpcb, *oinp; 1228 struct socket *so = inp->inp_socket; 1229 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1230 struct in6_addr addr6; 1231 int error; 1232 1233 INP_WLOCK_ASSERT(inp); 1234 INP_HASH_WLOCK(&V_tcbinfo); 1235 1236 if (inp->inp_lport == 0) { 1237 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1238 if (error) 1239 goto out; 1240 } 1241 1242 /* 1243 * Cannot simply call in_pcbconnect, because there might be an 1244 * earlier incarnation of this same connection still in 1245 * TIME_WAIT state, creating an ADDRINUSE error. 1246 * in6_pcbladdr() also handles scope zone IDs. 1247 * 1248 * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked() 1249 * outside of in6_pcb.c if there were an in6_pcbconnect_setup(). 1250 */ 1251 error = in6_pcbladdr(inp, nam, &addr6); 1252 if (error) 1253 goto out; 1254 oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo, 1255 &sin6->sin6_addr, sin6->sin6_port, 1256 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1257 ? &addr6 1258 : &inp->in6p_laddr, 1259 inp->inp_lport, 0, NULL); 1260 if (oinp) { 1261 error = EADDRINUSE; 1262 goto out; 1263 } 1264 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1265 inp->in6p_laddr = addr6; 1266 inp->in6p_faddr = sin6->sin6_addr; 1267 inp->inp_fport = sin6->sin6_port; 1268 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 1269 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 1270 if (inp->inp_flags & IN6P_AUTOFLOWLABEL) 1271 inp->inp_flow |= 1272 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1273 in_pcbrehash(inp); 1274 INP_HASH_WUNLOCK(&V_tcbinfo); 1275 1276 /* Compute window scaling to request. */ 1277 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1278 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 1279 tp->request_r_scale++; 1280 1281 soisconnecting(so); 1282 TCPSTAT_INC(tcps_connattempt); 1283 tcp_state_change(tp, TCPS_SYN_SENT); 1284 tp->iss = tcp_new_isn(tp); 1285 tcp_sendseqinit(tp); 1286 1287 return 0; 1288 1289out: 1290 INP_HASH_WUNLOCK(&V_tcbinfo); 1291 return error; 1292} 1293#endif /* INET6 */ 1294 1295/* 1296 * Export TCP internal state information via a struct tcp_info, based on the 1297 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1298 * (TCP state machine, etc). We export all information using FreeBSD-native 1299 * constants -- for example, the numeric values for tcpi_state will differ 1300 * from Linux. 1301 */ 1302static void 1303tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 1304{ 1305 1306 INP_WLOCK_ASSERT(tp->t_inpcb); 1307 bzero(ti, sizeof(*ti)); 1308 1309 ti->tcpi_state = tp->t_state; 1310 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1311 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1312 if (tp->t_flags & TF_SACK_PERMIT) 1313 ti->tcpi_options |= TCPI_OPT_SACK; 1314 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1315 ti->tcpi_options |= TCPI_OPT_WSCALE; 1316 ti->tcpi_snd_wscale = tp->snd_scale; 1317 ti->tcpi_rcv_wscale = tp->rcv_scale; 1318 } 1319 1320 ti->tcpi_rto = tp->t_rxtcur * tick; 1321 ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick; 1322 ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; 1323 ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; 1324 1325 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1326 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1327 1328 /* 1329 * FreeBSD-specific extension fields for tcp_info. 1330 */ 1331 ti->tcpi_rcv_space = tp->rcv_wnd; 1332 ti->tcpi_rcv_nxt = tp->rcv_nxt; 1333 ti->tcpi_snd_wnd = tp->snd_wnd; 1334 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ 1335 ti->tcpi_snd_nxt = tp->snd_nxt; 1336 ti->tcpi_snd_mss = tp->t_maxseg; 1337 ti->tcpi_rcv_mss = tp->t_maxseg; 1338 if (tp->t_flags & TF_TOE) 1339 ti->tcpi_options |= TCPI_OPT_TOE; 1340 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; 1341 ti->tcpi_rcv_ooopack = tp->t_rcvoopack; 1342 ti->tcpi_snd_zerowin = tp->t_sndzerowin; 1343} 1344 1345/* 1346 * tcp_ctloutput() must drop the inpcb lock before performing copyin on 1347 * socket option arguments. When it re-acquires the lock after the copy, it 1348 * has to revalidate that the connection is still valid for the socket 1349 * option. 1350 */ 1351#define INP_WLOCK_RECHECK(inp) do { \ 1352 INP_WLOCK(inp); \ 1353 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ 1354 INP_WUNLOCK(inp); \ 1355 return (ECONNRESET); \ 1356 } \ 1357 tp = intotcpcb(inp); \ 1358} while(0) 1359 1360int 1361tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1362{ 1363 int error, opt, optval; 1364 u_int ui; 1365 struct inpcb *inp; 1366 struct tcpcb *tp; 1367 struct tcp_info ti; 1368 char buf[TCP_CA_NAME_MAX]; 1369 struct cc_algo *algo; 1370 1371 error = 0; 1372 inp = sotoinpcb(so); 1373 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1374 INP_WLOCK(inp); 1375 if (sopt->sopt_level != IPPROTO_TCP) { 1376#ifdef INET6 1377 if (inp->inp_vflag & INP_IPV6PROTO) { 1378 INP_WUNLOCK(inp); 1379 error = ip6_ctloutput(so, sopt); 1380 } 1381#endif /* INET6 */ 1382#if defined(INET6) && defined(INET) 1383 else 1384#endif 1385#ifdef INET 1386 { 1387 INP_WUNLOCK(inp); 1388 error = ip_ctloutput(so, sopt); 1389 } 1390#endif 1391 return (error); 1392 } 1393 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 1394 INP_WUNLOCK(inp); 1395 return (ECONNRESET); 1396 } 1397 1398 switch (sopt->sopt_dir) { 1399 case SOPT_SET: 1400 switch (sopt->sopt_name) { 1401#ifdef TCP_SIGNATURE 1402 case TCP_MD5SIG: 1403 INP_WUNLOCK(inp); 1404 error = sooptcopyin(sopt, &optval, sizeof optval, 1405 sizeof optval); 1406 if (error) 1407 return (error); 1408 1409 INP_WLOCK_RECHECK(inp); 1410 if (optval > 0) 1411 tp->t_flags |= TF_SIGNATURE; 1412 else 1413 tp->t_flags &= ~TF_SIGNATURE; 1414 goto unlock_and_done; 1415#endif /* TCP_SIGNATURE */ 1416 1417 case TCP_NODELAY: 1418 case TCP_NOOPT: 1419 INP_WUNLOCK(inp); 1420 error = sooptcopyin(sopt, &optval, sizeof optval, 1421 sizeof optval); 1422 if (error) 1423 return (error); 1424 1425 INP_WLOCK_RECHECK(inp); 1426 switch (sopt->sopt_name) { 1427 case TCP_NODELAY: 1428 opt = TF_NODELAY; 1429 break; 1430 case TCP_NOOPT: 1431 opt = TF_NOOPT; 1432 break; 1433 default: 1434 opt = 0; /* dead code to fool gcc */ 1435 break; 1436 } 1437 1438 if (optval) 1439 tp->t_flags |= opt; 1440 else 1441 tp->t_flags &= ~opt; 1442unlock_and_done: 1443#ifdef TCP_OFFLOAD 1444 if (tp->t_flags & TF_TOE) { 1445 tcp_offload_ctloutput(tp, sopt->sopt_dir, 1446 sopt->sopt_name); 1447 } 1448#endif 1449 INP_WUNLOCK(inp); 1450 break; 1451 1452 case TCP_NOPUSH: 1453 INP_WUNLOCK(inp); 1454 error = sooptcopyin(sopt, &optval, sizeof optval, 1455 sizeof optval); 1456 if (error) 1457 return (error); 1458 1459 INP_WLOCK_RECHECK(inp); 1460 if (optval) 1461 tp->t_flags |= TF_NOPUSH; 1462 else if (tp->t_flags & TF_NOPUSH) { 1463 tp->t_flags &= ~TF_NOPUSH; 1464 if (TCPS_HAVEESTABLISHED(tp->t_state)) 1465 error = tcp_output(tp); 1466 } 1467 goto unlock_and_done; 1468 1469 case TCP_MAXSEG: 1470 INP_WUNLOCK(inp); 1471 error = sooptcopyin(sopt, &optval, sizeof optval, 1472 sizeof optval); 1473 if (error) 1474 return (error); 1475 1476 INP_WLOCK_RECHECK(inp); 1477 if (optval > 0 && optval <= tp->t_maxseg && 1478 optval + 40 >= V_tcp_minmss) 1479 tp->t_maxseg = optval; 1480 else 1481 error = EINVAL; 1482 goto unlock_and_done; 1483 1484 case TCP_INFO: 1485 INP_WUNLOCK(inp); 1486 error = EINVAL; 1487 break; 1488 1489 case TCP_CONGESTION: 1490 INP_WUNLOCK(inp); 1491 bzero(buf, sizeof(buf)); 1492 error = sooptcopyin(sopt, &buf, sizeof(buf), 1); 1493 if (error) 1494 break; 1495 INP_WLOCK_RECHECK(inp); 1496 /* 1497 * Return EINVAL if we can't find the requested cc algo. 1498 */ 1499 error = EINVAL; 1500 CC_LIST_RLOCK(); 1501 STAILQ_FOREACH(algo, &cc_list, entries) { 1502 if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) 1503 == 0) { 1504 /* We've found the requested algo. */ 1505 error = 0; 1506 /* 1507 * We hold a write lock over the tcb 1508 * so it's safe to do these things 1509 * without ordering concerns. 1510 */ 1511 if (CC_ALGO(tp)->cb_destroy != NULL) 1512 CC_ALGO(tp)->cb_destroy(tp->ccv); 1513 CC_ALGO(tp) = algo; 1514 /* 1515 * If something goes pear shaped 1516 * initialising the new algo, 1517 * fall back to newreno (which 1518 * does not require initialisation). 1519 */ 1520 if (algo->cb_init != NULL) 1521 if (algo->cb_init(tp->ccv) > 0) { 1522 CC_ALGO(tp) = &newreno_cc_algo; 1523 /* 1524 * The only reason init 1525 * should fail is 1526 * because of malloc. 1527 */ 1528 error = ENOMEM; 1529 } 1530 break; /* Break the STAILQ_FOREACH. */ 1531 } 1532 } 1533 CC_LIST_RUNLOCK(); 1534 goto unlock_and_done; 1535 1536 case TCP_KEEPIDLE: 1537 case TCP_KEEPINTVL: 1538 case TCP_KEEPINIT: 1539 INP_WUNLOCK(inp); 1540 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); 1541 if (error) 1542 return (error); 1543 1544 if (ui > (UINT_MAX / hz)) { 1545 error = EINVAL; 1546 break; 1547 } 1548 ui *= hz; 1549 1550 INP_WLOCK_RECHECK(inp); 1551 switch (sopt->sopt_name) { 1552 case TCP_KEEPIDLE: 1553 tp->t_keepidle = ui; 1554 /* 1555 * XXX: better check current remaining 1556 * timeout and "merge" it with new value. 1557 */ 1558 if ((tp->t_state > TCPS_LISTEN) && 1559 (tp->t_state <= TCPS_CLOSING)) 1560 tcp_timer_activate(tp, TT_KEEP, 1561 TP_KEEPIDLE(tp)); 1562 break; 1563 case TCP_KEEPINTVL: 1564 tp->t_keepintvl = ui; 1565 if ((tp->t_state == TCPS_FIN_WAIT_2) && 1566 (TP_MAXIDLE(tp) > 0)) 1567 tcp_timer_activate(tp, TT_2MSL, 1568 TP_MAXIDLE(tp)); 1569 break; 1570 case TCP_KEEPINIT: 1571 tp->t_keepinit = ui; 1572 if (tp->t_state == TCPS_SYN_RECEIVED || 1573 tp->t_state == TCPS_SYN_SENT) 1574 tcp_timer_activate(tp, TT_KEEP, 1575 TP_KEEPINIT(tp)); 1576 break; 1577 } 1578 goto unlock_and_done; 1579 1580 case TCP_KEEPCNT: 1581 INP_WUNLOCK(inp); 1582 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); 1583 if (error) 1584 return (error); 1585 1586 INP_WLOCK_RECHECK(inp); 1587 tp->t_keepcnt = ui; 1588 if ((tp->t_state == TCPS_FIN_WAIT_2) && 1589 (TP_MAXIDLE(tp) > 0)) 1590 tcp_timer_activate(tp, TT_2MSL, 1591 TP_MAXIDLE(tp)); 1592 goto unlock_and_done; 1593 1594#ifdef TCP_RFC7413 1595 case TCP_FASTOPEN: 1596 INP_WUNLOCK(inp); 1597 if (!V_tcp_fastopen_enabled) 1598 return (EPERM); 1599 1600 error = sooptcopyin(sopt, &optval, sizeof optval, 1601 sizeof optval); 1602 if (error) 1603 return (error); 1604 1605 INP_WLOCK_RECHECK(inp); 1606 if (optval) { 1607 tp->t_flags |= TF_FASTOPEN; 1608 if ((tp->t_state == TCPS_LISTEN) && 1609 (tp->t_tfo_pending == NULL)) 1610 tp->t_tfo_pending = 1611 tcp_fastopen_alloc_counter(); 1612 } else 1613 tp->t_flags &= ~TF_FASTOPEN; 1614 goto unlock_and_done; 1615#endif 1616 1617 default: 1618 INP_WUNLOCK(inp); 1619 error = ENOPROTOOPT; 1620 break; 1621 } 1622 break; 1623 1624 case SOPT_GET: 1625 tp = intotcpcb(inp); 1626 switch (sopt->sopt_name) { 1627#ifdef TCP_SIGNATURE 1628 case TCP_MD5SIG: 1629 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1630 INP_WUNLOCK(inp); 1631 error = sooptcopyout(sopt, &optval, sizeof optval); 1632 break; 1633#endif 1634 1635 case TCP_NODELAY: 1636 optval = tp->t_flags & TF_NODELAY; 1637 INP_WUNLOCK(inp); 1638 error = sooptcopyout(sopt, &optval, sizeof optval); 1639 break; 1640 case TCP_MAXSEG: 1641 optval = tp->t_maxseg; 1642 INP_WUNLOCK(inp); 1643 error = sooptcopyout(sopt, &optval, sizeof optval); 1644 break; 1645 case TCP_NOOPT: 1646 optval = tp->t_flags & TF_NOOPT; 1647 INP_WUNLOCK(inp); 1648 error = sooptcopyout(sopt, &optval, sizeof optval); 1649 break; 1650 case TCP_NOPUSH: 1651 optval = tp->t_flags & TF_NOPUSH; 1652 INP_WUNLOCK(inp); 1653 error = sooptcopyout(sopt, &optval, sizeof optval); 1654 break; 1655 case TCP_INFO: 1656 tcp_fill_info(tp, &ti); 1657 INP_WUNLOCK(inp); 1658 error = sooptcopyout(sopt, &ti, sizeof ti); 1659 break; 1660 case TCP_CONGESTION: 1661 bzero(buf, sizeof(buf)); 1662 strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); 1663 INP_WUNLOCK(inp); 1664 error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX); 1665 break; 1666 case TCP_KEEPIDLE: 1667 case TCP_KEEPINTVL: 1668 case TCP_KEEPINIT: 1669 case TCP_KEEPCNT: 1670 switch (sopt->sopt_name) { 1671 case TCP_KEEPIDLE: 1672 ui = tp->t_keepidle / hz; 1673 break; 1674 case TCP_KEEPINTVL: 1675 ui = tp->t_keepintvl / hz; 1676 break; 1677 case TCP_KEEPINIT: 1678 ui = tp->t_keepinit / hz; 1679 break; 1680 case TCP_KEEPCNT: 1681 ui = tp->t_keepcnt; 1682 break; 1683 } 1684 INP_WUNLOCK(inp); 1685 error = sooptcopyout(sopt, &ui, sizeof(ui)); 1686 break; 1687#ifdef TCP_RFC7413 1688 case TCP_FASTOPEN: 1689 optval = tp->t_flags & TF_FASTOPEN; 1690 INP_WUNLOCK(inp); 1691 error = sooptcopyout(sopt, &optval, sizeof optval); 1692 break; 1693#endif 1694 default: 1695 INP_WUNLOCK(inp); 1696 error = ENOPROTOOPT; 1697 break; 1698 } 1699 break; 1700 } 1701 return (error); 1702} 1703#undef INP_WLOCK_RECHECK 1704 1705/* 1706 * Attach TCP protocol to socket, allocating 1707 * internet protocol control block, tcp control block, 1708 * bufer space, and entering LISTEN state if to accept connections. 1709 */ 1710static int 1711tcp_attach(struct socket *so) 1712{ 1713 struct tcpcb *tp; 1714 struct inpcb *inp; 1715 int error; 1716 1717 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1718 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); 1719 if (error) 1720 return (error); 1721 } 1722 so->so_rcv.sb_flags |= SB_AUTOSIZE; 1723 so->so_snd.sb_flags |= SB_AUTOSIZE; 1724 INP_INFO_WLOCK(&V_tcbinfo); 1725 error = in_pcballoc(so, &V_tcbinfo); 1726 if (error) { 1727 INP_INFO_WUNLOCK(&V_tcbinfo); 1728 return (error); 1729 } 1730 inp = sotoinpcb(so); 1731#ifdef INET6 1732 if (inp->inp_vflag & INP_IPV6PROTO) { 1733 inp->inp_vflag |= INP_IPV6; 1734 inp->in6p_hops = -1; /* use kernel default */ 1735 } 1736 else 1737#endif 1738 inp->inp_vflag |= INP_IPV4; 1739 tp = tcp_newtcpcb(inp); 1740 if (tp == NULL) { 1741 in_pcbdetach(inp); 1742 in_pcbfree(inp); 1743 INP_INFO_WUNLOCK(&V_tcbinfo); 1744 return (ENOBUFS); 1745 } 1746 tp->t_state = TCPS_CLOSED; 1747 INP_WUNLOCK(inp); 1748 INP_INFO_WUNLOCK(&V_tcbinfo); 1749 return (0); 1750} 1751 1752/* 1753 * Initiate (or continue) disconnect. 1754 * If embryonic state, just send reset (once). 1755 * If in ``let data drain'' option and linger null, just drop. 1756 * Otherwise (hard), mark socket disconnecting and drop 1757 * current input data; switch states based on user close, and 1758 * send segment to peer (with FIN). 1759 */ 1760static void 1761tcp_disconnect(struct tcpcb *tp) 1762{ 1763 struct inpcb *inp = tp->t_inpcb; 1764 struct socket *so = inp->inp_socket; 1765 1766 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 1767 INP_WLOCK_ASSERT(inp); 1768 1769 /* 1770 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1771 * socket is still open. 1772 */ 1773 if (tp->t_state < TCPS_ESTABLISHED) { 1774 tp = tcp_close(tp); 1775 KASSERT(tp != NULL, 1776 ("tcp_disconnect: tcp_close() returned NULL")); 1777 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1778 tp = tcp_drop(tp, 0); 1779 KASSERT(tp != NULL, 1780 ("tcp_disconnect: tcp_drop() returned NULL")); 1781 } else { 1782 soisdisconnecting(so); 1783 sbflush(&so->so_rcv); 1784 tcp_usrclosed(tp); 1785 if (!(inp->inp_flags & INP_DROPPED)) 1786 tcp_output(tp); 1787 } 1788} 1789 1790/* 1791 * User issued close, and wish to trail through shutdown states: 1792 * if never received SYN, just forget it. If got a SYN from peer, 1793 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1794 * If already got a FIN from peer, then almost done; go to LAST_ACK 1795 * state. In all other cases, have already sent FIN to peer (e.g. 1796 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1797 * for peer to send FIN or not respond to keep-alives, etc. 1798 * We can let the user exit from the close as soon as the FIN is acked. 1799 */ 1800static void 1801tcp_usrclosed(struct tcpcb *tp) 1802{ 1803 1804 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 1805 INP_WLOCK_ASSERT(tp->t_inpcb); 1806 1807 switch (tp->t_state) { 1808 case TCPS_LISTEN: 1809#ifdef TCP_OFFLOAD 1810 tcp_offload_listen_stop(tp); 1811#endif 1812 tcp_state_change(tp, TCPS_CLOSED); 1813 /* FALLTHROUGH */ 1814 case TCPS_CLOSED: 1815 tp = tcp_close(tp); 1816 /* 1817 * tcp_close() should never return NULL here as the socket is 1818 * still open. 1819 */ 1820 KASSERT(tp != NULL, 1821 ("tcp_usrclosed: tcp_close() returned NULL")); 1822 break; 1823 1824 case TCPS_SYN_SENT: 1825 case TCPS_SYN_RECEIVED: 1826 tp->t_flags |= TF_NEEDFIN; 1827 break; 1828 1829 case TCPS_ESTABLISHED: 1830 tcp_state_change(tp, TCPS_FIN_WAIT_1); 1831 break; 1832 1833 case TCPS_CLOSE_WAIT: 1834 tcp_state_change(tp, TCPS_LAST_ACK); 1835 break; 1836 } 1837 if (tp->t_state >= TCPS_FIN_WAIT_2) { 1838 soisdisconnected(tp->t_inpcb->inp_socket); 1839 /* Prevent the connection hanging in FIN_WAIT_2 forever. */ 1840 if (tp->t_state == TCPS_FIN_WAIT_2) { 1841 int timeout; 1842 1843 timeout = (tcp_fast_finwait2_recycle) ? 1844 tcp_finwait2_timeout : TP_MAXIDLE(tp); 1845 tcp_timer_activate(tp, TT_2MSL, timeout); 1846 } 1847 } 1848} 1849 1850#ifdef DDB 1851static void 1852db_print_indent(int indent) 1853{ 1854 int i; 1855 1856 for (i = 0; i < indent; i++) 1857 db_printf(" "); 1858} 1859 1860static void 1861db_print_tstate(int t_state) 1862{ 1863 1864 switch (t_state) { 1865 case TCPS_CLOSED: 1866 db_printf("TCPS_CLOSED"); 1867 return; 1868 1869 case TCPS_LISTEN: 1870 db_printf("TCPS_LISTEN"); 1871 return; 1872 1873 case TCPS_SYN_SENT: 1874 db_printf("TCPS_SYN_SENT"); 1875 return; 1876 1877 case TCPS_SYN_RECEIVED: 1878 db_printf("TCPS_SYN_RECEIVED"); 1879 return; 1880 1881 case TCPS_ESTABLISHED: 1882 db_printf("TCPS_ESTABLISHED"); 1883 return; 1884 1885 case TCPS_CLOSE_WAIT: 1886 db_printf("TCPS_CLOSE_WAIT"); 1887 return; 1888 1889 case TCPS_FIN_WAIT_1: 1890 db_printf("TCPS_FIN_WAIT_1"); 1891 return; 1892 1893 case TCPS_CLOSING: 1894 db_printf("TCPS_CLOSING"); 1895 return; 1896 1897 case TCPS_LAST_ACK: 1898 db_printf("TCPS_LAST_ACK"); 1899 return; 1900 1901 case TCPS_FIN_WAIT_2: 1902 db_printf("TCPS_FIN_WAIT_2"); 1903 return; 1904 1905 case TCPS_TIME_WAIT: 1906 db_printf("TCPS_TIME_WAIT"); 1907 return; 1908 1909 default: 1910 db_printf("unknown"); 1911 return; 1912 } 1913} 1914 1915static void 1916db_print_tflags(u_int t_flags) 1917{ 1918 int comma; 1919 1920 comma = 0; 1921 if (t_flags & TF_ACKNOW) { 1922 db_printf("%sTF_ACKNOW", comma ? ", " : ""); 1923 comma = 1; 1924 } 1925 if (t_flags & TF_DELACK) { 1926 db_printf("%sTF_DELACK", comma ? ", " : ""); 1927 comma = 1; 1928 } 1929 if (t_flags & TF_NODELAY) { 1930 db_printf("%sTF_NODELAY", comma ? ", " : ""); 1931 comma = 1; 1932 } 1933 if (t_flags & TF_NOOPT) { 1934 db_printf("%sTF_NOOPT", comma ? ", " : ""); 1935 comma = 1; 1936 } 1937 if (t_flags & TF_SENTFIN) { 1938 db_printf("%sTF_SENTFIN", comma ? ", " : ""); 1939 comma = 1; 1940 } 1941 if (t_flags & TF_REQ_SCALE) { 1942 db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); 1943 comma = 1; 1944 } 1945 if (t_flags & TF_RCVD_SCALE) { 1946 db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); 1947 comma = 1; 1948 } 1949 if (t_flags & TF_REQ_TSTMP) { 1950 db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); 1951 comma = 1; 1952 } 1953 if (t_flags & TF_RCVD_TSTMP) { 1954 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); 1955 comma = 1; 1956 } 1957 if (t_flags & TF_SACK_PERMIT) { 1958 db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); 1959 comma = 1; 1960 } 1961 if (t_flags & TF_NEEDSYN) { 1962 db_printf("%sTF_NEEDSYN", comma ? ", " : ""); 1963 comma = 1; 1964 } 1965 if (t_flags & TF_NEEDFIN) { 1966 db_printf("%sTF_NEEDFIN", comma ? ", " : ""); 1967 comma = 1; 1968 } 1969 if (t_flags & TF_NOPUSH) { 1970 db_printf("%sTF_NOPUSH", comma ? ", " : ""); 1971 comma = 1; 1972 } 1973 if (t_flags & TF_MORETOCOME) { 1974 db_printf("%sTF_MORETOCOME", comma ? ", " : ""); 1975 comma = 1; 1976 } 1977 if (t_flags & TF_LQ_OVERFLOW) { 1978 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : ""); 1979 comma = 1; 1980 } 1981 if (t_flags & TF_LASTIDLE) { 1982 db_printf("%sTF_LASTIDLE", comma ? ", " : ""); 1983 comma = 1; 1984 } 1985 if (t_flags & TF_RXWIN0SENT) { 1986 db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); 1987 comma = 1; 1988 } 1989 if (t_flags & TF_FASTRECOVERY) { 1990 db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); 1991 comma = 1; 1992 } 1993 if (t_flags & TF_CONGRECOVERY) { 1994 db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); 1995 comma = 1; 1996 } 1997 if (t_flags & TF_WASFRECOVERY) { 1998 db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); 1999 comma = 1; 2000 } 2001 if (t_flags & TF_SIGNATURE) { 2002 db_printf("%sTF_SIGNATURE", comma ? ", " : ""); 2003 comma = 1; 2004 } 2005 if (t_flags & TF_FORCEDATA) { 2006 db_printf("%sTF_FORCEDATA", comma ? ", " : ""); 2007 comma = 1; 2008 } 2009 if (t_flags & TF_TSO) { 2010 db_printf("%sTF_TSO", comma ? ", " : ""); 2011 comma = 1; 2012 } 2013 if (t_flags & TF_ECN_PERMIT) { 2014 db_printf("%sTF_ECN_PERMIT", comma ? ", " : ""); 2015 comma = 1; 2016 } 2017 if (t_flags & TF_FASTOPEN) { 2018 db_printf("%sTF_FASTOPEN", comma ? ", " : ""); 2019 comma = 1; 2020 } 2021} 2022 2023static void 2024db_print_toobflags(char t_oobflags) 2025{ 2026 int comma; 2027 2028 comma = 0; 2029 if (t_oobflags & TCPOOB_HAVEDATA) { 2030 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); 2031 comma = 1; 2032 } 2033 if (t_oobflags & TCPOOB_HADDATA) { 2034 db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); 2035 comma = 1; 2036 } 2037} 2038 2039static void 2040db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) 2041{ 2042 2043 db_print_indent(indent); 2044 db_printf("%s at %p\n", name, tp); 2045 2046 indent += 2; 2047 2048 db_print_indent(indent); 2049 db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", 2050 LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); 2051 2052 db_print_indent(indent); 2053 db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n", 2054 &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep); 2055 2056 db_print_indent(indent); 2057 db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl, 2058 &tp->t_timers->tt_delack, tp->t_inpcb); 2059 2060 db_print_indent(indent); 2061 db_printf("t_state: %d (", tp->t_state); 2062 db_print_tstate(tp->t_state); 2063 db_printf(")\n"); 2064 2065 db_print_indent(indent); 2066 db_printf("t_flags: 0x%x (", tp->t_flags); 2067 db_print_tflags(tp->t_flags); 2068 db_printf(")\n"); 2069 2070 db_print_indent(indent); 2071 db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n", 2072 tp->snd_una, tp->snd_max, tp->snd_nxt); 2073 2074 db_print_indent(indent); 2075 db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n", 2076 tp->snd_up, tp->snd_wl1, tp->snd_wl2); 2077 2078 db_print_indent(indent); 2079 db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n", 2080 tp->iss, tp->irs, tp->rcv_nxt); 2081 2082 db_print_indent(indent); 2083 db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n", 2084 tp->rcv_adv, tp->rcv_wnd, tp->rcv_up); 2085 2086 db_print_indent(indent); 2087 db_printf("snd_wnd: %lu snd_cwnd: %lu\n", 2088 tp->snd_wnd, tp->snd_cwnd); 2089 2090 db_print_indent(indent); 2091 db_printf("snd_ssthresh: %lu snd_recover: " 2092 "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); 2093 2094 db_print_indent(indent); 2095 db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n", 2096 tp->t_maxopd, tp->t_rcvtime, tp->t_starttime); 2097 2098 db_print_indent(indent); 2099 db_printf("t_rttime: %u t_rtsq: 0x%08x\n", 2100 tp->t_rtttime, tp->t_rtseq); 2101 2102 db_print_indent(indent); 2103 db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", 2104 tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); 2105 2106 db_print_indent(indent); 2107 db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u " 2108 "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin, 2109 tp->t_rttbest); 2110 2111 db_print_indent(indent); 2112 db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n", 2113 tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); 2114 2115 db_print_indent(indent); 2116 db_printf("t_oobflags: 0x%x (", tp->t_oobflags); 2117 db_print_toobflags(tp->t_oobflags); 2118 db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); 2119 2120 db_print_indent(indent); 2121 db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", 2122 tp->snd_scale, tp->rcv_scale, tp->request_r_scale); 2123 2124 db_print_indent(indent); 2125 db_printf("ts_recent: %u ts_recent_age: %u\n", 2126 tp->ts_recent, tp->ts_recent_age); 2127 2128 db_print_indent(indent); 2129 db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " 2130 "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); 2131 2132 db_print_indent(indent); 2133 db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x " 2134 "t_badrxtwin: %u\n", tp->snd_ssthresh_prev, 2135 tp->snd_recover_prev, tp->t_badrxtwin); 2136 2137 db_print_indent(indent); 2138 db_printf("snd_numholes: %d snd_holes first: %p\n", 2139 tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes)); 2140 2141 db_print_indent(indent); 2142 db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: " 2143 "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata); 2144 2145 /* Skip sackblks, sackhint. */ 2146 2147 db_print_indent(indent); 2148 db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", 2149 tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); 2150} 2151 2152DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) 2153{ 2154 struct tcpcb *tp; 2155 2156 if (!have_addr) { 2157 db_printf("usage: show tcpcb <addr>\n"); 2158 return; 2159 } 2160 tp = (struct tcpcb *)addr; 2161 2162 db_print_tcpcb(tp, "tcpcb", 0); 2163} 2164#endif 2165