tcp_usrreq.c revision 18795
1/* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 34 * $Id: tcp_usrreq.c,v 1.25 1996/09/13 23:51:44 pst Exp $ 35 */ 36 37#include <sys/param.h> 38#include <sys/queue.h> 39#include <sys/systm.h> 40#include <sys/kernel.h> 41#include <sys/sysctl.h> 42#include <sys/malloc.h> 43#include <sys/mbuf.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <sys/protosw.h> 47#include <sys/errno.h> 48#include <sys/stat.h> 49 50#include <net/if.h> 51#include <net/route.h> 52 53#include <netinet/in.h> 54#include <netinet/in_systm.h> 55#include <netinet/ip.h> 56#include <netinet/in_pcb.h> 57#include <netinet/in_var.h> 58#include <netinet/ip_var.h> 59#include <netinet/tcp.h> 60#include <netinet/tcp_fsm.h> 61#include <netinet/tcp_seq.h> 62#include <netinet/tcp_timer.h> 63#include <netinet/tcp_var.h> 64#include <netinet/tcpip.h> 65#ifdef TCPDEBUG 66#include <netinet/tcp_debug.h> 67#endif 68 69/* 70 * TCP protocol interface to socket abstraction. 71 */ 72extern char *tcpstates[]; 73 74static int tcp_attach __P((struct socket *)); 75static int tcp_connect __P((struct tcpcb *, struct mbuf *)); 76static struct tcpcb * 77 tcp_disconnect __P((struct tcpcb *)); 78static struct tcpcb * 79 tcp_usrclosed __P((struct tcpcb *)); 80 81#ifdef notdef 82/* 83 * Process a TCP user request for TCP tb. If this is a send request 84 * then m is the mbuf chain of send data. If this is a timer expiration 85 * (called from the software clock routine), then timertype tells which timer. 
*/
/*
 * Legacy single-entry-point dispatcher.  It lives inside the
 * `#ifdef notdef' region opened above and closed by the `#endif' that
 * follows the function, so it is only compiled when `notdef' is defined.
 *
 *	so	 socket the request applies to
 *	req	 PRU_* request code selecting the case below
 *	m	 data mbuf chain for sends; reused as a (struct stat *) for
 *		 PRU_SENSE and as the ioctl command for PRU_CONTROL
 *	nam	 address mbuf; reused as an integer (flags / timer index) for
 *		 PRU_RCVOOB, PRU_SLOWTIMO and as the ioctl data for PRU_CONTROL
 *	control	 control mbuf; a non-empty one is rejected with EINVAL
 *
 * Returns 0 or an errno value.  Everything after the PRU_CONTROL and
 * control-mbuf checks runs between splnet() and splx().
 */
/*ARGSUSED*/
int
tcp_usrreq(so, req, m, nam, control)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
{
	register struct inpcb *inp;
	register struct tcpcb *tp = 0;
	struct sockaddr_in *sinp;
	int s;
	int error = 0;
#ifdef TCPDEBUG
	int ostate;
#endif

	/* ioctl-style requests are passed straight to in_control(). */
	if (req == PRU_CONTROL)
		return (in_control(so, (u_long)m, (caddr_t)nam,
			(struct ifnet *)control));
	/* A control mbuf that carries data is refused; both chains are freed. */
	if (control && control->m_len) {
		m_freem(control);
		if (m)
			m_freem(m);
		return (EINVAL);
	}

	s = splnet();
	inp = sotoinpcb(so);
	/*
	 * When a TCP is attached to a socket, then there will be
	 * a (struct inpcb) pointed at by the socket, and this
	 * structure will point at a subsidiary (struct tcpcb).
	 */
	if (inp == 0 && req != PRU_ATTACH) {
		splx(s);
#if 0
		/*
		 * The following corrects an mbuf leak under rare
		 * circumstances, but has not been fully tested.
		 */
		if (m && req != PRU_SENSE)
			m_freem(m);
#else
		/* safer version of fix for mbuf leak */
		if (m && (req == PRU_SEND || req == PRU_SENDOOB))
			m_freem(m);
#endif
		return (EINVAL);		/* XXX */
	}
	if (inp) {
		tp = intotcpcb(inp);
		/* WHAT IF TP IS 0? */
#ifdef KPROF
		tcp_acounts[tp->t_state][req]++;
#endif
#ifdef TCPDEBUG
		/* Remember the state on entry for the trace record below. */
		ostate = tp->t_state;
	} else
		ostate = 0;
#else /* TCPDEBUG */
	}
#endif /* TCPDEBUG */

	switch (req) {

	/*
	 * TCP attaches to socket via PRU_ATTACH, reserving space,
	 * and an internet control block.
	 */
	case PRU_ATTACH:
		if (inp) {
			error = EISCONN;
			break;
		}
		error = tcp_attach(so);
		if (error)
			break;
		/* SO_LINGER set with a zero timeout: substitute the TCP default. */
		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
			so->so_linger = TCP_LINGERTIME * hz;
		tp = sototcpcb(so);
		break;

	/*
	 * PRU_DETACH detaches the TCP protocol from the socket.
	 * If the protocol state is non-embryonic, then can't
	 * do this directly: have to initiate a PRU_DISCONNECT,
	 * which may finish later; embryonic TCB's can just
	 * be discarded here.
	 */
	case PRU_DETACH:
		if (tp->t_state > TCPS_LISTEN)
			tp = tcp_disconnect(tp);
		else
			tp = tcp_close(tp);
		break;

	/*
	 * Give the socket an address.
	 */
	case PRU_BIND:
		/*
		 * Must check for multicast addresses and disallow binding
		 * to them.
		 */
		sinp = mtod(nam, struct sockaddr_in *);
		if (sinp->sin_family == AF_INET &&
			IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			break;
		}
		error = in_pcbbind(inp, nam);
		if (error)
			break;
		break;

	/*
	 * Prepare to accept connections.
	 */
	case PRU_LISTEN:
		/* No local port yet: bind with a NULL address first. */
		if (inp->inp_lport == 0)
			error = in_pcbbind(inp, NULL);
		if (error == 0)
			tp->t_state = TCPS_LISTEN;
		break;

	/*
	 * Initiate connection to peer.
	 * Create a template for use in transmissions on this connection.
	 * Enter SYN_SENT state, and mark socket as connecting.
	 * Start keep-alive timer, and seed output sequence space.
	 * Send initial segment on connection.
	 */
	case PRU_CONNECT:
		/*
		 * Must disallow TCP ``connections'' to multicast addresses.
		 */
		sinp = mtod(nam, struct sockaddr_in *);
		if (sinp->sin_family == AF_INET
		    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			break;
		}

		if ((error = tcp_connect(tp, nam)) != 0)
			break;
		error = tcp_output(tp);
		break;

	/*
	 * Create a TCP connection between two sockets.
	 */
	case PRU_CONNECT2:
		error = EOPNOTSUPP;
		break;

	/*
	 * Initiate disconnect from peer.
	 * If connection never passed embryonic stage, just drop;
	 * else if don't need to let data drain, then can just drop anyways,
	 * else have to begin TCP shutdown process: mark socket disconnecting,
	 * drain unread data, state switch to reflect user close, and
	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
	 * when peer sends FIN and acks ours.
	 *
	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
	 */
	case PRU_DISCONNECT:
		tp = tcp_disconnect(tp);
		break;

	/*
	 * Accept a connection.  Essentially all the work is
	 * done at higher levels; just return the address
	 * of the peer, storing through addr.
	 */
	case PRU_ACCEPT:
		in_setpeeraddr(inp, nam);
		break;

	/*
	 * Mark the connection as being incapable of further output.
	 */
	case PRU_SHUTDOWN:
		socantsendmore(so);
		tp = tcp_usrclosed(tp);
		/* tcp_usrclosed() may hand back a null tcpcb; only send if not. */
		if (tp)
			error = tcp_output(tp);
		break;

	/*
	 * After a receive, possibly send window update to peer.
	 */
	case PRU_RCVD:
		(void) tcp_output(tp);
		break;

	/*
	 * Do a send by putting data in output queue and updating urgent
	 * marker if URG set.  Possibly send more data.
	 */
	case PRU_SEND_EOF:
	case PRU_SEND:
		sbappend(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
			error = tcp_connect(tp, nam);
			if (error)
				break;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
		}

		if (req == PRU_SEND_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			tp = tcp_usrclosed(tp);
		}
		if (tp != NULL)
			error = tcp_output(tp);
		break;

	/*
	 * Abort the TCP.
	 */
	case PRU_ABORT:
		tp = tcp_drop(tp, ECONNABORTED);
		break;

	/* Here `m' is really a (struct stat *); returns without tracing. */
	case PRU_SENSE:
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		(void) splx(s);
		return (0);

	/* Here `nam' is really the integer receive flags (MSG_PEEK). */
	case PRU_RCVOOB:
		if ((so->so_oobmark == 0 &&
		    (so->so_state & SS_RCVATMARK) == 0) ||
		    so->so_options & SO_OOBINLINE ||
		    tp->t_oobflags & TCPOOB_HADDATA) {
			error = EINVAL;
			break;
		}
		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
			error = EWOULDBLOCK;
			break;
		}
		/* Hand back the single saved out-of-band byte. */
		m->m_len = 1;
		*mtod(m, caddr_t) = tp->t_iobc;
		/* Unless peeking, flip HAVEDATA off and HADDATA on. */
		if (((int)nam & MSG_PEEK) == 0)
			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
		break;

	case PRU_SENDOOB:
		if (sbspace(&so->so_snd) < -512) {
			m_freem(m);
			error = ENOBUFS;
			break;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappend(&so->so_snd, m);
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		/* t_force is raised only for the duration of this tcp_output(). */
		tp->t_force = 1;
		error = tcp_output(tp);
		tp->t_force = 0;
		break;

	case PRU_SOCKADDR:
		in_setsockaddr(inp, nam);
		break;

	case PRU_PEERADDR:
		in_setpeeraddr(inp, nam);
		break;

	/*
	 * TCP slow timer went off; going through this
	 * routine for tracing's sake.
	 */
	case PRU_SLOWTIMO:
		tp = tcp_timers(tp, (int)nam);
#ifdef TCPDEBUG
		req |= (int)nam << 8;		/* for debug's sake */
#endif
		break;

	default:
		panic("tcp_usrreq");
	}
#ifdef TCPDEBUG
	if (tp && (so->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
#endif
	splx(s);
	return (error);
}
#endif

#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate
#define	TCPDEBUG1()	ostate = tp ? \
tp->t_state : 0 401#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 402 tcp_trace(TA_USER, ostate, tp, 0, req) 403#else 404#define TCPDEBUG0 405#define TCPDEBUG1() 406#define TCPDEBUG2(req) 407#endif 408 409/* 410 * TCP attaches to socket via pru_attach(), reserving space, 411 * and an internet control block. 412 */ 413static int 414tcp_usr_attach(struct socket *so, int proto) 415{ 416 int s = splnet(); 417 int error; 418 struct inpcb *inp = sotoinpcb(so); 419 struct tcpcb *tp = 0; 420 TCPDEBUG0; 421 422 TCPDEBUG1(); 423 if (inp) { 424 error = EISCONN; 425 goto out; 426 } 427 428 error = tcp_attach(so); 429 if (error) 430 goto out; 431 432 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 433 so->so_linger = TCP_LINGERTIME * hz; 434 tp = sototcpcb(so); 435out: 436 TCPDEBUG2(PRU_ATTACH); 437 splx(s); 438 return error; 439} 440 441/* 442 * pru_detach() detaches the TCP protocol from the socket. 443 * If the protocol state is non-embryonic, then can't 444 * do this directly: have to initiate a pru_disconnect(), 445 * which may finish later; embryonic TCB's can just 446 * be discarded here. 447 */ 448static int 449tcp_usr_detach(struct socket *so) 450{ 451 int s = splnet(); 452 int error = 0; 453 struct inpcb *inp = sotoinpcb(so); 454 struct tcpcb *tp; 455 TCPDEBUG0; 456 457 if (inp == 0) { 458 splx(s); 459 return EINVAL; /* XXX */ 460 } 461 tp = intotcpcb(inp); 462 TCPDEBUG1(); 463 if (tp->t_state > TCPS_LISTEN) 464 tp = tcp_disconnect(tp); 465 else 466 tp = tcp_close(tp); 467 468 TCPDEBUG2(PRU_DETACH); 469 splx(s); 470 return error; 471} 472 473#define COMMON_START() TCPDEBUG0; \ 474 do { \ 475 if (inp == 0) { \ 476 splx(s); \ 477 return EINVAL; \ 478 } \ 479 tp = intotcpcb(inp); \ 480 TCPDEBUG1(); \ 481 } while(0) 482 483#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out 484 485 486/* 487 * Give the socket an address. 
488 */ 489static int 490tcp_usr_bind(struct socket *so, struct mbuf *nam) 491{ 492 int s = splnet(); 493 int error = 0; 494 struct inpcb *inp = sotoinpcb(so); 495 struct tcpcb *tp; 496 struct sockaddr_in *sinp; 497 498 COMMON_START(); 499 500 /* 501 * Must check for multicast addresses and disallow binding 502 * to them. 503 */ 504 sinp = mtod(nam, struct sockaddr_in *); 505 if (sinp->sin_family == AF_INET && 506 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 507 error = EAFNOSUPPORT; 508 goto out; 509 } 510 error = in_pcbbind(inp, nam); 511 if (error) 512 goto out; 513 COMMON_END(PRU_BIND); 514 515} 516 517/* 518 * Prepare to accept connections. 519 */ 520static int 521tcp_usr_listen(struct socket *so) 522{ 523 int s = splnet(); 524 int error = 0; 525 struct inpcb *inp = sotoinpcb(so); 526 struct tcpcb *tp; 527 528 COMMON_START(); 529 if (inp->inp_lport == 0) 530 error = in_pcbbind(inp, NULL); 531 if (error == 0) 532 tp->t_state = TCPS_LISTEN; 533 COMMON_END(PRU_LISTEN); 534} 535 536/* 537 * Initiate connection to peer. 538 * Create a template for use in transmissions on this connection. 539 * Enter SYN_SENT state, and mark socket as connecting. 540 * Start keep-alive timer, and seed output sequence space. 541 * Send initial segment on connection. 542 */ 543static int 544tcp_usr_connect(struct socket *so, struct mbuf *nam) 545{ 546 int s = splnet(); 547 int error = 0; 548 struct inpcb *inp = sotoinpcb(so); 549 struct tcpcb *tp; 550 struct sockaddr_in *sinp; 551 552 COMMON_START(); 553 554 /* 555 * Must disallow TCP ``connections'' to multicast addresses. 556 */ 557 sinp = mtod(nam, struct sockaddr_in *); 558 if (sinp->sin_family == AF_INET 559 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 560 error = EAFNOSUPPORT; 561 goto out; 562 } 563 564 if ((error = tcp_connect(tp, nam)) != 0) 565 goto out; 566 error = tcp_output(tp); 567 COMMON_END(PRU_CONNECT); 568} 569 570/* 571 * Initiate disconnect from peer. 
572 * If connection never passed embryonic stage, just drop; 573 * else if don't need to let data drain, then can just drop anyways, 574 * else have to begin TCP shutdown process: mark socket disconnecting, 575 * drain unread data, state switch to reflect user close, and 576 * send segment (e.g. FIN) to peer. Socket will be really disconnected 577 * when peer sends FIN and acks ours. 578 * 579 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 580 */ 581static int 582tcp_usr_disconnect(struct socket *so) 583{ 584 int s = splnet(); 585 int error = 0; 586 struct inpcb *inp = sotoinpcb(so); 587 struct tcpcb *tp; 588 589 COMMON_START(); 590 tp = tcp_disconnect(tp); 591 COMMON_END(PRU_DISCONNECT); 592} 593 594/* 595 * Accept a connection. Essentially all the work is 596 * done at higher levels; just return the address 597 * of the peer, storing through addr. 598 */ 599static int 600tcp_usr_accept(struct socket *so, struct mbuf *nam) 601{ 602 int s = splnet(); 603 int error = 0; 604 struct inpcb *inp = sotoinpcb(so); 605 struct tcpcb *tp; 606 607 COMMON_START(); 608 in_setpeeraddr(inp, nam); 609 COMMON_END(PRU_ACCEPT); 610} 611 612/* 613 * Mark the connection as being incapable of further output. 614 */ 615static int 616tcp_usr_shutdown(struct socket *so) 617{ 618 int s = splnet(); 619 int error = 0; 620 struct inpcb *inp = sotoinpcb(so); 621 struct tcpcb *tp; 622 623 COMMON_START(); 624 socantsendmore(so); 625 tp = tcp_usrclosed(tp); 626 if (tp) 627 error = tcp_output(tp); 628 COMMON_END(PRU_SHUTDOWN); 629} 630 631/* 632 * After a receive, possibly send window update to peer. 633 */ 634static int 635tcp_usr_rcvd(struct socket *so, int flags) 636{ 637 int s = splnet(); 638 int error = 0; 639 struct inpcb *inp = sotoinpcb(so); 640 struct tcpcb *tp; 641 642 COMMON_START(); 643 tcp_output(tp); 644 COMMON_END(PRU_RCVD); 645} 646 647/* 648 * Do a send by putting data in output queue and updating urgent 649 * marker if URG set. Possibly send more data. 
650 */ 651static int 652tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam, 653 struct mbuf *control) 654{ 655 int s = splnet(); 656 int error = 0; 657 struct inpcb *inp = sotoinpcb(so); 658 struct tcpcb *tp; 659 660 COMMON_START(); 661 if (control && control->m_len) { 662 m_freem(control); /* XXX shouldn't caller do this??? */ 663 if (m) 664 m_freem(m); 665 return EINVAL; 666 } 667 668 if(!(flags & PRUS_OOB)) { 669 sbappend(&so->so_snd, m); 670 if (nam && tp->t_state < TCPS_SYN_SENT) { 671 /* 672 * Do implied connect if not yet connected, 673 * initialize window to default value, and 674 * initialize maxseg/maxopd using peer's cached 675 * MSS. 676 */ 677 error = tcp_connect(tp, nam); 678 if (error) 679 goto out; 680 tp->snd_wnd = TTCP_CLIENT_SND_WND; 681 tcp_mss(tp, -1); 682 } 683 684 if (flags & PRUS_EOF) { 685 /* 686 * Close the send side of the connection after 687 * the data is sent. 688 */ 689 socantsendmore(so); 690 tp = tcp_usrclosed(tp); 691 } 692 if (tp != NULL) 693 error = tcp_output(tp); 694 } else { 695 if (sbspace(&so->so_snd) < -512) { 696 m_freem(m); 697 error = ENOBUFS; 698 goto out; 699 } 700 /* 701 * According to RFC961 (Assigned Protocols), 702 * the urgent pointer points to the last octet 703 * of urgent data. We continue, however, 704 * to consider it to indicate the first octet 705 * of data past the urgent section. 706 * Otherwise, snd_up should be one lower. 707 */ 708 sbappend(&so->so_snd, m); 709 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 710 tp->t_force = 1; 711 error = tcp_output(tp); 712 tp->t_force = 0; 713 } 714 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 715 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 716} 717 718/* 719 * Abort the TCP. 
720 */ 721static int 722tcp_usr_abort(struct socket *so) 723{ 724 int s = splnet(); 725 int error = 0; 726 struct inpcb *inp = sotoinpcb(so); 727 struct tcpcb *tp; 728 729 COMMON_START(); 730 tp = tcp_drop(tp, ECONNABORTED); 731 COMMON_END(PRU_ABORT); 732} 733 734/* 735 * Fill in st_bklsize for fstat() operations on a socket. 736 */ 737static int 738tcp_usr_sense(struct socket *so, struct stat *sb) 739{ 740 int s = splnet(); 741 742 sb->st_blksize = so->so_snd.sb_hiwat; 743 splx(s); 744 return 0; 745} 746 747/* 748 * Receive out-of-band data. 749 */ 750static int 751tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 752{ 753 int s = splnet(); 754 int error = 0; 755 struct inpcb *inp = sotoinpcb(so); 756 struct tcpcb *tp; 757 758 COMMON_START(); 759 if ((so->so_oobmark == 0 && 760 (so->so_state & SS_RCVATMARK) == 0) || 761 so->so_options & SO_OOBINLINE || 762 tp->t_oobflags & TCPOOB_HADDATA) { 763 error = EINVAL; 764 goto out; 765 } 766 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 767 error = EWOULDBLOCK; 768 goto out; 769 } 770 m->m_len = 1; 771 *mtod(m, caddr_t) = tp->t_iobc; 772 if ((flags & MSG_PEEK) == 0) 773 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 774 COMMON_END(PRU_RCVOOB); 775} 776 777static int 778tcp_usr_sockaddr(struct socket *so, struct mbuf *nam) 779{ 780 int s = splnet(); 781 int error = 0; 782 struct inpcb *inp = sotoinpcb(so); 783 struct tcpcb *tp; 784 785 COMMON_START(); 786 in_setsockaddr(inp, nam); 787 COMMON_END(PRU_SOCKADDR); 788} 789 790static int 791tcp_usr_peeraddr(struct socket *so, struct mbuf *nam) 792{ 793 int s = splnet(); 794 int error = 0; 795 struct inpcb *inp = sotoinpcb(so); 796 struct tcpcb *tp; 797 798 COMMON_START(); 799 in_setpeeraddr(inp, nam); 800 COMMON_END(PRU_PEERADDR); 801} 802 803/* 804 * XXX - this should just be a call to in_control, but we need to get 805 * the types worked out. 
806 */ 807static int 808tcp_usr_control(struct socket *so, int cmd, caddr_t arg, struct ifnet *ifp) 809{ 810 return in_control(so, cmd, arg, ifp); 811} 812 813/* xxx - should be const */ 814struct pr_usrreqs tcp_usrreqs = { 815 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, 816 tcp_usr_connect, pru_connect2_notsupp, tcp_usr_control, tcp_usr_detach, 817 tcp_usr_disconnect, tcp_usr_listen, tcp_usr_peeraddr, tcp_usr_rcvd, 818 tcp_usr_rcvoob, tcp_usr_send, tcp_usr_sense, tcp_usr_shutdown, 819 tcp_usr_sockaddr 820}; 821 822/* 823 * Common subroutine to open a TCP connection to remote host specified 824 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 825 * port number if needed. Call in_pcbladdr to do the routing and to choose 826 * a local host address (interface). If there is an existing incarnation 827 * of the same connection in TIME-WAIT state and if the remote host was 828 * sending CC options and if the connection duration was < MSL, then 829 * truncate the previous TIME-WAIT state and proceed. 830 * Initialize connection parameters and enter SYN-SENT state. 831 */ 832static int 833tcp_connect(tp, nam) 834 register struct tcpcb *tp; 835 struct mbuf *nam; 836{ 837 struct inpcb *inp = tp->t_inpcb, *oinp; 838 struct socket *so = inp->inp_socket; 839 struct tcpcb *otp; 840 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); 841 struct sockaddr_in *ifaddr; 842 int error; 843 struct rmxp_tao *taop; 844 struct rmxp_tao tao_noncached; 845 846 if (inp->inp_lport == 0) { 847 error = in_pcbbind(inp, NULL); 848 if (error) 849 return error; 850 } 851 852 /* 853 * Cannot simply call in_pcbconnect, because there might be an 854 * earlier incarnation of this same connection still in 855 * TIME_WAIT state, creating an ADDRINUSE error. 856 */ 857 error = in_pcbladdr(inp, nam, &ifaddr); 858 if (error) 859 return error; 860 oinp = in_pcblookuphash(inp->inp_pcbinfo, 861 sin->sin_addr, sin->sin_port, 862 inp->inp_laddr.s_addr != INADDR_ANY ? 
inp->inp_laddr 863 : ifaddr->sin_addr, 864 inp->inp_lport, 0); 865 if (oinp) { 866 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 867 otp->t_state == TCPS_TIME_WAIT && 868 otp->t_duration < TCPTV_MSL && 869 (otp->t_flags & TF_RCVD_CC)) 870 otp = tcp_close(otp); 871 else 872 return EADDRINUSE; 873 } 874 if (inp->inp_laddr.s_addr == INADDR_ANY) 875 inp->inp_laddr = ifaddr->sin_addr; 876 inp->inp_faddr = sin->sin_addr; 877 inp->inp_fport = sin->sin_port; 878 in_pcbrehash(inp); 879 880 tp->t_template = tcp_template(tp); 881 if (tp->t_template == 0) { 882 in_pcbdisconnect(inp); 883 return ENOBUFS; 884 } 885 886 /* Compute window scaling to request. */ 887 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 888 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 889 tp->request_r_scale++; 890 891 soisconnecting(so); 892 tcpstat.tcps_connattempt++; 893 tp->t_state = TCPS_SYN_SENT; 894 tp->t_timer[TCPT_KEEP] = tcp_keepinit; 895 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; 896 tcp_sendseqinit(tp); 897 898 /* 899 * Generate a CC value for this connection and 900 * check whether CC or CCnew should be used. 
901 */ 902 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) { 903 taop = &tao_noncached; 904 bzero(taop, sizeof(*taop)); 905 } 906 907 tp->cc_send = CC_INC(tcp_ccgen); 908 if (taop->tao_ccsent != 0 && 909 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 910 taop->tao_ccsent = tp->cc_send; 911 } else { 912 taop->tao_ccsent = 0; 913 tp->t_flags |= TF_SENDCCNEW; 914 } 915 916 return 0; 917} 918 919int 920tcp_ctloutput(op, so, level, optname, mp) 921 int op; 922 struct socket *so; 923 int level, optname; 924 struct mbuf **mp; 925{ 926 int error = 0, s; 927 struct inpcb *inp; 928 register struct tcpcb *tp; 929 register struct mbuf *m; 930 register int i; 931 932 s = splnet(); 933 inp = sotoinpcb(so); 934 if (inp == NULL) { 935 splx(s); 936 if (op == PRCO_SETOPT && *mp) 937 (void) m_free(*mp); 938 return (ECONNRESET); 939 } 940 if (level != IPPROTO_TCP) { 941 error = ip_ctloutput(op, so, level, optname, mp); 942 splx(s); 943 return (error); 944 } 945 tp = intotcpcb(inp); 946 947 switch (op) { 948 949 case PRCO_SETOPT: 950 m = *mp; 951 switch (optname) { 952 953 case TCP_NODELAY: 954 if (m == NULL || m->m_len < sizeof (int)) 955 error = EINVAL; 956 else if (*mtod(m, int *)) 957 tp->t_flags |= TF_NODELAY; 958 else 959 tp->t_flags &= ~TF_NODELAY; 960 break; 961 962 case TCP_MAXSEG: 963 if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg) 964 tp->t_maxseg = i; 965 else 966 error = EINVAL; 967 break; 968 969 case TCP_NOOPT: 970 if (m == NULL || m->m_len < sizeof (int)) 971 error = EINVAL; 972 else if (*mtod(m, int *)) 973 tp->t_flags |= TF_NOOPT; 974 else 975 tp->t_flags &= ~TF_NOOPT; 976 break; 977 978 case TCP_NOPUSH: 979 if (m == NULL || m->m_len < sizeof (int)) 980 error = EINVAL; 981 else if (*mtod(m, int *)) 982 tp->t_flags |= TF_NOPUSH; 983 else 984 tp->t_flags &= ~TF_NOPUSH; 985 break; 986 987 default: 988 error = ENOPROTOOPT; 989 break; 990 } 991 if (m) 992 (void) m_free(m); 993 break; 994 995 case PRCO_GETOPT: 996 *mp = m = m_get(M_WAIT, MT_SOOPTS); 997 m->m_len = 
sizeof(int); 998 999 switch (optname) { 1000 case TCP_NODELAY: 1001 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 1002 break; 1003 case TCP_MAXSEG: 1004 *mtod(m, int *) = tp->t_maxseg; 1005 break; 1006 case TCP_NOOPT: 1007 *mtod(m, int *) = tp->t_flags & TF_NOOPT; 1008 break; 1009 case TCP_NOPUSH: 1010 *mtod(m, int *) = tp->t_flags & TF_NOPUSH; 1011 break; 1012 default: 1013 error = ENOPROTOOPT; 1014 break; 1015 } 1016 break; 1017 } 1018 splx(s); 1019 return (error); 1020} 1021 1022/* 1023 * tcp_sendspace and tcp_recvspace are the default send and receive window 1024 * sizes, respectively. These are obsolescent (this information should 1025 * be set by the route). 1026 */ 1027u_long tcp_sendspace = 1024*16; 1028SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, 1029 CTLFLAG_RW, &tcp_sendspace , 0, ""); 1030u_long tcp_recvspace = 1024*16; 1031SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, 1032 CTLFLAG_RW, &tcp_recvspace , 0, ""); 1033 1034/* 1035 * Attach TCP protocol to socket, allocating 1036 * internet protocol control block, tcp control block, 1037 * bufer space, and entering LISTEN state if to accept connections. 1038 */ 1039static int 1040tcp_attach(so) 1041 struct socket *so; 1042{ 1043 register struct tcpcb *tp; 1044 struct inpcb *inp; 1045 int error; 1046 1047 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1048 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1049 if (error) 1050 return (error); 1051 } 1052 error = in_pcballoc(so, &tcbinfo); 1053 if (error) 1054 return (error); 1055 inp = sotoinpcb(so); 1056 tp = tcp_newtcpcb(inp); 1057 if (tp == 0) { 1058 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 1059 1060 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 1061 in_pcbdetach(inp); 1062 so->so_state |= nofd; 1063 return (ENOBUFS); 1064 } 1065 tp->t_state = TCPS_CLOSED; 1066 return (0); 1067} 1068 1069/* 1070 * Initiate (or continue) disconnect. 1071 * If embryonic state, just send reset (once). 
1072 * If in ``let data drain'' option and linger null, just drop. 1073 * Otherwise (hard), mark socket disconnecting and drop 1074 * current input data; switch states based on user close, and 1075 * send segment to peer (with FIN). 1076 */ 1077static struct tcpcb * 1078tcp_disconnect(tp) 1079 register struct tcpcb *tp; 1080{ 1081 struct socket *so = tp->t_inpcb->inp_socket; 1082 1083 if (tp->t_state < TCPS_ESTABLISHED) 1084 tp = tcp_close(tp); 1085 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1086 tp = tcp_drop(tp, 0); 1087 else { 1088 soisdisconnecting(so); 1089 sbflush(&so->so_rcv); 1090 tp = tcp_usrclosed(tp); 1091 if (tp) 1092 (void) tcp_output(tp); 1093 } 1094 return (tp); 1095} 1096 1097/* 1098 * User issued close, and wish to trail through shutdown states: 1099 * if never received SYN, just forget it. If got a SYN from peer, 1100 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1101 * If already got a FIN from peer, then almost done; go to LAST_ACK 1102 * state. In all other cases, have already sent FIN to peer (e.g. 1103 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1104 * for peer to send FIN or not respond to keep-alives, etc. 1105 * We can let the user exit from the close as soon as the FIN is acked. 1106 */ 1107static struct tcpcb * 1108tcp_usrclosed(tp) 1109 register struct tcpcb *tp; 1110{ 1111 1112 switch (tp->t_state) { 1113 1114 case TCPS_CLOSED: 1115 case TCPS_LISTEN: 1116 tp->t_state = TCPS_CLOSED; 1117 tp = tcp_close(tp); 1118 break; 1119 1120 case TCPS_SYN_SENT: 1121 case TCPS_SYN_RECEIVED: 1122 tp->t_flags |= TF_NEEDFIN; 1123 break; 1124 1125 case TCPS_ESTABLISHED: 1126 tp->t_state = TCPS_FIN_WAIT_1; 1127 break; 1128 1129 case TCPS_CLOSE_WAIT: 1130 tp->t_state = TCPS_LAST_ACK; 1131 break; 1132 } 1133 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1134 soisdisconnected(tp->t_inpcb->inp_socket); 1135 /* To prevent the connection hanging in FIN_WAIT_2 forever. 
*/ 1136 if (tp->t_state == TCPS_FIN_WAIT_2) 1137 tp->t_timer[TCPT_2MSL] = tcp_maxidle; 1138 } 1139 return (tp); 1140} 1141 1142