Deleted Added
full compact
tcp_usrreq.c (54526) tcp_usrreq.c (55009)
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
34 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 54526 1999-12-13 00:39:20Z shin $
34 * $FreeBSD: head/sys/netinet/tcp_usrreq.c 55009 1999-12-22 19:13:38Z shin $
35 */
36
35 */
36
37#include "opt_ipsec.h"
37#include "opt_tcpdebug.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/sysctl.h>
43#include <sys/mbuf.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/protosw.h>
47
48#include <net/if.h>
49#include <net/route.h>
50
51#include <netinet/in.h>
52#include <netinet/in_systm.h>
53#include <netinet/in_pcb.h>
54#include <netinet/in_var.h>
55#include <netinet/ip_var.h>
56#include <netinet/tcp.h>
57#include <netinet/tcp_fsm.h>
58#include <netinet/tcp_seq.h>
59#include <netinet/tcp_timer.h>
60#include <netinet/tcp_var.h>
61#include <netinet/tcpip.h>
62#ifdef TCPDEBUG
63#include <netinet/tcp_debug.h>
64#endif
65
38#include "opt_tcpdebug.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/sysctl.h>
44#include <sys/mbuf.h>
45#include <sys/socket.h>
46#include <sys/socketvar.h>
47#include <sys/protosw.h>
48
49#include <net/if.h>
50#include <net/route.h>
51
52#include <netinet/in.h>
53#include <netinet/in_systm.h>
54#include <netinet/in_pcb.h>
55#include <netinet/in_var.h>
56#include <netinet/ip_var.h>
57#include <netinet/tcp.h>
58#include <netinet/tcp_fsm.h>
59#include <netinet/tcp_seq.h>
60#include <netinet/tcp_timer.h>
61#include <netinet/tcp_var.h>
62#include <netinet/tcpip.h>
63#ifdef TCPDEBUG
64#include <netinet/tcp_debug.h>
65#endif
66
67#ifdef IPSEC
68#include <netinet6/ipsec.h>
69#endif /*IPSEC*/
70
66/*
67 * TCP protocol interface to socket abstraction.
68 */
69extern char *tcpstates[]; /* XXX ??? */
70
/*
 * Forward declarations of the file-local helpers called by the
 * tcp_usr_*() handlers below.  The parameter lists are wrapped in __P().
 */
71static int tcp_attach __P((struct socket *, struct proc *));
72static int tcp_connect __P((struct tcpcb *, struct sockaddr *,
73 struct proc *));
74static struct tcpcb *
75 tcp_disconnect __P((struct tcpcb *));
76static struct tcpcb *
77 tcp_usrclosed __P((struct tcpcb *));
78
/*
 * Tracing hooks used by the tcp_usr_*() handlers.
 *   TCPDEBUG0       declares the local ostate.
 *   TCPDEBUG1()     stores tp's current t_state in ostate (0 if tp is null).
 *   TCPDEBUG2(req)  calls tcp_trace() for request req when tp is non-null
 *                   and the socket has SO_DEBUG set.
 * When TCPDEBUG is not defined all three expand to nothing.  The macros
 * refer to variables named tp and so in the function that expands them.
 */
79#ifdef TCPDEBUG
80#define TCPDEBUG0 int ostate
81#define TCPDEBUG1() ostate = tp ? tp->t_state : 0
82#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
83 tcp_trace(TA_USER, ostate, tp, 0, req)
84#else
85#define TCPDEBUG0
86#define TCPDEBUG1()
87#define TCPDEBUG2(req)
88#endif
89
90/*
91 * TCP attaches to socket via pru_attach(), reserving space,
92 * and an internet control block.
93 */
94static int
95tcp_usr_attach(struct socket *so, int proto, struct proc *p)
96{
97 int s = splnet();
98 int error;
99 struct inpcb *inp = sotoinpcb(so);
100 struct tcpcb *tp = 0;
101 TCPDEBUG0;
102
103 TCPDEBUG1();
104 if (inp) {
105 error = EISCONN;
106 goto out;
107 }
108
109 error = tcp_attach(so, p);
110 if (error)
111 goto out;
112
113 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
114 so->so_linger = TCP_LINGERTIME;
115 tp = sototcpcb(so);
116out:
117 TCPDEBUG2(PRU_ATTACH);
118 splx(s);
119 return error;
120}
121
122/*
123 * pru_detach() detaches the TCP protocol from the socket.
124 * If the protocol state is non-embryonic, then can't
125 * do this directly: have to initiate a pru_disconnect(),
126 * which may finish later; embryonic TCB's can just
127 * be discarded here.
128 */
129static int
130tcp_usr_detach(struct socket *so)
131{
132 int s = splnet();
133 int error = 0;
134 struct inpcb *inp = sotoinpcb(so);
135 struct tcpcb *tp;
136 TCPDEBUG0;
137
138 if (inp == 0) {
139 splx(s);
140 return EINVAL; /* XXX */
141 }
142 tp = intotcpcb(inp);
143 TCPDEBUG1();
144 tp = tcp_disconnect(tp);
145
146 TCPDEBUG2(PRU_DETACH);
147 splx(s);
148 return error;
149}
150
/*
 * Shared prologue and epilogue for the tcp_usr_*() handlers.  Both macros
 * use the expanding function's locals s, error, inp and tp.
 *
 * COMMON_START() begins with TCPDEBUG0, which may expand to a declaration,
 * so it has to follow the local declarations directly.  When inp is null
 * it restores the interrupt level with splx(s) and returns EINVAL;
 * otherwise it loads tp from inp.
 *
 * COMMON_END(req) defines the "out" label, runs the trace hook, calls
 * splx(s) and returns error.  The "goto out" after the return can never
 * execute; presumably it is there so the label counts as used in handlers
 * that never jump to it themselves.
 */
151#define COMMON_START() TCPDEBUG0; \
152 do { \
153 if (inp == 0) { \
154 splx(s); \
155 return EINVAL; \
156 } \
157 tp = intotcpcb(inp); \
158 TCPDEBUG1(); \
159 } while(0)
160
161#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out
162
163
164/*
165 * Give the socket an address.
166 */
167static int
168tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
169{
170 int s = splnet();
171 int error = 0;
172 struct inpcb *inp = sotoinpcb(so);
173 struct tcpcb *tp;
174 struct sockaddr_in *sinp;
175
176 COMMON_START();
177
178 /*
179 * Must check for multicast addresses and disallow binding
180 * to them.
181 */
182 sinp = (struct sockaddr_in *)nam;
183 if (sinp->sin_family == AF_INET &&
184 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
185 error = EAFNOSUPPORT;
186 goto out;
187 }
188 error = in_pcbbind(inp, nam, p);
189 if (error)
190 goto out;
191 COMMON_END(PRU_BIND);
192
193}
194
195/*
196 * Prepare to accept connections.
197 */
198static int
199tcp_usr_listen(struct socket *so, struct proc *p)
200{
201 int s = splnet();
202 int error = 0;
203 struct inpcb *inp = sotoinpcb(so);
204 struct tcpcb *tp;
205
206 COMMON_START();
207 if (inp->inp_lport == 0)
208 error = in_pcbbind(inp, (struct sockaddr *)0, p);
209 if (error == 0)
210 tp->t_state = TCPS_LISTEN;
211 COMMON_END(PRU_LISTEN);
212}
213
214/*
215 * Initiate connection to peer.
216 * Create a template for use in transmissions on this connection.
217 * Enter SYN_SENT state, and mark socket as connecting.
218 * Start keep-alive timer, and seed output sequence space.
219 * Send initial segment on connection.
220 */
221static int
222tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
223{
224 int s = splnet();
225 int error = 0;
226 struct inpcb *inp = sotoinpcb(so);
227 struct tcpcb *tp;
228 struct sockaddr_in *sinp;
229
230 COMMON_START();
231
232 /*
233 * Must disallow TCP ``connections'' to multicast addresses.
234 */
235 sinp = (struct sockaddr_in *)nam;
236 if (sinp->sin_family == AF_INET
237 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
238 error = EAFNOSUPPORT;
239 goto out;
240 }
241
242 prison_remote_ip(p, 0, &sinp->sin_addr.s_addr);
243
244 if ((error = tcp_connect(tp, nam, p)) != 0)
245 goto out;
246 error = tcp_output(tp);
247 COMMON_END(PRU_CONNECT);
248}
249
250/*
251 * Initiate disconnect from peer.
252 * If connection never passed embryonic stage, just drop;
253 * else if don't need to let data drain, then can just drop anyways,
254 * else have to begin TCP shutdown process: mark socket disconnecting,
255 * drain unread data, state switch to reflect user close, and
256 * send segment (e.g. FIN) to peer. Socket will be really disconnected
257 * when peer sends FIN and acks ours.
258 *
259 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
260 */
261static int
262tcp_usr_disconnect(struct socket *so)
263{
264 int s = splnet();
265 int error = 0;
266 struct inpcb *inp = sotoinpcb(so);
267 struct tcpcb *tp;
268
269 COMMON_START();
270 tp = tcp_disconnect(tp);
271 COMMON_END(PRU_DISCONNECT);
272}
273
274/*
275 * Accept a connection. Essentially all the work is
276 * done at higher levels; just return the address
277 * of the peer, storing through addr.
278 */
279static int
280tcp_usr_accept(struct socket *so, struct sockaddr **nam)
281{
282 int s = splnet();
283 int error = 0;
284 struct inpcb *inp = sotoinpcb(so);
285 struct tcpcb *tp;
286
287 COMMON_START();
288 in_setpeeraddr(so, nam);
289 COMMON_END(PRU_ACCEPT);
290}
291
292/*
293 * Mark the connection as being incapable of further output.
294 */
295static int
296tcp_usr_shutdown(struct socket *so)
297{
298 int s = splnet();
299 int error = 0;
300 struct inpcb *inp = sotoinpcb(so);
301 struct tcpcb *tp;
302
303 COMMON_START();
304 socantsendmore(so);
305 tp = tcp_usrclosed(tp);
306 if (tp)
307 error = tcp_output(tp);
308 COMMON_END(PRU_SHUTDOWN);
309}
310
311/*
312 * After a receive, possibly send window update to peer.
313 */
314static int
315tcp_usr_rcvd(struct socket *so, int flags)
316{
317 int s = splnet();
318 int error = 0;
319 struct inpcb *inp = sotoinpcb(so);
320 struct tcpcb *tp;
321
322 COMMON_START();
323 tcp_output(tp);
324 COMMON_END(PRU_RCVD);
325}
326
327/*
328 * Do a send by putting data in output queue and updating urgent
329 * marker if URG set. Possibly send more data. Unlike the other
330 * pru_*() routines, the mbuf chains are our responsibility. We
331 * must either enqueue them or free them. The other pru_* routines
332 * generally are caller-frees.
333 */
/*
 * pru_send handler.  Both m and control are consumed on every path:
 * each is either appended to so->so_snd or released with m_freem().
 *
 * flags: PRUS_OOB selects the urgent-data branch.  In the normal branch
 *        PRUS_EOF closes the send side once the data is queued and
 *        PRUS_MORETOCOME sets TF_MORETOCOME around the tcp_output() call.
 * nam:   when non-null and the connection has not yet reached SYN_SENT,
 *        tcp_connect() is run first (implied connect).
 *
 * Returns 0 or an errno: ECONNRESET when the socket has no inpcb, EINVAL
 * for a non-empty control mbuf, ENOBUFS when the urgent-data space check
 * fails, otherwise the result of tcp_connect() or tcp_output().
 */
334static int
335tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
336 struct sockaddr *nam, struct mbuf *control, struct proc *p)
337{
338 int s = splnet();
339 int error = 0;
340 struct inpcb *inp = sotoinpcb(so);
341 struct tcpcb *tp;
342 TCPDEBUG0;
343
344 if (inp == NULL) {
345 /*
346 * OOPS! we lost a race, the TCP session got reset after
347 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
348 * network interrupt in the non-splnet() section of sosend().
349 */
350 if (m)
351 m_freem(m);
352 if (control)
353 m_freem(control);
354 error = ECONNRESET; /* XXX EPIPE? */
355 tp = NULL;
356 TCPDEBUG1();
357 goto out;
358 }
359 tp = intotcpcb(inp);
360 TCPDEBUG1();
361 if (control) {
362 /* TCP doesn't do control messages (rights, creds, etc) */
363 if (control->m_len) {
364 m_freem(control);
365 if (m)
366 m_freem(m);
367 error = EINVAL;
368 goto out;
369 }
370 m_freem(control); /* empty control, just free it */
371 }
372 if(!(flags & PRUS_OOB)) {
373 sbappend(&so->so_snd, m);
374 if (nam && tp->t_state < TCPS_SYN_SENT) {
375 /*
376 * Do implied connect if not yet connected,
377 * initialize window to default value, and
378 * initialize maxseg/maxopd using peer's cached
379 * MSS.
380 */
381 error = tcp_connect(tp, nam, p);
/* m already sits in so_snd here, so it is not freed on failure. */
382 if (error)
383 goto out;
384 tp->snd_wnd = TTCP_CLIENT_SND_WND;
385 tcp_mss(tp, -1);
386 }
387
388 if (flags & PRUS_EOF) {
389 /*
390 * Close the send side of the connection after
391 * the data is sent.
392 */
393 socantsendmore(so);
394 tp = tcp_usrclosed(tp);
395 }
/* tp may have been replaced by the tcp_usrclosed() result above. */
396 if (tp != NULL) {
397 if (flags & PRUS_MORETOCOME)
398 tp->t_flags |= TF_MORETOCOME;
399 error = tcp_output(tp);
400 if (flags & PRUS_MORETOCOME)
401 tp->t_flags &= ~TF_MORETOCOME;
402 }
403 } else {
404 if (sbspace(&so->so_snd) < -512) {
405 m_freem(m);
406 error = ENOBUFS;
407 goto out;
408 }
409 /*
410 * According to RFC961 (Assigned Protocols),
411 * the urgent pointer points to the last octet
412 * of urgent data. We continue, however,
413 * to consider it to indicate the first octet
414 * of data past the urgent section.
415 * Otherwise, snd_up should be one lower.
416 */
417 sbappend(&so->so_snd, m);
418 if (nam && tp->t_state < TCPS_SYN_SENT) {
419 /*
420 * Do implied connect if not yet connected,
421 * initialize window to default value, and
422 * initialize maxseg/maxopd using peer's cached
423 * MSS.
424 */
425 error = tcp_connect(tp, nam, p);
426 if (error)
427 goto out;
428 tp->snd_wnd = TTCP_CLIENT_SND_WND;
429 tcp_mss(tp, -1);
430 }
431 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
/* t_force is raised only for the duration of this tcp_output() call. */
432 tp->t_force = 1;
433 error = tcp_output(tp);
434 tp->t_force = 0;
435 }
436 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
437 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
438}
439
440/*
441 * Abort the TCP.
442 */
443static int
444tcp_usr_abort(struct socket *so)
445{
446 int s = splnet();
447 int error = 0;
448 struct inpcb *inp = sotoinpcb(so);
449 struct tcpcb *tp;
450
451 COMMON_START();
452 tp = tcp_drop(tp, ECONNABORTED);
453 COMMON_END(PRU_ABORT);
454}
455
456/*
457 * Receive out-of-band data.
458 */
459static int
460tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
461{
462 int s = splnet();
463 int error = 0;
464 struct inpcb *inp = sotoinpcb(so);
465 struct tcpcb *tp;
466
467 COMMON_START();
468 if ((so->so_oobmark == 0 &&
469 (so->so_state & SS_RCVATMARK) == 0) ||
470 so->so_options & SO_OOBINLINE ||
471 tp->t_oobflags & TCPOOB_HADDATA) {
472 error = EINVAL;
473 goto out;
474 }
475 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
476 error = EWOULDBLOCK;
477 goto out;
478 }
479 m->m_len = 1;
480 *mtod(m, caddr_t) = tp->t_iobc;
481 if ((flags & MSG_PEEK) == 0)
482 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
483 COMMON_END(PRU_RCVOOB);
484}
485
486/* xxx - should be const */
/*
 * TCP's user-request dispatch table.  The initializer is positional, so
 * the entries must stay in the member order of struct pr_usrreqs
 * (declared outside this file -- check that declaration before
 * reordering).  It mixes the tcp_usr_*() handlers defined above with
 * generic in_*(), pru_*() and so*() routines.
 */
487struct pr_usrreqs tcp_usrreqs = {
488 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
489 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
490 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
491 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
492 in_setsockaddr, sosend, soreceive, sopoll
493};
494
495/*
496 * Common subroutine to open a TCP connection to remote host specified
497 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
498 * port number if needed. Call in_pcbladdr to do the routing and to choose
499 * a local host address (interface). If there is an existing incarnation
500 * of the same connection in TIME-WAIT state and if the remote host was
501 * sending CC options and if the connection duration was < MSL, then
502 * truncate the previous TIME-WAIT state and proceed.
503 * Initialize connection parameters and enter SYN-SENT state.
504 */
/*
 * Parameters:
 *   tp  - control block to connect; its inpcb and socket are updated.
 *   nam - destination, interpreted as a struct sockaddr_in.
 *   p   - passed through to in_pcbbind().
 * Returns 0 on success; otherwise the error from in_pcbbind() or
 * in_pcbladdr(), EADDRINUSE when a matching pcb exists that cannot be
 * recycled, or ENOBUFS when tcp_template() fails.
 */
505static int
506tcp_connect(tp, nam, p)
507 register struct tcpcb *tp;
508 struct sockaddr *nam;
509 struct proc *p;
510{
511 struct inpcb *inp = tp->t_inpcb, *oinp;
512 struct socket *so = inp->inp_socket;
513 struct tcpcb *otp;
514 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
515 struct sockaddr_in *ifaddr;
516 struct rmxp_tao *taop;
517 struct rmxp_tao tao_noncached;
518 int error;
519
520 if (inp->inp_lport == 0) {
521 error = in_pcbbind(inp, (struct sockaddr *)0, p);
522 if (error)
523 return error;
524 }
525
526 /*
527 * Cannot simply call in_pcbconnect, because there might be an
528 * earlier incarnation of this same connection still in
529 * TIME_WAIT state, creating an ADDRINUSE error.
530 */
531 error = in_pcbladdr(inp, nam, &ifaddr);
532 if (error)
533 return error;
534 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
535 sin->sin_addr, sin->sin_port,
536 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
537 : ifaddr->sin_addr,
538 inp->inp_lport, 0, NULL);
539 if (oinp) {
/*
 * Only a different pcb that is in TIME_WAIT, younger than tcp_msl
 * and flagged TF_RCVD_CC is closed and reused; anything else is a
 * genuine address conflict.
 */
540 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
541 otp->t_state == TCPS_TIME_WAIT &&
542 (ticks - otp->t_starttime) < tcp_msl &&
543 (otp->t_flags & TF_RCVD_CC))
544 otp = tcp_close(otp);
545 else
546 return EADDRINUSE;
547 }
548 if (inp->inp_laddr.s_addr == INADDR_ANY)
549 inp->inp_laddr = ifaddr->sin_addr;
550 inp->inp_faddr = sin->sin_addr;
551 inp->inp_fport = sin->sin_port;
552 in_pcbrehash(inp);
553
554 tp->t_template = tcp_template(tp);
555 if (tp->t_template == 0) {
/* Undo the address assignment made above before failing. */
556 in_pcbdisconnect(inp);
557 return ENOBUFS;
558 }
559
560 /* Compute window scaling to request. */
561 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
562 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
563 tp->request_r_scale++;
564
565 soisconnecting(so);
566 tcpstat.tcps_connattempt++;
567 tp->t_state = TCPS_SYN_SENT;
568 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
569 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
570 tcp_sendseqinit(tp);
571
572 /*
573 * Generate a CC value for this connection and
574 * check whether CC or CCnew should be used.
575 */
576 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
/* No cache entry: work on a zeroed local copy instead. */
577 taop = &tao_noncached;
578 bzero(taop, sizeof(*taop));
579 }
580
581 tp->cc_send = CC_INC(tcp_ccgen);
582 if (taop->tao_ccsent != 0 &&
583 CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
584 taop->tao_ccsent = tp->cc_send;
585 } else {
586 taop->tao_ccsent = 0;
587 tp->t_flags |= TF_SENDCCNEW;
588 }
589
590 return 0;
591}
592
593/*
594 * The new sockopt interface makes it possible for us to block in the
595 * copyin/out step (if we take a page fault). Taking a page fault at
596 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
597 * use TSM, there probably isn't any need for this function to run at
598 * splnet() any more. This needs more examination.)
599 */
/*
 * Get or set a socket option on a TCP socket.
 *
 * Requests whose level is not IPPROTO_TCP are handed to ip_ctloutput().
 * SOPT_SET accepts TCP_NODELAY, TCP_NOOPT and TCP_NOPUSH (each sets or
 * clears one t_flags bit depending on whether the value is non-zero) and
 * TCP_MAXSEG (must be > 0 and no larger than the current t_maxseg).
 * SOPT_GET supports the same four names; the flag options report the
 * masked t_flags bit itself rather than a normalized 0/1.
 *
 * Returns ECONNRESET when the socket has no inpcb, ENOPROTOOPT for an
 * unknown option name, EINVAL for an out-of-range TCP_MAXSEG, or the
 * error from sooptcopyin()/sooptcopyout().
 */
600int
601tcp_ctloutput(so, sopt)
602 struct socket *so;
603 struct sockopt *sopt;
604{
605 int error, opt, optval, s;
606 struct inpcb *inp;
607 struct tcpcb *tp;
608
609 error = 0;
610 s = splnet(); /* XXX */
611 inp = sotoinpcb(so);
612 if (inp == NULL) {
613 splx(s);
614 return (ECONNRESET);
615 }
616 if (sopt->sopt_level != IPPROTO_TCP) {
617 error = ip_ctloutput(so, sopt);
618 splx(s);
619 return (error);
620 }
621 tp = intotcpcb(inp);
622
623 switch (sopt->sopt_dir) {
624 case SOPT_SET:
625 switch (sopt->sopt_name) {
626 case TCP_NODELAY:
627 case TCP_NOOPT:
628 case TCP_NOPUSH:
629 error = sooptcopyin(sopt, &optval, sizeof optval,
630 sizeof optval);
631 if (error)
632 break;
633
/* Map the option name onto the t_flags bit it controls. */
634 switch (sopt->sopt_name) {
635 case TCP_NODELAY:
636 opt = TF_NODELAY;
637 break;
638 case TCP_NOOPT:
639 opt = TF_NOOPT;
640 break;
641 case TCP_NOPUSH:
642 opt = TF_NOPUSH;
643 break;
644 default:
645 opt = 0; /* dead code to fool gcc */
646 break;
647 }
648
649 if (optval)
650 tp->t_flags |= opt;
651 else
652 tp->t_flags &= ~opt;
653 break;
654
655 case TCP_MAXSEG:
656 error = sooptcopyin(sopt, &optval, sizeof optval,
657 sizeof optval);
658 if (error)
659 break;
660
/* The segment size can only be lowered, never raised. */
661 if (optval > 0 && optval <= tp->t_maxseg)
662 tp->t_maxseg = optval;
663 else
664 error = EINVAL;
665 break;
666
667 default:
668 error = ENOPROTOOPT;
669 break;
670 }
671 break;
672
673 case SOPT_GET:
674 switch (sopt->sopt_name) {
675 case TCP_NODELAY:
676 optval = tp->t_flags & TF_NODELAY;
677 break;
678 case TCP_MAXSEG:
679 optval = tp->t_maxseg;
680 break;
681 case TCP_NOOPT:
682 optval = tp->t_flags & TF_NOOPT;
683 break;
684 case TCP_NOPUSH:
685 optval = tp->t_flags & TF_NOPUSH;
686 break;
687 default:
688 error = ENOPROTOOPT;
689 break;
690 }
/* optval is only copied out when one of the cases above set it. */
691 if (error == 0)
692 error = sooptcopyout(sopt, &optval, sizeof optval);
693 break;
694 }
695 splx(s);
696 return (error);
697}
698
699/*
700 * tcp_sendspace and tcp_recvspace are the default send and receive window
701 * sizes, respectively. These are obsolescent (this information should
702 * be set by the route).
703 */
704u_long tcp_sendspace = 1024*16;
705SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
706 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
707u_long tcp_recvspace = 1024*16;
708SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
709 &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
710
711/*
712 * Attach TCP protocol to socket, allocating
713 * internet protocol control block, tcp control block,
714 * buffer space, and entering LISTEN state if to accept connections.
715 */
716static int
717tcp_attach(so, p)
718 struct socket *so;
719 struct proc *p;
720{
721 register struct tcpcb *tp;
722 struct inpcb *inp;
723 int error;
724
725 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
726 error = soreserve(so, tcp_sendspace, tcp_recvspace);
727 if (error)
728 return (error);
729 }
730 error = in_pcballoc(so, &tcbinfo, p);
731 if (error)
732 return (error);
733 inp = sotoinpcb(so);
71/*
72 * TCP protocol interface to socket abstraction.
73 */
74extern char *tcpstates[]; /* XXX ??? */
75
/*
 * Forward declarations of the file-local helpers called by the
 * tcp_usr_*() handlers below.  The parameter lists are wrapped in __P().
 */
76static int tcp_attach __P((struct socket *, struct proc *));
77static int tcp_connect __P((struct tcpcb *, struct sockaddr *,
78 struct proc *));
79static struct tcpcb *
80 tcp_disconnect __P((struct tcpcb *));
81static struct tcpcb *
82 tcp_usrclosed __P((struct tcpcb *));
83
/*
 * Tracing hooks used by the tcp_usr_*() handlers.
 *   TCPDEBUG0       declares the local ostate.
 *   TCPDEBUG1()     stores tp's current t_state in ostate (0 if tp is null).
 *   TCPDEBUG2(req)  calls tcp_trace() for request req when tp is non-null
 *                   and the socket has SO_DEBUG set.
 * When TCPDEBUG is not defined all three expand to nothing.  The macros
 * refer to variables named tp and so in the function that expands them.
 */
84#ifdef TCPDEBUG
85#define TCPDEBUG0 int ostate
86#define TCPDEBUG1() ostate = tp ? tp->t_state : 0
87#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
88 tcp_trace(TA_USER, ostate, tp, 0, req)
89#else
90#define TCPDEBUG0
91#define TCPDEBUG1()
92#define TCPDEBUG2(req)
93#endif
94
95/*
96 * TCP attaches to socket via pru_attach(), reserving space,
97 * and an internet control block.
98 */
99static int
100tcp_usr_attach(struct socket *so, int proto, struct proc *p)
101{
102 int s = splnet();
103 int error;
104 struct inpcb *inp = sotoinpcb(so);
105 struct tcpcb *tp = 0;
106 TCPDEBUG0;
107
108 TCPDEBUG1();
109 if (inp) {
110 error = EISCONN;
111 goto out;
112 }
113
114 error = tcp_attach(so, p);
115 if (error)
116 goto out;
117
118 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
119 so->so_linger = TCP_LINGERTIME;
120 tp = sototcpcb(so);
121out:
122 TCPDEBUG2(PRU_ATTACH);
123 splx(s);
124 return error;
125}
126
127/*
128 * pru_detach() detaches the TCP protocol from the socket.
129 * If the protocol state is non-embryonic, then can't
130 * do this directly: have to initiate a pru_disconnect(),
131 * which may finish later; embryonic TCB's can just
132 * be discarded here.
133 */
134static int
135tcp_usr_detach(struct socket *so)
136{
137 int s = splnet();
138 int error = 0;
139 struct inpcb *inp = sotoinpcb(so);
140 struct tcpcb *tp;
141 TCPDEBUG0;
142
143 if (inp == 0) {
144 splx(s);
145 return EINVAL; /* XXX */
146 }
147 tp = intotcpcb(inp);
148 TCPDEBUG1();
149 tp = tcp_disconnect(tp);
150
151 TCPDEBUG2(PRU_DETACH);
152 splx(s);
153 return error;
154}
155
/*
 * Shared prologue and epilogue for the tcp_usr_*() handlers.  Both macros
 * use the expanding function's locals s, error, inp and tp.
 *
 * COMMON_START() begins with TCPDEBUG0, which may expand to a declaration,
 * so it has to follow the local declarations directly.  When inp is null
 * it restores the interrupt level with splx(s) and returns EINVAL;
 * otherwise it loads tp from inp.
 *
 * COMMON_END(req) defines the "out" label, runs the trace hook, calls
 * splx(s) and returns error.  The "goto out" after the return can never
 * execute; presumably it is there so the label counts as used in handlers
 * that never jump to it themselves.
 */
156#define COMMON_START() TCPDEBUG0; \
157 do { \
158 if (inp == 0) { \
159 splx(s); \
160 return EINVAL; \
161 } \
162 tp = intotcpcb(inp); \
163 TCPDEBUG1(); \
164 } while(0)
165
166#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out
167
168
169/*
170 * Give the socket an address.
171 */
172static int
173tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
174{
175 int s = splnet();
176 int error = 0;
177 struct inpcb *inp = sotoinpcb(so);
178 struct tcpcb *tp;
179 struct sockaddr_in *sinp;
180
181 COMMON_START();
182
183 /*
184 * Must check for multicast addresses and disallow binding
185 * to them.
186 */
187 sinp = (struct sockaddr_in *)nam;
188 if (sinp->sin_family == AF_INET &&
189 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
190 error = EAFNOSUPPORT;
191 goto out;
192 }
193 error = in_pcbbind(inp, nam, p);
194 if (error)
195 goto out;
196 COMMON_END(PRU_BIND);
197
198}
199
200/*
201 * Prepare to accept connections.
202 */
203static int
204tcp_usr_listen(struct socket *so, struct proc *p)
205{
206 int s = splnet();
207 int error = 0;
208 struct inpcb *inp = sotoinpcb(so);
209 struct tcpcb *tp;
210
211 COMMON_START();
212 if (inp->inp_lport == 0)
213 error = in_pcbbind(inp, (struct sockaddr *)0, p);
214 if (error == 0)
215 tp->t_state = TCPS_LISTEN;
216 COMMON_END(PRU_LISTEN);
217}
218
219/*
220 * Initiate connection to peer.
221 * Create a template for use in transmissions on this connection.
222 * Enter SYN_SENT state, and mark socket as connecting.
223 * Start keep-alive timer, and seed output sequence space.
224 * Send initial segment on connection.
225 */
226static int
227tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
228{
229 int s = splnet();
230 int error = 0;
231 struct inpcb *inp = sotoinpcb(so);
232 struct tcpcb *tp;
233 struct sockaddr_in *sinp;
234
235 COMMON_START();
236
237 /*
238 * Must disallow TCP ``connections'' to multicast addresses.
239 */
240 sinp = (struct sockaddr_in *)nam;
241 if (sinp->sin_family == AF_INET
242 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
243 error = EAFNOSUPPORT;
244 goto out;
245 }
246
247 prison_remote_ip(p, 0, &sinp->sin_addr.s_addr);
248
249 if ((error = tcp_connect(tp, nam, p)) != 0)
250 goto out;
251 error = tcp_output(tp);
252 COMMON_END(PRU_CONNECT);
253}
254
255/*
256 * Initiate disconnect from peer.
257 * If connection never passed embryonic stage, just drop;
258 * else if don't need to let data drain, then can just drop anyways,
259 * else have to begin TCP shutdown process: mark socket disconnecting,
260 * drain unread data, state switch to reflect user close, and
261 * send segment (e.g. FIN) to peer. Socket will be really disconnected
262 * when peer sends FIN and acks ours.
263 *
264 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
265 */
266static int
267tcp_usr_disconnect(struct socket *so)
268{
269 int s = splnet();
270 int error = 0;
271 struct inpcb *inp = sotoinpcb(so);
272 struct tcpcb *tp;
273
274 COMMON_START();
275 tp = tcp_disconnect(tp);
276 COMMON_END(PRU_DISCONNECT);
277}
278
279/*
280 * Accept a connection. Essentially all the work is
281 * done at higher levels; just return the address
282 * of the peer, storing through addr.
283 */
284static int
285tcp_usr_accept(struct socket *so, struct sockaddr **nam)
286{
287 int s = splnet();
288 int error = 0;
289 struct inpcb *inp = sotoinpcb(so);
290 struct tcpcb *tp;
291
292 COMMON_START();
293 in_setpeeraddr(so, nam);
294 COMMON_END(PRU_ACCEPT);
295}
296
297/*
298 * Mark the connection as being incapable of further output.
299 */
300static int
301tcp_usr_shutdown(struct socket *so)
302{
303 int s = splnet();
304 int error = 0;
305 struct inpcb *inp = sotoinpcb(so);
306 struct tcpcb *tp;
307
308 COMMON_START();
309 socantsendmore(so);
310 tp = tcp_usrclosed(tp);
311 if (tp)
312 error = tcp_output(tp);
313 COMMON_END(PRU_SHUTDOWN);
314}
315
316/*
317 * After a receive, possibly send window update to peer.
318 */
319static int
320tcp_usr_rcvd(struct socket *so, int flags)
321{
322 int s = splnet();
323 int error = 0;
324 struct inpcb *inp = sotoinpcb(so);
325 struct tcpcb *tp;
326
327 COMMON_START();
328 tcp_output(tp);
329 COMMON_END(PRU_RCVD);
330}
331
332/*
333 * Do a send by putting data in output queue and updating urgent
334 * marker if URG set. Possibly send more data. Unlike the other
335 * pru_*() routines, the mbuf chains are our responsibility. We
336 * must either enqueue them or free them. The other pru_* routines
337 * generally are caller-frees.
338 */
/*
 * pru_send handler.  Both m and control are consumed on every path:
 * each is either appended to so->so_snd or released with m_freem().
 *
 * flags: PRUS_OOB selects the urgent-data branch.  In the normal branch
 *        PRUS_EOF closes the send side once the data is queued and
 *        PRUS_MORETOCOME sets TF_MORETOCOME around the tcp_output() call.
 * nam:   when non-null and the connection has not yet reached SYN_SENT,
 *        tcp_connect() is run first (implied connect).
 *
 * Returns 0 or an errno: ECONNRESET when the socket has no inpcb, EINVAL
 * for a non-empty control mbuf, ENOBUFS when the urgent-data space check
 * fails, otherwise the result of tcp_connect() or tcp_output().
 */
339static int
340tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
341 struct sockaddr *nam, struct mbuf *control, struct proc *p)
342{
343 int s = splnet();
344 int error = 0;
345 struct inpcb *inp = sotoinpcb(so);
346 struct tcpcb *tp;
347 TCPDEBUG0;
348
349 if (inp == NULL) {
350 /*
351 * OOPS! we lost a race, the TCP session got reset after
352 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
353 * network interrupt in the non-splnet() section of sosend().
354 */
355 if (m)
356 m_freem(m);
357 if (control)
358 m_freem(control);
359 error = ECONNRESET; /* XXX EPIPE? */
360 tp = NULL;
361 TCPDEBUG1();
362 goto out;
363 }
364 tp = intotcpcb(inp);
365 TCPDEBUG1();
366 if (control) {
367 /* TCP doesn't do control messages (rights, creds, etc) */
368 if (control->m_len) {
369 m_freem(control);
370 if (m)
371 m_freem(m);
372 error = EINVAL;
373 goto out;
374 }
375 m_freem(control); /* empty control, just free it */
376 }
377 if(!(flags & PRUS_OOB)) {
378 sbappend(&so->so_snd, m);
379 if (nam && tp->t_state < TCPS_SYN_SENT) {
380 /*
381 * Do implied connect if not yet connected,
382 * initialize window to default value, and
383 * initialize maxseg/maxopd using peer's cached
384 * MSS.
385 */
386 error = tcp_connect(tp, nam, p);
/* m already sits in so_snd here, so it is not freed on failure. */
387 if (error)
388 goto out;
389 tp->snd_wnd = TTCP_CLIENT_SND_WND;
390 tcp_mss(tp, -1);
391 }
392
393 if (flags & PRUS_EOF) {
394 /*
395 * Close the send side of the connection after
396 * the data is sent.
397 */
398 socantsendmore(so);
399 tp = tcp_usrclosed(tp);
400 }
/* tp may have been replaced by the tcp_usrclosed() result above. */
401 if (tp != NULL) {
402 if (flags & PRUS_MORETOCOME)
403 tp->t_flags |= TF_MORETOCOME;
404 error = tcp_output(tp);
405 if (flags & PRUS_MORETOCOME)
406 tp->t_flags &= ~TF_MORETOCOME;
407 }
408 } else {
409 if (sbspace(&so->so_snd) < -512) {
410 m_freem(m);
411 error = ENOBUFS;
412 goto out;
413 }
414 /*
415 * According to RFC961 (Assigned Protocols),
416 * the urgent pointer points to the last octet
417 * of urgent data. We continue, however,
418 * to consider it to indicate the first octet
419 * of data past the urgent section.
420 * Otherwise, snd_up should be one lower.
421 */
422 sbappend(&so->so_snd, m);
423 if (nam && tp->t_state < TCPS_SYN_SENT) {
424 /*
425 * Do implied connect if not yet connected,
426 * initialize window to default value, and
427 * initialize maxseg/maxopd using peer's cached
428 * MSS.
429 */
430 error = tcp_connect(tp, nam, p);
431 if (error)
432 goto out;
433 tp->snd_wnd = TTCP_CLIENT_SND_WND;
434 tcp_mss(tp, -1);
435 }
436 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
/* t_force is raised only for the duration of this tcp_output() call. */
437 tp->t_force = 1;
438 error = tcp_output(tp);
439 tp->t_force = 0;
440 }
441 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
442 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
443}
444
445/*
446 * Abort the TCP.
447 */
448static int
449tcp_usr_abort(struct socket *so)
450{
451 int s = splnet();
452 int error = 0;
453 struct inpcb *inp = sotoinpcb(so);
454 struct tcpcb *tp;
455
456 COMMON_START();
457 tp = tcp_drop(tp, ECONNABORTED);
458 COMMON_END(PRU_ABORT);
459}
460
461/*
462 * Receive out-of-band data.
463 */
464static int
465tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
466{
467 int s = splnet();
468 int error = 0;
469 struct inpcb *inp = sotoinpcb(so);
470 struct tcpcb *tp;
471
472 COMMON_START();
473 if ((so->so_oobmark == 0 &&
474 (so->so_state & SS_RCVATMARK) == 0) ||
475 so->so_options & SO_OOBINLINE ||
476 tp->t_oobflags & TCPOOB_HADDATA) {
477 error = EINVAL;
478 goto out;
479 }
480 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
481 error = EWOULDBLOCK;
482 goto out;
483 }
484 m->m_len = 1;
485 *mtod(m, caddr_t) = tp->t_iobc;
486 if ((flags & MSG_PEEK) == 0)
487 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
488 COMMON_END(PRU_RCVOOB);
489}
490
491/* xxx - should be const */
492struct pr_usrreqs tcp_usrreqs = {
493 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
494 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
495 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
496 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
497 in_setsockaddr, sosend, soreceive, sopoll
498};
499
500/*
501 * Common subroutine to open a TCP connection to remote host specified
502 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
503 * port number if needed. Call in_pcbladdr to do the routing and to choose
504 * a local host address (interface). If there is an existing incarnation
505 * of the same connection in TIME-WAIT state and if the remote host was
506 * sending CC options and if the connection duration was < MSL, then
507 * truncate the previous TIME-WAIT state and proceed.
508 * Initialize connection parameters and enter SYN-SENT state.
509 */
510static int
511tcp_connect(tp, nam, p)
512 register struct tcpcb *tp;
513 struct sockaddr *nam;
514 struct proc *p;
515{
516 struct inpcb *inp = tp->t_inpcb, *oinp;
517 struct socket *so = inp->inp_socket;
518 struct tcpcb *otp;
519 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
520 struct sockaddr_in *ifaddr;
521 struct rmxp_tao *taop;
522 struct rmxp_tao tao_noncached;
523 int error;
524
525 if (inp->inp_lport == 0) {
526 error = in_pcbbind(inp, (struct sockaddr *)0, p);
527 if (error)
528 return error;
529 }
530
531 /*
532 * Cannot simply call in_pcbconnect, because there might be an
533 * earlier incarnation of this same connection still in
534 * TIME_WAIT state, creating an ADDRINUSE error.
535 */
536 error = in_pcbladdr(inp, nam, &ifaddr);
537 if (error)
538 return error;
539 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
540 sin->sin_addr, sin->sin_port,
541 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
542 : ifaddr->sin_addr,
543 inp->inp_lport, 0, NULL);
544 if (oinp) {
545 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
546 otp->t_state == TCPS_TIME_WAIT &&
547 (ticks - otp->t_starttime) < tcp_msl &&
548 (otp->t_flags & TF_RCVD_CC))
549 otp = tcp_close(otp);
550 else
551 return EADDRINUSE;
552 }
553 if (inp->inp_laddr.s_addr == INADDR_ANY)
554 inp->inp_laddr = ifaddr->sin_addr;
555 inp->inp_faddr = sin->sin_addr;
556 inp->inp_fport = sin->sin_port;
557 in_pcbrehash(inp);
558
559 tp->t_template = tcp_template(tp);
560 if (tp->t_template == 0) {
561 in_pcbdisconnect(inp);
562 return ENOBUFS;
563 }
564
565 /* Compute window scaling to request. */
566 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
567 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
568 tp->request_r_scale++;
569
570 soisconnecting(so);
571 tcpstat.tcps_connattempt++;
572 tp->t_state = TCPS_SYN_SENT;
573 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
574 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
575 tcp_sendseqinit(tp);
576
577 /*
578 * Generate a CC value for this connection and
579 * check whether CC or CCnew should be used.
580 */
581 if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
582 taop = &tao_noncached;
583 bzero(taop, sizeof(*taop));
584 }
585
586 tp->cc_send = CC_INC(tcp_ccgen);
587 if (taop->tao_ccsent != 0 &&
588 CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
589 taop->tao_ccsent = tp->cc_send;
590 } else {
591 taop->tao_ccsent = 0;
592 tp->t_flags |= TF_SENDCCNEW;
593 }
594
595 return 0;
596}
597
598/*
599 * The new sockopt interface makes it possible for us to block in the
600 * copyin/out step (if we take a page fault). Taking a page fault at
601 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
602 * use TSM, there probably isn't any need for this function to run at
603 * splnet() any more. This needs more examination.)
604 */
605int
606tcp_ctloutput(so, sopt)
607 struct socket *so;
608 struct sockopt *sopt;
609{
610 int error, opt, optval, s;
611 struct inpcb *inp;
612 struct tcpcb *tp;
613
614 error = 0;
615 s = splnet(); /* XXX */
616 inp = sotoinpcb(so);
617 if (inp == NULL) {
618 splx(s);
619 return (ECONNRESET);
620 }
621 if (sopt->sopt_level != IPPROTO_TCP) {
622 error = ip_ctloutput(so, sopt);
623 splx(s);
624 return (error);
625 }
626 tp = intotcpcb(inp);
627
628 switch (sopt->sopt_dir) {
629 case SOPT_SET:
630 switch (sopt->sopt_name) {
631 case TCP_NODELAY:
632 case TCP_NOOPT:
633 case TCP_NOPUSH:
634 error = sooptcopyin(sopt, &optval, sizeof optval,
635 sizeof optval);
636 if (error)
637 break;
638
639 switch (sopt->sopt_name) {
640 case TCP_NODELAY:
641 opt = TF_NODELAY;
642 break;
643 case TCP_NOOPT:
644 opt = TF_NOOPT;
645 break;
646 case TCP_NOPUSH:
647 opt = TF_NOPUSH;
648 break;
649 default:
650 opt = 0; /* dead code to fool gcc */
651 break;
652 }
653
654 if (optval)
655 tp->t_flags |= opt;
656 else
657 tp->t_flags &= ~opt;
658 break;
659
660 case TCP_MAXSEG:
661 error = sooptcopyin(sopt, &optval, sizeof optval,
662 sizeof optval);
663 if (error)
664 break;
665
666 if (optval > 0 && optval <= tp->t_maxseg)
667 tp->t_maxseg = optval;
668 else
669 error = EINVAL;
670 break;
671
672 default:
673 error = ENOPROTOOPT;
674 break;
675 }
676 break;
677
678 case SOPT_GET:
679 switch (sopt->sopt_name) {
680 case TCP_NODELAY:
681 optval = tp->t_flags & TF_NODELAY;
682 break;
683 case TCP_MAXSEG:
684 optval = tp->t_maxseg;
685 break;
686 case TCP_NOOPT:
687 optval = tp->t_flags & TF_NOOPT;
688 break;
689 case TCP_NOPUSH:
690 optval = tp->t_flags & TF_NOPUSH;
691 break;
692 default:
693 error = ENOPROTOOPT;
694 break;
695 }
696 if (error == 0)
697 error = sooptcopyout(sopt, &optval, sizeof optval);
698 break;
699 }
700 splx(s);
701 return (error);
702}
703
704/*
705 * tcp_sendspace and tcp_recvspace are the default send and receive window
706 * sizes, respectively. These are obsolescent (this information should
707 * be set by the route).
708 */
709u_long tcp_sendspace = 1024*16;
710SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
711 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
712u_long tcp_recvspace = 1024*16;
713SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
714 &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
715
716/*
717 * Attach TCP protocol to socket, allocating
718 * internet protocol control block, tcp control block,
719 * bufer space, and entering LISTEN state if to accept connections.
720 */
721static int
722tcp_attach(so, p)
723 struct socket *so;
724 struct proc *p;
725{
726 register struct tcpcb *tp;
727 struct inpcb *inp;
728 int error;
729
730 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
731 error = soreserve(so, tcp_sendspace, tcp_recvspace);
732 if (error)
733 return (error);
734 }
735 error = in_pcballoc(so, &tcbinfo, p);
736 if (error)
737 return (error);
738 inp = sotoinpcb(so);
739#ifdef IPSEC
740 error = ipsec_init_policy(so, &inp->inp_sp);
741 if (error) {
742 in_pcbdetach(inp);
743 return (error);
744 }
745#endif /*IPSEC*/
734 inp->inp_vflag |= INP_IPV4;
735 tp = tcp_newtcpcb(inp);
736 if (tp == 0) {
737 int nofd = so->so_state & SS_NOFDREF; /* XXX */
738
739 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
740 in_pcbdetach(inp);
741 so->so_state |= nofd;
742 return (ENOBUFS);
743 }
744 tp->t_state = TCPS_CLOSED;
745 return (0);
746}
747
748/*
749 * Initiate (or continue) disconnect.
750 * If embryonic state, just send reset (once).
751 * If in ``let data drain'' option and linger null, just drop.
752 * Otherwise (hard), mark socket disconnecting and drop
753 * current input data; switch states based on user close, and
754 * send segment to peer (with FIN).
755 */
756static struct tcpcb *
757tcp_disconnect(tp)
758 register struct tcpcb *tp;
759{
760 struct socket *so = tp->t_inpcb->inp_socket;
761
762 if (tp->t_state < TCPS_ESTABLISHED)
763 tp = tcp_close(tp);
764 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
765 tp = tcp_drop(tp, 0);
766 else {
767 soisdisconnecting(so);
768 sbflush(&so->so_rcv);
769 tp = tcp_usrclosed(tp);
770 if (tp)
771 (void) tcp_output(tp);
772 }
773 return (tp);
774}
775
776/*
777 * User issued close, and wish to trail through shutdown states:
778 * if never received SYN, just forget it. If got a SYN from peer,
779 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
780 * If already got a FIN from peer, then almost done; go to LAST_ACK
781 * state. In all other cases, have already sent FIN to peer (e.g.
782 * after PRU_SHUTDOWN), and just have to play tedious game waiting
783 * for peer to send FIN or not respond to keep-alives, etc.
784 * We can let the user exit from the close as soon as the FIN is acked.
785 */
786static struct tcpcb *
787tcp_usrclosed(tp)
788 register struct tcpcb *tp;
789{
790
791 switch (tp->t_state) {
792
793 case TCPS_CLOSED:
794 case TCPS_LISTEN:
795 tp->t_state = TCPS_CLOSED;
796 tp = tcp_close(tp);
797 break;
798
799 case TCPS_SYN_SENT:
800 case TCPS_SYN_RECEIVED:
801 tp->t_flags |= TF_NEEDFIN;
802 break;
803
804 case TCPS_ESTABLISHED:
805 tp->t_state = TCPS_FIN_WAIT_1;
806 break;
807
808 case TCPS_CLOSE_WAIT:
809 tp->t_state = TCPS_LAST_ACK;
810 break;
811 }
812 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
813 soisdisconnected(tp->t_inpcb->inp_socket);
814 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
815 if (tp->t_state == TCPS_FIN_WAIT_2)
816 callout_reset(tp->tt_2msl, tcp_maxidle,
817 tcp_timer_2msl, tp);
818 }
819 return (tp);
820}
821
746 inp->inp_vflag |= INP_IPV4;
747 tp = tcp_newtcpcb(inp);
748 if (tp == 0) {
749 int nofd = so->so_state & SS_NOFDREF; /* XXX */
750
751 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
752 in_pcbdetach(inp);
753 so->so_state |= nofd;
754 return (ENOBUFS);
755 }
756 tp->t_state = TCPS_CLOSED;
757 return (0);
758}
759
760/*
761 * Initiate (or continue) disconnect.
762 * If embryonic state, just send reset (once).
763 * If in ``let data drain'' option and linger null, just drop.
764 * Otherwise (hard), mark socket disconnecting and drop
765 * current input data; switch states based on user close, and
766 * send segment to peer (with FIN).
767 */
768static struct tcpcb *
769tcp_disconnect(tp)
770 register struct tcpcb *tp;
771{
772 struct socket *so = tp->t_inpcb->inp_socket;
773
774 if (tp->t_state < TCPS_ESTABLISHED)
775 tp = tcp_close(tp);
776 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
777 tp = tcp_drop(tp, 0);
778 else {
779 soisdisconnecting(so);
780 sbflush(&so->so_rcv);
781 tp = tcp_usrclosed(tp);
782 if (tp)
783 (void) tcp_output(tp);
784 }
785 return (tp);
786}
787
788/*
789 * User issued close, and wish to trail through shutdown states:
790 * if never received SYN, just forget it. If got a SYN from peer,
791 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
792 * If already got a FIN from peer, then almost done; go to LAST_ACK
793 * state. In all other cases, have already sent FIN to peer (e.g.
794 * after PRU_SHUTDOWN), and just have to play tedious game waiting
795 * for peer to send FIN or not respond to keep-alives, etc.
796 * We can let the user exit from the close as soon as the FIN is acked.
797 */
798static struct tcpcb *
799tcp_usrclosed(tp)
800 register struct tcpcb *tp;
801{
802
803 switch (tp->t_state) {
804
805 case TCPS_CLOSED:
806 case TCPS_LISTEN:
807 tp->t_state = TCPS_CLOSED;
808 tp = tcp_close(tp);
809 break;
810
811 case TCPS_SYN_SENT:
812 case TCPS_SYN_RECEIVED:
813 tp->t_flags |= TF_NEEDFIN;
814 break;
815
816 case TCPS_ESTABLISHED:
817 tp->t_state = TCPS_FIN_WAIT_1;
818 break;
819
820 case TCPS_CLOSE_WAIT:
821 tp->t_state = TCPS_LAST_ACK;
822 break;
823 }
824 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
825 soisdisconnected(tp->t_inpcb->inp_socket);
826 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
827 if (tp->t_state == TCPS_FIN_WAIT_2)
828 callout_reset(tp->tt_2msl, tcp_maxidle,
829 tcp_timer_2msl, tp);
830 }
831 return (tp);
832}
833