tcp_input.c (122875) | tcp_input.c (122922) |
---|---|
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 17 unchanged lines hidden (view full) --- 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 | 1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 17 unchanged lines hidden (view full) --- 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 |
34 * $FreeBSD: head/sys/netinet/tcp_input.c 122875 2003-11-18 00:39:07Z rwatson $ | 34 * $FreeBSD: head/sys/netinet/tcp_input.c 122922 2003-11-20 20:07:39Z andre $ |
35 */ 36 37#include "opt_ipfw.h" /* for ipfw_fwd */ 38#include "opt_inet6.h" 39#include "opt_ipsec.h" 40#include "opt_mac.h" 41#include "opt_tcpdebug.h" 42#include "opt_tcp_input.h" --- 106 unchanged lines hidden (view full) --- 149static int tcp_timewait(struct tcptw *, struct tcpopt *, 150 struct tcphdr *, struct mbuf *, int); 151 152/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ 153#ifdef INET6 154#define ND6_HINT(tp) \ 155do { \ 156 if ((tp) && (tp)->t_inpcb && \ | 35 */ 36 37#include "opt_ipfw.h" /* for ipfw_fwd */ 38#include "opt_inet6.h" 39#include "opt_ipsec.h" 40#include "opt_mac.h" 41#include "opt_tcpdebug.h" 42#include "opt_tcp_input.h" --- 106 unchanged lines hidden (view full) --- 149static int tcp_timewait(struct tcptw *, struct tcpopt *, 150 struct tcphdr *, struct mbuf *, int); 151 152/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ 153#ifdef INET6 154#define ND6_HINT(tp) \ 155do { \ 156 if ((tp) && (tp)->t_inpcb && \ |
157 ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \ 158 (tp)->t_inpcb->in6p_route.ro_rt) \ 159 nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \ | 157 ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \ 158 nd6_nud_hint(NULL, NULL, 0); \ |
160} while (0) 161#else 162#define ND6_HINT(tp) 163#endif 164 165/* 166 * Indicate whether this ack should be delayed. We can delay the ack if 167 * - there is no delayed ack timer in progress and --- 185 unchanged lines hidden (view full) --- 353 int len, tlen, off; 354 int drop_hdrlen; 355 register struct tcpcb *tp = 0; 356 register int thflags; 357 struct socket *so = 0; 358 int todrop, acked, ourfinisacked, needoutput = 0; 359 u_long tiwin; 360 struct tcpopt to; /* options in this segment */ | 159} while (0) 160#else 161#define ND6_HINT(tp) 162#endif 163 164/* 165 * Indicate whether this ack should be delayed. We can delay the ack if 166 * - there is no delayed ack timer in progress and --- 185 unchanged lines hidden (view full) --- 352 int len, tlen, off; 353 int drop_hdrlen; 354 register struct tcpcb *tp = 0; 355 register int thflags; 356 struct socket *so = 0; 357 int todrop, acked, ourfinisacked, needoutput = 0; 358 u_long tiwin; 359 struct tcpopt to; /* options in this segment */ |
361 struct rmxp_tao *taop; /* pointer to our TAO cache entry */ 362 struct rmxp_tao tao_noncached; /* in case there's no cached entry */ | 360 struct rmxp_tao tao; /* our TAO cache entry */ |
363 int headlocked = 0; 364 struct sockaddr_in *next_hop = NULL; 365 int rstreason; /* For badport_bandlim accounting purposes */ 366 367 struct ip6_hdr *ip6 = NULL; 368#ifdef INET6 369 int isipv6; 370#else --- 13 unchanged lines hidden (view full) --- 384 /* Grab info from MT_TAG mbufs prepended to the chain. */ 385 for (;m && m->m_type == MT_TAG; m = m->m_next) { 386 if (m->_m_tag_id == PACKET_TAG_IPFORWARD) 387 next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; 388 } 389#ifdef INET6 390 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; 391#endif | 361 int headlocked = 0; 362 struct sockaddr_in *next_hop = NULL; 363 int rstreason; /* For badport_bandlim accounting purposes */ 364 365 struct ip6_hdr *ip6 = NULL; 366#ifdef INET6 367 int isipv6; 368#else --- 13 unchanged lines hidden (view full) --- 382 /* Grab info from MT_TAG mbufs prepended to the chain. */ 383 for (;m && m->m_type == MT_TAG; m = m->m_next) { 384 if (m->_m_tag_id == PACKET_TAG_IPFORWARD) 385 next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; 386 } 387#ifdef INET6 388 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; 389#endif |
390 bzero(&tao, sizeof(tao)); |
|
392 bzero((char *)&to, sizeof(to)); 393 394 tcpstat.tcps_rcvtotal++; 395 396 if (isipv6) { 397 /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */ 398 ip6 = mtod(m, struct ip6_hdr *); 399 tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; --- 302 unchanged lines hidden (view full) --- 702 struct in_conninfo inc; 703 704#ifdef INET6 705 inc.inc_isipv6 = isipv6; 706#endif 707 if (isipv6) { 708 inc.inc6_faddr = ip6->ip6_src; 709 inc.inc6_laddr = ip6->ip6_dst; | 391 bzero((char *)&to, sizeof(to)); 392 393 tcpstat.tcps_rcvtotal++; 394 395 if (isipv6) { 396 /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */ 397 ip6 = mtod(m, struct ip6_hdr *); 398 tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; --- 302 unchanged lines hidden (view full) --- 701 struct in_conninfo inc; 702 703#ifdef INET6 704 inc.inc_isipv6 = isipv6; 705#endif 706 if (isipv6) { 707 inc.inc6_faddr = ip6->ip6_src; 708 inc.inc6_laddr = ip6->ip6_dst; |
710 inc.inc6_route.ro_rt = NULL; /* XXX */ | |
711 } else { 712 inc.inc_faddr = ip->ip_src; 713 inc.inc_laddr = ip->ip_dst; | 709 } else { 710 inc.inc_faddr = ip->ip_src; 711 inc.inc_laddr = ip->ip_dst; |
714 inc.inc_route.ro_rt = NULL; /* XXX */ | |
715 } 716 inc.inc_fport = th->th_sport; 717 inc.inc_lport = th->th_dport; 718 719 /* 720 * If the state is LISTEN then ignore segment if it contains 721 * a RST. If the segment contains an ACK then it is bad and 722 * send a RST. If it does not contain a SYN then it is not --- 188 unchanged lines hidden (view full) --- 911 tcpstat.tcps_connects++; 912 soisconnected(so); 913 goto trimthenstep6; 914 } 915 goto drop; 916 } 917after_listen: 918 | 712 } 713 inc.inc_fport = th->th_sport; 714 inc.inc_lport = th->th_dport; 715 716 /* 717 * If the state is LISTEN then ignore segment if it contains 718 * a RST. If the segment contains an ACK then it is bad and 719 * send a RST. If it does not contain a SYN then it is not --- 188 unchanged lines hidden (view full) --- 908 tcpstat.tcps_connects++; 909 soisconnected(so); 910 goto trimthenstep6; 911 } 912 goto drop; 913 } 914after_listen: 915 |
919/* XXX temp debugging */ | 916 /* XXX temp debugging */ |
920 /* should not happen - syncache should pick up these connections */ 921 if (tp->t_state == TCPS_LISTEN) 922 panic("tcp_input: TCPS_LISTEN"); 923 924 /* 925 * Segment received on connection. 926 * Reset idle time and keep-alive timer. 927 */ 928 tp->t_rcvtime = ticks; 929 if (TCPS_HAVEESTABLISHED(tp->t_state)) 930 callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); 931 932 /* | 917 /* should not happen - syncache should pick up these connections */ 918 if (tp->t_state == TCPS_LISTEN) 919 panic("tcp_input: TCPS_LISTEN"); 920 921 /* 922 * Segment received on connection. 923 * Reset idle time and keep-alive timer. 924 */ 925 tp->t_rcvtime = ticks; 926 if (TCPS_HAVEESTABLISHED(tp->t_state)) 927 callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); 928 929 /* |
933 * Process options. 934 * XXX this is tradtitional behavior, may need to be cleaned up. | 930 * Process options only when we get SYN/ACK back. The SYN case 931 * for incoming connections is handled in tcp_syncache. 932 * XXX this is traditional behavior, may need to be cleaned up. |
935 */ 936 tcp_dooptions(&to, optp, optlen, thflags & TH_SYN); 937 if (thflags & TH_SYN) { 938 if (to.to_flags & TOF_SCALE) { 939 tp->t_flags |= TF_RCVD_SCALE; 940 tp->requested_s_scale = to.to_requested_s_scale; 941 } 942 if (to.to_flags & TOF_TS) { --- 231 unchanged lines hidden (view full) --- 1174 * Otherwise this is an acceptable SYN segment 1175 * initialize tp->rcv_nxt and tp->irs 1176 * if seg contains ack then advance tp->snd_una 1177 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 1178 * arrange for segment to be acked (eventually) 1179 * continue processing rest of data/controls, beginning with URG 1180 */ 1181 case TCPS_SYN_SENT: | 933 */ 934 tcp_dooptions(&to, optp, optlen, thflags & TH_SYN); 935 if (thflags & TH_SYN) { 936 if (to.to_flags & TOF_SCALE) { 937 tp->t_flags |= TF_RCVD_SCALE; 938 tp->requested_s_scale = to.to_requested_s_scale; 939 } 940 if (to.to_flags & TOF_TS) { --- 231 unchanged lines hidden (view full) --- 1172 * Otherwise this is an acceptable SYN segment 1173 * initialize tp->rcv_nxt and tp->irs 1174 * if seg contains ack then advance tp->snd_una 1175 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 1176 * arrange for segment to be acked (eventually) 1177 * continue processing rest of data/controls, beginning with URG 1178 */ 1179 case TCPS_SYN_SENT: |
1182 if ((taop = tcp_gettaocache(&inp->inp_inc)) == NULL) { 1183 taop = &tao_noncached; 1184 bzero(taop, sizeof(*taop)); 1185 } | 1180 if (tcp_do_rfc1644) 1181 tcp_hc_gettao(&inp->inp_inc, &tao); |
1186 1187 if ((thflags & TH_ACK) && 1188 (SEQ_LEQ(th->th_ack, tp->iss) || 1189 SEQ_GT(th->th_ack, tp->snd_max))) { 1190 /* 1191 * If we have a cached CCsent for the remote host, 1192 * hence we haven't just crashed and restarted, 1193 * do not send a RST. This may be a retransmission 1194 * from the other side after our earlier ACK was lost. 1195 * Our new SYN, when it arrives, will serve as the 1196 * needed ACK. 1197 */ | 1182 1183 if ((thflags & TH_ACK) && 1184 (SEQ_LEQ(th->th_ack, tp->iss) || 1185 SEQ_GT(th->th_ack, tp->snd_max))) { 1186 /* 1187 * If we have a cached CCsent for the remote host, 1188 * hence we haven't just crashed and restarted, 1189 * do not send a RST. This may be a retransmission 1190 * from the other side after our earlier ACK was lost. 1191 * Our new SYN, when it arrives, will serve as the 1192 * needed ACK. 1193 */ |
1198 if (taop->tao_ccsent != 0) | 1194 if (tao.tao_ccsent != 0) |
1199 goto drop; 1200 else { 1201 rstreason = BANDLIM_UNLIMITED; 1202 goto dropwithreset; 1203 } 1204 } 1205 if (thflags & TH_RST) { 1206 if (thflags & TH_ACK) --- 13 unchanged lines hidden (view full) --- 1220 * option, check it to make sure this segment really 1221 * matches our SYN. If not, just drop it as old 1222 * duplicate, but send an RST if we're still playing 1223 * by the old rules. If no CC.ECHO option, make sure 1224 * we don't get fooled into using T/TCP. 1225 */ 1226 if (to.to_flags & TOF_CCECHO) { 1227 if (tp->cc_send != to.to_ccecho) { | 1195 goto drop; 1196 else { 1197 rstreason = BANDLIM_UNLIMITED; 1198 goto dropwithreset; 1199 } 1200 } 1201 if (thflags & TH_RST) { 1202 if (thflags & TH_ACK) --- 13 unchanged lines hidden (view full) --- 1216 * option, check it to make sure this segment really 1217 * matches our SYN. If not, just drop it as old 1218 * duplicate, but send an RST if we're still playing 1219 * by the old rules. If no CC.ECHO option, make sure 1220 * we don't get fooled into using T/TCP. 1221 */ 1222 if (to.to_flags & TOF_CCECHO) { 1223 if (tp->cc_send != to.to_ccecho) { |
1228 if (taop->tao_ccsent != 0) | 1224 if (tao.tao_ccsent != 0) |
1229 goto drop; 1230 else { 1231 rstreason = BANDLIM_UNLIMITED; 1232 goto dropwithreset; 1233 } 1234 } 1235 } else 1236 tp->t_flags &= ~TF_RCVD_CC; --- 4 unchanged lines hidden (view full) --- 1241#endif 1242 /* Do window scaling on this connection? */ 1243 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 1244 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 1245 tp->snd_scale = tp->requested_s_scale; 1246 tp->rcv_scale = tp->request_r_scale; 1247 } 1248 /* Segment is acceptable, update cache if undefined. */ | 1225 goto drop; 1226 else { 1227 rstreason = BANDLIM_UNLIMITED; 1228 goto dropwithreset; 1229 } 1230 } 1231 } else 1232 tp->t_flags &= ~TF_RCVD_CC; --- 4 unchanged lines hidden (view full) --- 1237#endif 1238 /* Do window scaling on this connection? */ 1239 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 1240 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 1241 tp->snd_scale = tp->requested_s_scale; 1242 tp->rcv_scale = tp->request_r_scale; 1243 } 1244 /* Segment is acceptable, update cache if undefined. */ |
1249 if (taop->tao_ccsent == 0) 1250 taop->tao_ccsent = to.to_ccecho; | 1245 if (tao.tao_ccsent == 0 && tcp_do_rfc1644) 1246 tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, to.to_ccecho, 0); |
1251 1252 tp->rcv_adv += tp->rcv_wnd; 1253 tp->snd_una++; /* SYN is acked */ 1254 /* 1255 * If there's data, delay ACK; if there's also a FIN 1256 * ACKNOW will be turned on later. 1257 */ 1258 if (DELAY_ACK(tp) && tlen != 0) --- 26 unchanged lines hidden (view full) --- 1285 * Otherwise, do 3-way handshake: 1286 * SYN-SENT -> SYN-RECEIVED 1287 * SYN-SENT* -> SYN-RECEIVED* 1288 * If there was no CC option, clear cached CC value. 1289 */ 1290 tp->t_flags |= TF_ACKNOW; 1291 callout_stop(tp->tt_rexmt); 1292 if (to.to_flags & TOF_CC) { | 1247 1248 tp->rcv_adv += tp->rcv_wnd; 1249 tp->snd_una++; /* SYN is acked */ 1250 /* 1251 * If there's data, delay ACK; if there's also a FIN 1252 * ACKNOW will be turned on later. 1253 */ 1254 if (DELAY_ACK(tp) && tlen != 0) --- 26 unchanged lines hidden (view full) --- 1281 * Otherwise, do 3-way handshake: 1282 * SYN-SENT -> SYN-RECEIVED 1283 * SYN-SENT* -> SYN-RECEIVED* 1284 * If there was no CC option, clear cached CC value. 1285 */ 1286 tp->t_flags |= TF_ACKNOW; 1287 callout_stop(tp->tt_rexmt); 1288 if (to.to_flags & TOF_CC) { |
1293 if (taop->tao_cc != 0 && 1294 CC_GT(to.to_cc, taop->tao_cc)) { | 1289 if (tao.tao_cc != 0 && 1290 CC_GT(to.to_cc, tao.tao_cc)) { |
1295 /* 1296 * update cache and make transition: 1297 * SYN-SENT -> ESTABLISHED* 1298 * SYN-SENT* -> FIN-WAIT-1* 1299 */ | 1291 /* 1292 * update cache and make transition: 1293 * SYN-SENT -> ESTABLISHED* 1294 * SYN-SENT* -> FIN-WAIT-1* 1295 */ |
1300 taop->tao_cc = to.to_cc; | 1296 tao.tao_cc = to.to_cc; 1297 tcp_hc_updatetao(&inp->inp_inc, 1298 TCP_HC_TAO_CC, to.to_cc, 0); |
1301 tp->t_starttime = ticks; 1302 if (tp->t_flags & TF_NEEDFIN) { 1303 tp->t_state = TCPS_FIN_WAIT_1; 1304 tp->t_flags &= ~TF_NEEDFIN; 1305 } else { 1306 tp->t_state = TCPS_ESTABLISHED; 1307 callout_reset(tp->tt_keep, 1308 tcp_keepidle, 1309 tcp_timer_keep, 1310 tp); 1311 } 1312 tp->t_flags |= TF_NEEDSYN; 1313 } else 1314 tp->t_state = TCPS_SYN_RECEIVED; 1315 } else { | 1299 tp->t_starttime = ticks; 1300 if (tp->t_flags & TF_NEEDFIN) { 1301 tp->t_state = TCPS_FIN_WAIT_1; 1302 tp->t_flags &= ~TF_NEEDFIN; 1303 } else { 1304 tp->t_state = TCPS_ESTABLISHED; 1305 callout_reset(tp->tt_keep, 1306 tcp_keepidle, 1307 tcp_timer_keep, 1308 tp); 1309 } 1310 tp->t_flags |= TF_NEEDSYN; 1311 } else 1312 tp->t_state = TCPS_SYN_RECEIVED; 1313 } else { |
1316 /* CC.NEW or no option => invalidate cache */ 1317 taop->tao_cc = 0; | 1314 if (tcp_do_rfc1644) { 1315 /* CC.NEW or no option => invalidate cache */ 1316 tao.tao_cc = 0; 1317 tcp_hc_updatetao(&inp->inp_inc, 1318 TCP_HC_TAO_CC, to.to_cc, 0); 1319 } |
1318 tp->t_state = TCPS_SYN_RECEIVED; 1319 } 1320 } 1321 1322trimthenstep6: 1323 /* 1324 * Advance th->th_seq to correspond to first data byte. 1325 * If data, trim to stay within window, --- 351 unchanged lines hidden (view full) --- 1677 /* Do window scaling? */ 1678 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 1679 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 1680 tp->snd_scale = tp->requested_s_scale; 1681 tp->rcv_scale = tp->request_r_scale; 1682 } 1683 /* 1684 * Upon successful completion of 3-way handshake, | 1320 tp->t_state = TCPS_SYN_RECEIVED; 1321 } 1322 } 1323 1324trimthenstep6: 1325 /* 1326 * Advance th->th_seq to correspond to first data byte. 1327 * If data, trim to stay within window, --- 351 unchanged lines hidden (view full) --- 1679 /* Do window scaling? */ 1680 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 1681 (TF_RCVD_SCALE|TF_REQ_SCALE)) { 1682 tp->snd_scale = tp->requested_s_scale; 1683 tp->rcv_scale = tp->request_r_scale; 1684 } 1685 /* 1686 * Upon successful completion of 3-way handshake, |
1685 * update cache.CC if it was undefined, pass any queued 1686 * data to the user, and advance state appropriately. | 1687 * update cache.CC, pass any queued data to the user, 1688 * and advance state appropriately. |
1687 */ | 1689 */ |
1688 if ((taop = tcp_gettaocache(&inp->inp_inc)) != NULL && 1689 taop->tao_cc == 0) 1690 taop->tao_cc = tp->cc_recv; 1691 | 1690 if (tcp_do_rfc1644) { 1691 tao.tao_cc = tp->cc_recv; 1692 tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CC, 1693 tp->cc_recv, 0); 1694 } |
1692 /* 1693 * Make transitions: 1694 * SYN-RECEIVED -> ESTABLISHED 1695 * SYN-RECEIVED* -> FIN-WAIT-1 1696 */ 1697 tp->t_starttime = ticks; 1698 if (tp->t_flags & TF_NEEDFIN) { 1699 tp->t_state = TCPS_FIN_WAIT_1; --- 906 unchanged lines hidden (view full) --- 2606 * parameters from pre-set or cached values in the routing entry. 2607 * 2608 * Also take into account the space needed for options that we 2609 * send regularly. Make maxseg shorter by that amount to assure 2610 * that we can send maxseg amount of data even when the options 2611 * are present. Store the upper limit of the length of options plus 2612 * data in maxopd. 2613 * | 1695 /* 1696 * Make transitions: 1697 * SYN-RECEIVED -> ESTABLISHED 1698 * SYN-RECEIVED* -> FIN-WAIT-1 1699 */ 1700 tp->t_starttime = ticks; 1701 if (tp->t_flags & TF_NEEDFIN) { 1702 tp->t_state = TCPS_FIN_WAIT_1; --- 906 unchanged lines hidden (view full) --- 2609 * parameters from pre-set or cached values in the routing entry. 2610 * 2611 * Also take into account the space needed for options that we 2612 * send regularly. Make maxseg shorter by that amount to assure 2613 * that we can send maxseg amount of data even when the options 2614 * are present. Store the upper limit of the length of options plus 2615 * data in maxopd. 2616 * |
2614 * NOTE that this routine is only called when we process an incoming 2615 * segment, for outgoing segments only tcp_mssopt is called. | |
2616 * 2617 * In case of T/TCP, we call this routine during implicit connection 2618 * setup as well (offer = -1), to initialize maxseg from the cached 2619 * MSS of our peer. | 2617 * 2618 * In case of T/TCP, we call this routine during implicit connection 2619 * setup as well (offer = -1), to initialize maxseg from the cached 2620 * MSS of our peer. |
2621 * 2622 * NOTE that this routine is only called when we process an incoming 2623 * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt(). |
|
2620 */ 2621void 2622tcp_mss(tp, offer) 2623 struct tcpcb *tp; 2624 int offer; 2625{ | 2624 */ 2625void 2626tcp_mss(tp, offer) 2627 struct tcpcb *tp; 2628 int offer; 2629{ |
2626 register struct rtentry *rt; 2627 struct ifnet *ifp; 2628 register int rtt, mss; | 2630 int rtt, mss; |
2629 u_long bufsize; | 2631 u_long bufsize; |
2632 u_long maxmtu; |
|
2630 struct inpcb *inp = tp->t_inpcb; 2631 struct socket *so; | 2633 struct inpcb *inp = tp->t_inpcb; 2634 struct socket *so; |
2632 struct rmxp_tao *taop; | 2635 struct hc_metrics_lite metrics; 2636 struct rmxp_tao tao; |
2633 int origoffer = offer; 2634#ifdef INET6 2635 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; 2636 size_t min_protoh = isipv6 ? 2637 sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : 2638 sizeof (struct tcpiphdr); 2639#else | 2637 int origoffer = offer; 2638#ifdef INET6 2639 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; 2640 size_t min_protoh = isipv6 ? 2641 sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : 2642 sizeof (struct tcpiphdr); 2643#else |
2640 const int isipv6 = 0; 2641 const size_t min_protoh = sizeof (struct tcpiphdr); | 2644 const size_t min_protoh = sizeof(struct tcpiphdr); |
2642#endif | 2645#endif |
2646 bzero(&tao, sizeof(tao)); |
|
2643 | 2647 |
2644 if (isipv6) 2645 rt = tcp_rtlookup6(&inp->inp_inc); 2646 else 2647 rt = tcp_rtlookup(&inp->inp_inc); 2648 if (rt == NULL) { 2649 tp->t_maxopd = tp->t_maxseg = 2650 isipv6 ? tcp_v6mssdflt : tcp_mssdflt; 2651 return; | 2648 /* initialize */ 2649#ifdef INET6 2650 if (isipv6) { 2651 maxmtu = tcp_maxmtu6(&inp->inp_inc); 2652 tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt; 2653 } else 2654#endif 2655 { 2656 maxmtu = tcp_maxmtu(&inp->inp_inc); 2657 tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; |
2652 } | 2658 } |
2653 ifp = rt->rt_ifp; | |
2654 so = inp->inp_socket; 2655 | 2659 so = inp->inp_socket; 2660 |
2656 taop = rmx_taop(rt->rt_rmx); | |
2657 /* | 2661 /* |
2658 * Offer == -1 means that we didn't receive SYN yet, 2659 * use cached value in that case; | 2662 * no route to sender, take default mss and return |
2660 */ | 2663 */ |
2661 if (offer == -1) 2662 offer = taop->tao_mssopt; | 2664 if (maxmtu == 0) 2665 return; 2666 2667 /* what have we got? */ 2668 switch (offer) { 2669 case 0: 2670 /* 2671 * Offer == 0 means that there was no MSS on the SYN 2672 * segment, in this case we use tcp_mssdflt. 2673 */ 2674 offer = 2675#ifdef INET6 2676 isipv6 ? tcp_v6mssdflt : 2677#endif 2678 tcp_mssdflt; 2679 break; 2680 2681 case -1: 2682 /* 2683 * Offer == -1 means that we didn't receive SYN yet, 2684 * use cached value in that case; 2685 */ 2686 if (tcp_do_rfc1644) 2687 tcp_hc_gettao(&inp->inp_inc, &tao); 2688 if (tao.tao_mssopt != 0) 2689 offer = tao.tao_mssopt; 2690 /* FALLTHROUGH */ 2691 2692 default: 2693 /* 2694 * Sanity check: make sure that maxopd will be large 2695 * enough to allow some data on segments even if the 2696 * all the option space is used (40bytes). Otherwise 2697 * funny things may happen in tcp_output. 2698 */ 2699 offer = max(offer, 64); 2700 if (tcp_do_rfc1644) 2701 tcp_hc_updatetao(&inp->inp_inc, 2702 TCP_HC_TAO_MSSOPT, 0, offer); 2703 } 2704 |
2663 /* | 2705 /* |
2664 * Offer == 0 means that there was no MSS on the SYN segment, 2665 * in this case we use tcp_mssdflt. | 2706 * rmx information is now retrieved from tcp_hostcache |
2666 */ | 2707 */ |
2667 if (offer == 0) 2668 offer = isipv6 ? tcp_v6mssdflt : tcp_mssdflt; 2669 else 2670 /* 2671 * Sanity check: make sure that maxopd will be large 2672 * enough to allow some data on segments even is the 2673 * all the option space is used (40bytes). Otherwise 2674 * funny things may happen in tcp_output. 2675 */ 2676 offer = max(offer, 64); 2677 taop->tao_mssopt = offer; | 2708 tcp_hc_get(&inp->inp_inc, &metrics); |
2678 2679 /* | 2709 2710 /* |
2680 * While we're here, check if there's an initial rtt 2681 * or rttvar. Convert from the route-table units 2682 * to scaled multiples of the slow timeout timer. 2683 */ 2684 if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { 2685 /* 2686 * XXX the lock bit for RTT indicates that the value 2687 * is also a minimum value; this is subject to time. 2688 */ 2689 if (rt->rt_rmx.rmx_locks & RTV_RTT) 2690 tp->t_rttmin = rtt / (RTM_RTTUNIT / hz); 2691 tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE)); 2692 tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; 2693 tcpstat.tcps_usedrtt++; 2694 if (rt->rt_rmx.rmx_rttvar) { 2695 tp->t_rttvar = rt->rt_rmx.rmx_rttvar / 2696 (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE)); 2697 tcpstat.tcps_usedrttvar++; 2698 } else { 2699 /* default variation is +- 1 rtt */ 2700 tp->t_rttvar = 2701 tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; 2702 } 2703 TCPT_RANGESET(tp->t_rxtcur, 2704 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, 2705 tp->t_rttmin, TCPTV_REXMTMAX); 2706 } 2707 /* 2708 * if there's an mtu associated with the route, use it | 2711 * if there's a discovered mtu int tcp hostcache, use it |
2709 * else, use the link mtu. 2710 */ | 2712 * else, use the link mtu. 2713 */ |
2711 if (rt->rt_rmx.rmx_mtu) 2712 mss = rt->rt_rmx.rmx_mtu - min_protoh; | 2714 if (metrics.rmx_mtu) 2715 mss = metrics.rmx_mtu - min_protoh; |
2713 else { 2714#ifdef INET6 | 2716 else { 2717#ifdef INET6 |
2715 mss = (isipv6 ? IN6_LINKMTU(rt->rt_ifp) : ifp->if_mtu) 2716 - min_protoh; 2717#else 2718 mss = ifp->if_mtu - min_protoh; 2719#endif 2720#ifdef INET6 | |
2721 if (isipv6) { | 2718 if (isipv6) { |
2722 if (!in6_localaddr(&inp->in6p_faddr)) | 2719 mss = maxmtu - min_protoh; 2720 if (!path_mtu_discovery && 2721 !in6_localaddr(&inp->in6p_faddr)) |
2723 mss = min(mss, tcp_v6mssdflt); 2724 } else 2725#endif | 2722 mss = min(mss, tcp_v6mssdflt); 2723 } else 2724#endif |
2726 if (!in_localaddr(inp->inp_faddr)) | 2725 { 2726 mss = maxmtu - min_protoh; 2727 if (!path_mtu_discovery && 2728 !in_localaddr(inp->inp_faddr)) |
2727 mss = min(mss, tcp_mssdflt); | 2729 mss = min(mss, tcp_mssdflt); |
2730 } |
|
2728 } 2729 mss = min(mss, offer); | 2731 } 2732 mss = min(mss, offer); |
2733 |
|
2730 /* 2731 * maxopd stores the maximum length of data AND options 2732 * in a segment; maxseg is the amount of data in a normal 2733 * segment. We need to store this value (maxopd) apart 2734 * from maxseg, because now every segment carries options 2735 * and thus we normally have somewhat less data in segments. 2736 */ 2737 tp->t_maxopd = mss; --- 6 unchanged lines hidden (view full) --- 2744 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 2745 (origoffer == -1 || 2746 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) 2747 mss -= TCPOLEN_TSTAMP_APPA; 2748 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && 2749 (origoffer == -1 || 2750 (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)) 2751 mss -= TCPOLEN_CC_APPA; | 2734 /* 2735 * maxopd stores the maximum length of data AND options 2736 * in a segment; maxseg is the amount of data in a normal 2737 * segment. We need to store this value (maxopd) apart 2738 * from maxseg, because now every segment carries options 2739 * and thus we normally have somewhat less data in segments. 2740 */ 2741 tp->t_maxopd = mss; --- 6 unchanged lines hidden (view full) --- 2748 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 2749 (origoffer == -1 || 2750 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) 2751 mss -= TCPOLEN_TSTAMP_APPA; 2752 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && 2753 (origoffer == -1 || 2754 (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)) 2755 mss -= TCPOLEN_CC_APPA; |
2756 tp->t_maxseg = mss; |
|
2752 2753#if (MCLBYTES & (MCLBYTES - 1)) == 0 2754 if (mss > MCLBYTES) 2755 mss &= ~(MCLBYTES-1); 2756#else 2757 if (mss > MCLBYTES) 2758 mss = mss / MCLBYTES * MCLBYTES; 2759#endif | 2757 2758#if (MCLBYTES & (MCLBYTES - 1)) == 0 2759 if (mss > MCLBYTES) 2760 mss &= ~(MCLBYTES-1); 2761#else 2762 if (mss > MCLBYTES) 2763 mss = mss / MCLBYTES * MCLBYTES; 2764#endif |
2765 tp->t_maxseg = mss; 2766 |
|
2760 /* | 2767 /* |
2761 * If there's a pipesize, change the socket buffer 2762 * to that size. Make the socket buffers an integral 2763 * number of mss units; if the mss is larger than 2764 * the socket buffer, decrease the mss. | 2768 * If there's a pipesize, change the socket buffer to that size, 2769 * don't change if sb_hiwat is different than default (then it 2770 * has been changed on purpose with setsockopt). 2771 * Make the socket buffers an integral number of mss units; 2772 * if the mss is larger than the socket buffer, decrease the mss. |
2765 */ | 2773 */ |
2766#ifdef RTV_SPIPE 2767 if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) 2768#endif | 2774 if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe) 2775 bufsize = metrics.rmx_sendpipe; 2776 else |
2769 bufsize = so->so_snd.sb_hiwat; 2770 if (bufsize < mss) 2771 mss = bufsize; 2772 else { 2773 bufsize = roundup(bufsize, mss); 2774 if (bufsize > sb_max) 2775 bufsize = sb_max; 2776 if (bufsize > so->so_snd.sb_hiwat) 2777 (void)sbreserve(&so->so_snd, bufsize, so, NULL); 2778 } 2779 tp->t_maxseg = mss; 2780 | 2777 bufsize = so->so_snd.sb_hiwat; 2778 if (bufsize < mss) 2779 mss = bufsize; 2780 else { 2781 bufsize = roundup(bufsize, mss); 2782 if (bufsize > sb_max) 2783 bufsize = sb_max; 2784 if (bufsize > so->so_snd.sb_hiwat) 2785 (void)sbreserve(&so->so_snd, bufsize, so, NULL); 2786 } 2787 tp->t_maxseg = mss; 2788 |
2781#ifdef RTV_RPIPE 2782 if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) 2783#endif | 2789 if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe) 2790 bufsize = metrics.rmx_recvpipe; 2791 else |
2784 bufsize = so->so_rcv.sb_hiwat; 2785 if (bufsize > mss) { 2786 bufsize = roundup(bufsize, mss); 2787 if (bufsize > sb_max) 2788 bufsize = sb_max; 2789 if (bufsize > so->so_rcv.sb_hiwat) 2790 (void)sbreserve(&so->so_rcv, bufsize, so, NULL); 2791 } | 2792 bufsize = so->so_rcv.sb_hiwat; 2793 if (bufsize > mss) { 2794 bufsize = roundup(bufsize, mss); 2795 if (bufsize > sb_max) 2796 bufsize = sb_max; 2797 if (bufsize > so->so_rcv.sb_hiwat) 2798 (void)sbreserve(&so->so_rcv, bufsize, so, NULL); 2799 } |
2800 /* 2801 * While we're here, check the others too 2802 */ 2803 if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { 2804 tp->t_srtt = rtt; 2805 tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; 2806 tcpstat.tcps_usedrtt++; 2807 if (metrics.rmx_rttvar) { 2808 tp->t_rttvar = metrics.rmx_rttvar; 2809 tcpstat.tcps_usedrttvar++; 2810 } else { 2811 /* default variation is +- 1 rtt */ 2812 tp->t_rttvar = 2813 tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; 2814 } 2815 TCPT_RANGESET(tp->t_rxtcur, 2816 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, 2817 tp->t_rttmin, TCPTV_REXMTMAX); 2818 } 2819 if (metrics.rmx_ssthresh) { 2820 /* 2821 * There's some sort of gateway or interface 2822 * buffer limit on the path. Use this to set 2823 * the slow start threshhold, but set the 2824 * threshold to no less than 2*mss. 2825 */ 2826 tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh); 2827 tcpstat.tcps_usedssthresh++; 2828 } 2829 if (metrics.rmx_bandwidth) 2830 tp->snd_bandwidth = metrics.rmx_bandwidth; |
|
2792 2793 /* 2794 * Set the slow-start flight size depending on whether this 2795 * is a local network or not. | 2831 2832 /* 2833 * Set the slow-start flight size depending on whether this 2834 * is a local network or not. |
2835 * 2836 * Extend this so we cache the cwnd too and retrieve it here. 2837 * Make cwnd even bigger than RFC3390 suggests but only if we 2838 * have previous experience with the remote host. Be careful 2839 * not to make cwnd bigger than remote receive window or our own 2840 * send socket buffer. Maybe put some additional upper bound 2841 * on the retrieved cwnd. Should do incremental updates to 2842 * hostcache when cwnd collapses so next connection doesn't 2843 * overload the path again. 2844 * 2845 * RFC3390 says only do this if SYN or SYN/ACK didn't get lost. 2846 * We currently check only in syncache_socket for that. |
|
2796 */ | 2847 */ |
2848#define TCP_METRICS_CWND 2849#ifdef TCP_METRICS_CWND 2850 if (metrics.rmx_cwnd) 2851 tp->snd_cwnd = max(mss, 2852 min(metrics.rmx_cwnd / 2, 2853 min(tp->snd_wnd, so->so_snd.sb_hiwat))); 2854 else 2855#endif |
|
2797 if (tcp_do_rfc3390) 2798 tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380)); | 2856 if (tcp_do_rfc3390) 2857 tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380)); |
2858#ifdef INET6 |
|
2799 else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) || 2800 (!isipv6 && in_localaddr(inp->inp_faddr))) 2801 tp->snd_cwnd = mss * ss_fltsz_local; | 2859 else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) || 2860 (!isipv6 && in_localaddr(inp->inp_faddr))) 2861 tp->snd_cwnd = mss * ss_fltsz_local; |
2862#endif |
|
2802 else 2803 tp->snd_cwnd = mss * ss_fltsz; | 2863 else 2864 tp->snd_cwnd = mss * ss_fltsz; |
2804 2805 if (rt->rt_rmx.rmx_ssthresh) { 2806 /* 2807 * There's some sort of gateway or interface 2808 * buffer limit on the path. Use this to set 2809 * the slow start threshhold, but set the 2810 * threshold to no less than 2*mss. 2811 */ 2812 tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); 2813 tcpstat.tcps_usedssthresh++; 2814 } | |
2815} 2816 2817/* 2818 * Determine the MSS option to send on an outgoing SYN. 2819 */ 2820int | 2865} 2866 2867/* 2868 * Determine the MSS option to send on an outgoing SYN. 2869 */ 2870int |
2821tcp_mssopt(tp) 2822 struct tcpcb *tp; | 2871tcp_mssopt(inc) 2872 struct in_conninfo *inc; |
2823{ | 2873{ |
2824 struct rtentry *rt; | 2874 int mss = 0; 2875 u_long maxmtu = 0; 2876 u_long thcmtu = 0; 2877 size_t min_protoh; |
2825#ifdef INET6 | 2878#ifdef INET6 |
2826 int isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0; 2827 size_t min_protoh = isipv6 ? 2828 sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : 2829 sizeof (struct tcpiphdr); 2830#else 2831 const int isipv6 = 0; 2832 const size_t min_protoh = sizeof (struct tcpiphdr); | 2879 int isipv6 = inc->inc_isipv6 ? 1 : 0; |
2833#endif 2834 | 2880#endif 2881 |
2835 if (isipv6) 2836 rt = tcp_rtlookup6(&tp->t_inpcb->inp_inc); 2837 else 2838 rt = tcp_rtlookup(&tp->t_inpcb->inp_inc); 2839 if (rt == NULL) 2840 return (isipv6 ? tcp_v6mssdflt : tcp_mssdflt); | 2882 KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer")); |
2841 2842#ifdef INET6 | 2883 2884#ifdef INET6 |
2843 return (isipv6 ? IN6_LINKMTU(rt->rt_ifp) : 2844 rt->rt_ifp->if_mtu - min_protoh); 2845#else 2846 return (rt->rt_ifp->if_mtu - min_protoh); | 2885 if (isipv6) { 2886 mss = tcp_v6mssdflt; 2887 maxmtu = tcp_maxmtu6(inc); 2888 thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ 2889 min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 2890 } else |
2847#endif | 2891#endif |
2892 { 2893 mss = tcp_mssdflt; 2894 maxmtu = tcp_maxmtu(inc); 2895 thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ 2896 min_protoh = sizeof(struct tcpiphdr); 2897 } 2898 if (maxmtu && thcmtu) 2899 mss = min(maxmtu, thcmtu) - min_protoh; 2900 else if (maxmtu || thcmtu) 2901 mss = max(maxmtu, thcmtu) - min_protoh; 2902 2903 return (mss); |
|
2848} 2849 2850 2851/* 2852 * On a partial ack arrives, force the retransmission of the 2853 * next unacknowledged segment. Do not clear tp->t_dupacks. 2854 * By setting snd_nxt to ti_ack, this forces retransmission timer to 2855 * be started again. --- 178 unchanged lines hidden --- | 2904} 2905 2906 2907/* 2908 * On a partial ack arrives, force the retransmission of the 2909 * next unacknowledged segment. Do not clear tp->t_dupacks. 2910 * By setting snd_nxt to ti_ack, this forces retransmission timer to 2911 * be started again. --- 178 unchanged lines hidden --- |