/* tcp_timewait.c revision 92760 */
11541Srgrimes/* 211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 3311150Swollman * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 3450477Speter * $FreeBSD: head/sys/netinet/tcp_timewait.c 92760 2002-03-20 05:48:55Z jeff $ 351541Srgrimes */ 361541Srgrimes 3732752Seivind#include "opt_compat.h" 3854263Sshin#include "opt_inet6.h" 3956041Sshin#include "opt_ipsec.h" 4029514Sjoerg#include "opt_tcpdebug.h" 4129514Sjoerg 421541Srgrimes#include <sys/param.h> 431541Srgrimes#include <sys/systm.h> 4450673Sjlemon#include <sys/callout.h> 4512172Sphk#include <sys/kernel.h> 4612172Sphk#include <sys/sysctl.h> 471541Srgrimes#include <sys/malloc.h> 481541Srgrimes#include <sys/mbuf.h> 4955679Sshin#ifdef INET6 5055679Sshin#include <sys/domain.h> 5155679Sshin#endif 5248758Sgreen#include <sys/proc.h> 531541Srgrimes#include <sys/socket.h> 541541Srgrimes#include <sys/socketvar.h> 551541Srgrimes#include <sys/protosw.h> 5675619Skris#include <sys/random.h> 5734923Sbde 5892760Sjeff#include <vm/uma.h> 591541Srgrimes 601541Srgrimes#include <net/route.h> 611541Srgrimes#include <net/if.h> 621541Srgrimes 6317269Swollman#define _IP_VHL 641541Srgrimes#include <netinet/in.h> 651541Srgrimes#include <netinet/in_systm.h> 661541Srgrimes#include <netinet/ip.h> 6755679Sshin#ifdef INET6 6855679Sshin#include <netinet/ip6.h> 6955679Sshin#endif 701541Srgrimes#include <netinet/in_pcb.h> 7155679Sshin#ifdef INET6 7255679Sshin#include <netinet6/in6_pcb.h> 
7355679Sshin#endif 747090Sbde#include <netinet/in_var.h> 751541Srgrimes#include <netinet/ip_var.h> 7655679Sshin#ifdef INET6 7755679Sshin#include <netinet6/ip6_var.h> 7855679Sshin#endif 791541Srgrimes#include <netinet/tcp.h> 801541Srgrimes#include <netinet/tcp_fsm.h> 811541Srgrimes#include <netinet/tcp_seq.h> 821541Srgrimes#include <netinet/tcp_timer.h> 831541Srgrimes#include <netinet/tcp_var.h> 8455679Sshin#ifdef INET6 8555679Sshin#include <netinet6/tcp6_var.h> 8655679Sshin#endif 871541Srgrimes#include <netinet/tcpip.h> 886283Swollman#ifdef TCPDEBUG 896283Swollman#include <netinet/tcp_debug.h> 906283Swollman#endif 9155679Sshin#include <netinet6/ip6protosw.h> 921541Srgrimes 9355679Sshin#ifdef IPSEC 9455679Sshin#include <netinet6/ipsec.h> 9562587Sitojun#ifdef INET6 9662587Sitojun#include <netinet6/ipsec6.h> 9762587Sitojun#endif 9855679Sshin#endif /*IPSEC*/ 9955679Sshin 10058698Sjlemon#include <machine/in_cksum.h> 10182122Ssilby#include <sys/md5.h> 10258698Sjlemon 1031541Srgrimesint tcp_mssdflt = TCP_MSS; 10446381SbillfSYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, 10546381Sbillf &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); 10612296Sphk 10752904Sshin#ifdef INET6 10852904Sshinint tcp_v6mssdflt = TCP6_MSS; 10952904SshinSYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, 11055679Sshin CTLFLAG_RW, &tcp_v6mssdflt , 0, 11155679Sshin "Default TCP Maximum Segment Size for IPv6"); 11252904Sshin#endif 11352904Sshin 11450673Sjlemon#if 0 11512296Sphkstatic int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; 11646381SbillfSYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, 11746381Sbillf &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time"); 11850673Sjlemon#endif 11912296Sphk 12086764Sjlemonint tcp_do_rfc1323 = 1; 12146381SbillfSYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, 12246381Sbillf &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); 12312296Sphk 12486764Sjlemonint tcp_do_rfc1644 = 0; 
12546381SbillfSYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, 12646381Sbillf &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions"); 1271541Srgrimes 12850426Sjlemonstatic int tcp_tcbhashsize = 0; 12950426SjlemonSYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, 13050426Sjlemon &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); 13150426Sjlemon 13255198Smsmithstatic int do_tcpdrain = 1; 13366376SbmilekicSYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, 13466376Sbmilekic "Enable tcp_drain routine for extra help when low on mbufs"); 13555198Smsmith 13646381SbillfSYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, 13746381Sbillf &tcbinfo.ipi_count, 0, "Number of active PCBs"); 13836079Swollman 13972959Sjlemonstatic int icmp_may_rst = 1; 14072959SjlemonSYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, 14172959Sjlemon "Certain ICMP unreachable messages may abort connections in SYN_SENT"); 14270103Sphk 14382122Ssilbystatic int tcp_strict_rfc1948 = 0; 14482122SsilbySYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948, CTLFLAG_RW, 14582122Ssilby &tcp_strict_rfc1948, 0, "Determines if RFC1948 is followed exactly"); 14679413Ssilby 14782122Ssilbystatic int tcp_isn_reseed_interval = 0; 14882122SsilbySYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW, 14982122Ssilby &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret"); 15082122Ssilby 15192723Salfredstatic void tcp_cleartaocache(void); 15292723Salfredstatic void tcp_notify(struct inpcb *, int); 15312296Sphk 1547684Sdg/* 15532821Sdg * Target size of TCP PCB hash tables. Must be a power of two. 
15643562Smsmith * 15743562Smsmith * Note that this can be overridden by the kernel environment 15843562Smsmith * variable net.inet.tcp.tcbhashsize 1597684Sdg */ 1607684Sdg#ifndef TCBHASHSIZE 16132821Sdg#define TCBHASHSIZE 512 1627684Sdg#endif 1631541Srgrimes 1641541Srgrimes/* 16534881Swollman * This is the actual shape of what we allocate using the zone 16634881Swollman * allocator. Doing it this way allows us to protect both structures 16734881Swollman * using the same generation count, and also eliminates the overhead 16834881Swollman * of allocating tcpcbs separately. By hiding the structure here, 16934881Swollman * we avoid changing most of the rest of the code (although it needs 17034881Swollman * to be changed, eventually, for greater efficiency). 17134881Swollman */ 17234923Sbde#define ALIGNMENT 32 17334923Sbde#define ALIGNM1 (ALIGNMENT - 1) 17434881Swollmanstruct inp_tp { 17534881Swollman union { 17634881Swollman struct inpcb inp; 17734881Swollman char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; 17834881Swollman } inp_tp_u; 17934881Swollman struct tcpcb tcb; 18050673Sjlemon struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; 18150673Sjlemon struct callout inp_tp_delack; 18234881Swollman}; 18334881Swollman#undef ALIGNMENT 18434881Swollman#undef ALIGNM1 18534881Swollman 18634881Swollman/* 1871541Srgrimes * Tcp initialization 1881541Srgrimes */ 1891541Srgrimesvoid 1901541Srgrimestcp_init() 1911541Srgrimes{ 19277843Speter int hashsize = TCBHASHSIZE; 19343562Smsmith 1946283Swollman tcp_ccgen = 1; 1956283Swollman tcp_cleartaocache(); 19650673Sjlemon 19750673Sjlemon tcp_delacktime = TCPTV_DELACK; 19850673Sjlemon tcp_keepinit = TCPTV_KEEP_INIT; 19950673Sjlemon tcp_keepidle = TCPTV_KEEP_IDLE; 20050673Sjlemon tcp_keepintvl = TCPTV_KEEPINTVL; 20150673Sjlemon tcp_maxpersistidle = TCPTV_KEEP_IDLE; 20250673Sjlemon tcp_msl = TCPTV_MSL; 20350673Sjlemon 2047684Sdg LIST_INIT(&tcb); 2057684Sdg tcbinfo.listhead = &tcb; 20677900Speter 
TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); 20743576Smsmith if (!powerof2(hashsize)) { 20843562Smsmith printf("WARNING: TCB hash size not a power of 2\n"); 20943562Smsmith hashsize = 512; /* safe default */ 21043562Smsmith } 21150426Sjlemon tcp_tcbhashsize = hashsize; 21243562Smsmith tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); 21343562Smsmith tcbinfo.porthashbase = hashinit(hashsize, M_PCB, 21434923Sbde &tcbinfo.porthashmask); 21592760Sjeff tcbinfo.ipi_zone = uma_zcreate("tcpcb", sizeof(struct inp_tp), 21692760Sjeff NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 21792760Sjeff uma_zone_set_max(tcbinfo.ipi_zone, maxsockets); 21855679Sshin#ifdef INET6 21955679Sshin#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) 22055679Sshin#else /* INET6 */ 22155679Sshin#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) 22255679Sshin#endif /* INET6 */ 22355679Sshin if (max_protohdr < TCP_MINPROTOHDR) 22455679Sshin max_protohdr = TCP_MINPROTOHDR; 22555679Sshin if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) 2261541Srgrimes panic("tcp_init"); 22755679Sshin#undef TCP_MINPROTOHDR 22886764Sjlemon 22986764Sjlemon syncache_init(); 2301541Srgrimes} 2311541Srgrimes 2321541Srgrimes/* 23378642Ssilby * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. 23478642Ssilby * tcp_template used to store this data in mbufs, but we now recopy it out 23578642Ssilby * of the tcpcb each time to conserve mbufs. 
2361541Srgrimes */ 23778642Ssilbyvoid 23878642Ssilbytcp_fillheaders(tp, ip_ptr, tcp_ptr) 2391541Srgrimes struct tcpcb *tp; 24078642Ssilby void *ip_ptr; 24178642Ssilby void *tcp_ptr; 2421541Srgrimes{ 24378642Ssilby struct inpcb *inp = tp->t_inpcb; 24478642Ssilby struct tcphdr *tcp_hdr = (struct tcphdr *)tcp_ptr; 2451541Srgrimes 24655679Sshin#ifdef INET6 24755679Sshin if ((inp->inp_vflag & INP_IPV6) != 0) { 24878642Ssilby struct ip6_hdr *ip6; 24955679Sshin 25078642Ssilby ip6 = (struct ip6_hdr *)ip_ptr; 25155679Sshin ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | 25255679Sshin (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); 25355679Sshin ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | 25455679Sshin (IPV6_VERSION & IPV6_VERSION_MASK); 25555679Sshin ip6->ip6_nxt = IPPROTO_TCP; 25655679Sshin ip6->ip6_plen = sizeof(struct tcphdr); 25755679Sshin ip6->ip6_src = inp->in6p_laddr; 25855679Sshin ip6->ip6_dst = inp->in6p_faddr; 25978642Ssilby tcp_hdr->th_sum = 0; 26055679Sshin } else 26155679Sshin#endif 26278642Ssilby { 26378642Ssilby struct ip *ip = (struct ip *) ip_ptr; 26455679Sshin 26558698Sjlemon ip->ip_vhl = IP_VHL_BORING; 26678671Sjlemon ip->ip_tos = 0; 26778671Sjlemon ip->ip_len = 0; 26878671Sjlemon ip->ip_id = 0; 26978671Sjlemon ip->ip_off = 0; 27078671Sjlemon ip->ip_ttl = 0; 27178671Sjlemon ip->ip_sum = 0; 27258698Sjlemon ip->ip_p = IPPROTO_TCP; 27358698Sjlemon ip->ip_src = inp->inp_laddr; 27458698Sjlemon ip->ip_dst = inp->inp_faddr; 27578642Ssilby tcp_hdr->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 27678642Ssilby htons(sizeof(struct tcphdr) + IPPROTO_TCP)); 27778642Ssilby } 27878642Ssilby 27978642Ssilby tcp_hdr->th_sport = inp->inp_lport; 28078642Ssilby tcp_hdr->th_dport = inp->inp_fport; 28178642Ssilby tcp_hdr->th_seq = 0; 28278642Ssilby tcp_hdr->th_ack = 0; 28378642Ssilby tcp_hdr->th_x2 = 0; 28478642Ssilby tcp_hdr->th_off = 5; 28578642Ssilby tcp_hdr->th_flags = 0; 28678642Ssilby tcp_hdr->th_win = 0; 28778642Ssilby tcp_hdr->th_urp = 0; 
28878642Ssilby} 28978642Ssilby 29078642Ssilby/* 29178642Ssilby * Create template to be used to send tcp packets on a connection. 29278642Ssilby * Allocates an mbuf and fills in a skeletal tcp/ip header. The only 29378642Ssilby * use for this function is in keepalives, which use tcp_respond. 29478642Ssilby */ 29578642Ssilbystruct tcptemp * 29678642Ssilbytcp_maketemplate(tp) 29778642Ssilby struct tcpcb *tp; 29878642Ssilby{ 29978642Ssilby struct mbuf *m; 30078642Ssilby struct tcptemp *n; 30178642Ssilby 30278642Ssilby m = m_get(M_DONTWAIT, MT_HEADER); 30378642Ssilby if (m == NULL) 30478642Ssilby return (0); 30578642Ssilby m->m_len = sizeof(struct tcptemp); 30678642Ssilby n = mtod(m, struct tcptemp *); 30778642Ssilby 30878642Ssilby tcp_fillheaders(tp, (void *)&n->tt_ipgen, (void *)&n->tt_t); 3091541Srgrimes return (n); 3101541Srgrimes} 3111541Srgrimes 3121541Srgrimes/* 3131541Srgrimes * Send a single message to the TCP at address specified by 3141541Srgrimes * the given TCP/IP header. If m == 0, then we make a copy 3151541Srgrimes * of the tcpiphdr at ti and send directly to the addressed host. 3161541Srgrimes * This is used to force keep alive messages out using the TCP 31778642Ssilby * template for a connection. If flags are given then we send 31878642Ssilby * a message back to the TCP which originated the * segment ti, 31978642Ssilby * and discard the mbuf containing it and any other attached mbufs. 3201541Srgrimes * 3211541Srgrimes * In any case the ack and sequence number of the transmitted 3221541Srgrimes * segment are as specified by the parameters. 32331848Sjulian * 32431848Sjulian * NOTE: If m != NULL, then ti must point to *inside* the mbuf. 
3251541Srgrimes */ 3261541Srgrimesvoid 32755679Sshintcp_respond(tp, ipgen, th, m, ack, seq, flags) 3281541Srgrimes struct tcpcb *tp; 32955679Sshin void *ipgen; 33055679Sshin register struct tcphdr *th; 3311541Srgrimes register struct mbuf *m; 3321541Srgrimes tcp_seq ack, seq; 3331541Srgrimes int flags; 3341541Srgrimes{ 3351541Srgrimes register int tlen; 3361541Srgrimes int win = 0; 3371541Srgrimes struct route *ro = 0; 33814754Swollman struct route sro; 33955679Sshin struct ip *ip; 34055679Sshin struct tcphdr *nth; 34155679Sshin#ifdef INET6 34255679Sshin struct route_in6 *ro6 = 0; 34355679Sshin struct route_in6 sro6; 34455679Sshin struct ip6_hdr *ip6; 34555679Sshin int isipv6; 34655679Sshin#endif /* INET6 */ 34755679Sshin int ipflags = 0; 3481541Srgrimes 34955679Sshin#ifdef INET6 35055679Sshin isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6; 35155679Sshin ip6 = ipgen; 35255679Sshin#endif /* INET6 */ 35355679Sshin ip = ipgen; 35455679Sshin 3551541Srgrimes if (tp) { 35657576Sps if (!(flags & TH_RST)) { 35741187Sguido win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); 35857576Sps if (win > (long)TCP_MAXWIN << tp->rcv_scale) 35957576Sps win = (long)TCP_MAXWIN << tp->rcv_scale; 36057576Sps } 36155679Sshin#ifdef INET6 36255679Sshin if (isipv6) 36355679Sshin ro6 = &tp->t_inpcb->in6p_route; 36455679Sshin else 36555679Sshin#endif /* INET6 */ 3661541Srgrimes ro = &tp->t_inpcb->inp_route; 36714754Swollman } else { 36855679Sshin#ifdef INET6 36955679Sshin if (isipv6) { 37055679Sshin ro6 = &sro6; 37155679Sshin bzero(ro6, sizeof *ro6); 37255679Sshin } else 37355679Sshin#endif /* INET6 */ 37455679Sshin { 37514754Swollman ro = &sro; 37614754Swollman bzero(ro, sizeof *ro); 37755679Sshin } 3781541Srgrimes } 3791541Srgrimes if (m == 0) { 3801541Srgrimes m = m_gethdr(M_DONTWAIT, MT_HEADER); 3811541Srgrimes if (m == NULL) 3821541Srgrimes return; 3831541Srgrimes tlen = 0; 3841541Srgrimes m->m_data += max_linkhdr; 38555679Sshin#ifdef INET6 38655679Sshin if (isipv6) { 38755679Sshin 
bcopy((caddr_t)ip6, mtod(m, caddr_t), 38855679Sshin sizeof(struct ip6_hdr)); 38955679Sshin ip6 = mtod(m, struct ip6_hdr *); 39055679Sshin nth = (struct tcphdr *)(ip6 + 1); 39155679Sshin } else 39255679Sshin#endif /* INET6 */ 39355679Sshin { 39455679Sshin bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 39555679Sshin ip = mtod(m, struct ip *); 39655679Sshin nth = (struct tcphdr *)(ip + 1); 39755679Sshin } 39855679Sshin bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); 3991541Srgrimes flags = TH_ACK; 4001541Srgrimes } else { 4011541Srgrimes m_freem(m->m_next); 4021541Srgrimes m->m_next = 0; 40355679Sshin m->m_data = (caddr_t)ipgen; 40455679Sshin /* m_len is set later */ 4051541Srgrimes tlen = 0; 4061541Srgrimes#define xchg(a,b,type) { type t; t=a; a=b; b=t; } 40755679Sshin#ifdef INET6 40855679Sshin if (isipv6) { 40955679Sshin xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); 41055679Sshin nth = (struct tcphdr *)(ip6 + 1); 41155679Sshin } else 41255679Sshin#endif /* INET6 */ 41355679Sshin { 41455679Sshin xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long); 41555679Sshin nth = (struct tcphdr *)(ip + 1); 41655679Sshin } 41755679Sshin if (th != nth) { 41855679Sshin /* 41955679Sshin * this is usually a case when an extension header 42055679Sshin * exists between the IPv6 header and the 42155679Sshin * TCP header. 
42255679Sshin */ 42355679Sshin nth->th_sport = th->th_sport; 42455679Sshin nth->th_dport = th->th_dport; 42555679Sshin } 42655679Sshin xchg(nth->th_dport, nth->th_sport, n_short); 4271541Srgrimes#undef xchg 4281541Srgrimes } 42955679Sshin#ifdef INET6 43055679Sshin if (isipv6) { 43190198Sume ip6->ip6_flow = 0; 43290198Sume ip6->ip6_vfc = IPV6_VERSION; 43390198Sume ip6->ip6_nxt = IPPROTO_TCP; 43455679Sshin ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + 43555679Sshin tlen)); 43655679Sshin tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); 43756039Sshin } else 43855679Sshin#endif 43955679Sshin { 4401541Srgrimes tlen += sizeof (struct tcpiphdr); 44158698Sjlemon ip->ip_len = tlen; 44258698Sjlemon ip->ip_ttl = ip_defttl; 44355679Sshin } 4441541Srgrimes m->m_len = tlen; 4451541Srgrimes m->m_pkthdr.len = tlen; 4461541Srgrimes m->m_pkthdr.rcvif = (struct ifnet *) 0; 44755679Sshin nth->th_seq = htonl(seq); 44855679Sshin nth->th_ack = htonl(ack); 44955679Sshin nth->th_x2 = 0; 45055679Sshin nth->th_off = sizeof (struct tcphdr) >> 2; 45155679Sshin nth->th_flags = flags; 4521541Srgrimes if (tp) 45355679Sshin nth->th_win = htons((u_short) (win >> tp->rcv_scale)); 4541541Srgrimes else 45555679Sshin nth->th_win = htons((u_short)win); 45655679Sshin nth->th_urp = 0; 45755679Sshin#ifdef INET6 45855679Sshin if (isipv6) { 45959392Sshin nth->th_sum = 0; 46055679Sshin nth->th_sum = in6_cksum(m, IPPROTO_TCP, 46155679Sshin sizeof(struct ip6_hdr), 46255679Sshin tlen - sizeof(struct ip6_hdr)); 46355679Sshin ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, 46455679Sshin ro6 && ro6->ro_rt ? 
46555679Sshin ro6->ro_rt->rt_ifp : 46655679Sshin NULL); 46755679Sshin } else 46855679Sshin#endif /* INET6 */ 46955679Sshin { 47058698Sjlemon nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 47158698Sjlemon htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); 47258698Sjlemon m->m_pkthdr.csum_flags = CSUM_TCP; 47358698Sjlemon m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 47455679Sshin } 4756283Swollman#ifdef TCPDEBUG 4766283Swollman if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 47755679Sshin tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); 4786283Swollman#endif 47955679Sshin#ifdef IPSEC 48078064Sume if (ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL) != 0) { 48178064Sume m_freem(m); 48278064Sume return; 48378064Sume } 48455679Sshin#endif 48555679Sshin#ifdef INET6 48655679Sshin if (isipv6) { 48755679Sshin (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL); 48855913Sshin if (ro6 == &sro6 && ro6->ro_rt) { 48955679Sshin RTFREE(ro6->ro_rt); 49055913Sshin ro6->ro_rt = NULL; 49155913Sshin } 49255679Sshin } else 49355679Sshin#endif /* INET6 */ 49455679Sshin { 49555679Sshin (void) ip_output(m, NULL, ro, ipflags, NULL); 49614841Swollman if (ro == &sro && ro->ro_rt) { 49714754Swollman RTFREE(ro->ro_rt); 49855913Sshin ro->ro_rt = NULL; 49914754Swollman } 50055679Sshin } 5011541Srgrimes} 5021541Srgrimes 5031541Srgrimes/* 5041541Srgrimes * Create a new TCP control block, making an 5051541Srgrimes * empty reassembly queue and hooking it to the argument 50634881Swollman * protocol control block. The `inp' parameter must have 50734881Swollman * come from the zone allocator set up in tcp_init(). 
5081541Srgrimes */ 5091541Srgrimesstruct tcpcb * 5101541Srgrimestcp_newtcpcb(inp) 5111541Srgrimes struct inpcb *inp; 5121541Srgrimes{ 51334923Sbde struct inp_tp *it; 5141541Srgrimes register struct tcpcb *tp; 51555679Sshin#ifdef INET6 51655679Sshin int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; 51755679Sshin#endif /* INET6 */ 5181541Srgrimes 51934881Swollman it = (struct inp_tp *)inp; 52034881Swollman tp = &it->tcb; 5211541Srgrimes bzero((char *) tp, sizeof(struct tcpcb)); 52255679Sshin LIST_INIT(&tp->t_segq); 52355679Sshin tp->t_maxseg = tp->t_maxopd = 52455679Sshin#ifdef INET6 52555679Sshin isipv6 ? tcp_v6mssdflt : 52655679Sshin#endif /* INET6 */ 52755679Sshin tcp_mssdflt; 5281541Srgrimes 52950673Sjlemon /* Set up our timeouts. */ 53069147Sjlemon callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0); 53169147Sjlemon callout_init(tp->tt_persist = &it->inp_tp_persist, 0); 53269147Sjlemon callout_init(tp->tt_keep = &it->inp_tp_keep, 0); 53369147Sjlemon callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0); 53469147Sjlemon callout_init(tp->tt_delack = &it->inp_tp_delack, 0); 53550673Sjlemon 5366283Swollman if (tcp_do_rfc1323) 5376283Swollman tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); 5386283Swollman if (tcp_do_rfc1644) 5396283Swollman tp->t_flags |= TF_REQ_CC; 54034881Swollman tp->t_inpcb = inp; /* XXX */ 5411541Srgrimes /* 5421541Srgrimes * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no 54316367Swollman * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives 5441541Srgrimes * reasonable initial retransmit time. 
5451541Srgrimes */ 5461541Srgrimes tp->t_srtt = TCPTV_SRTTBASE; 54716367Swollman tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; 5481541Srgrimes tp->t_rttmin = TCPTV_MIN; 54916367Swollman tp->t_rxtcur = TCPTV_RTOBASE; 5501541Srgrimes tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; 5511541Srgrimes tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; 55250673Sjlemon tp->t_rcvtime = ticks; 55356564Sshin /* 55456564Sshin * IPv4 TTL initialization is necessary for an IPv6 socket as well, 55556564Sshin * because the socket may be bound to an IPv6 wildcard address, 55656564Sshin * which may match an IPv4-mapped IPv6 address. 55756564Sshin */ 55824570Sdg inp->inp_ip_ttl = ip_defttl; 5591541Srgrimes inp->inp_ppcb = (caddr_t)tp; 56034881Swollman return (tp); /* XXX */ 5611541Srgrimes} 5621541Srgrimes 5631541Srgrimes/* 5641541Srgrimes * Drop a TCP connection, reporting 5651541Srgrimes * the specified error. If connection is synchronized, 5661541Srgrimes * then send a RST to peer. 
5671541Srgrimes */ 5681541Srgrimesstruct tcpcb * 5691541Srgrimestcp_drop(tp, errno) 5701541Srgrimes register struct tcpcb *tp; 5711541Srgrimes int errno; 5721541Srgrimes{ 5731541Srgrimes struct socket *so = tp->t_inpcb->inp_socket; 5741541Srgrimes 5751541Srgrimes if (TCPS_HAVERCVDSYN(tp->t_state)) { 5761541Srgrimes tp->t_state = TCPS_CLOSED; 5771541Srgrimes (void) tcp_output(tp); 5781541Srgrimes tcpstat.tcps_drops++; 5791541Srgrimes } else 5801541Srgrimes tcpstat.tcps_conndrops++; 5811541Srgrimes if (errno == ETIMEDOUT && tp->t_softerror) 5821541Srgrimes errno = tp->t_softerror; 5831541Srgrimes so->so_error = errno; 5841541Srgrimes return (tcp_close(tp)); 5851541Srgrimes} 5861541Srgrimes 5871541Srgrimes/* 5881541Srgrimes * Close a TCP control block: 5891541Srgrimes * discard all space held by the tcp 5901541Srgrimes * discard internet protocol block 5911541Srgrimes * wake up any sleepers 5921541Srgrimes */ 5931541Srgrimesstruct tcpcb * 5941541Srgrimestcp_close(tp) 5951541Srgrimes register struct tcpcb *tp; 5961541Srgrimes{ 59755679Sshin register struct tseg_qent *q; 5981541Srgrimes struct inpcb *inp = tp->t_inpcb; 5991541Srgrimes struct socket *so = inp->inp_socket; 60055679Sshin#ifdef INET6 60155679Sshin int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; 60255679Sshin#endif /* INET6 */ 6031541Srgrimes register struct rtentry *rt; 60422719Swollman int dosavessthresh; 6051541Srgrimes 6061541Srgrimes /* 60750673Sjlemon * Make sure that all of our timers are stopped before we 60850673Sjlemon * delete the PCB. 60950673Sjlemon */ 61050673Sjlemon callout_stop(tp->tt_rexmt); 61150673Sjlemon callout_stop(tp->tt_persist); 61250673Sjlemon callout_stop(tp->tt_keep); 61350673Sjlemon callout_stop(tp->tt_2msl); 61450673Sjlemon callout_stop(tp->tt_delack); 61550673Sjlemon 61650673Sjlemon /* 6179373Swollman * If we got enough samples through the srtt filter, 6189373Swollman * save the rtt and rttvar in the routing entry. 
6199373Swollman * 'Enough' is arbitrarily defined as the 16 samples. 6209373Swollman * 16 samples is enough for the srtt filter to converge 6219373Swollman * to within 5% of the correct value; fewer samples and 6229373Swollman * we could save a very bogus rtt. 6231541Srgrimes * 6241541Srgrimes * Don't update the default route's characteristics and don't 6251541Srgrimes * update anything that the user "locked". 6261541Srgrimes */ 62755679Sshin if (tp->t_rttupdated >= 16) { 6281549Srgrimes register u_long i = 0; 62955679Sshin#ifdef INET6 63055679Sshin if (isipv6) { 63155679Sshin struct sockaddr_in6 *sin6; 6321541Srgrimes 63355679Sshin if ((rt = inp->in6p_route.ro_rt) == NULL) 63455679Sshin goto no_valid_rt; 63555679Sshin sin6 = (struct sockaddr_in6 *)rt_key(rt); 63655679Sshin if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 63755679Sshin goto no_valid_rt; 63855679Sshin } 63955679Sshin else 64055679Sshin#endif /* INET6 */ 64155679Sshin if ((rt = inp->inp_route.ro_rt) == NULL || 64255679Sshin ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr 64355679Sshin == INADDR_ANY) 64455679Sshin goto no_valid_rt; 64555679Sshin 6461541Srgrimes if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { 6471541Srgrimes i = tp->t_srtt * 64850673Sjlemon (RTM_RTTUNIT / (hz * TCP_RTT_SCALE)); 6491541Srgrimes if (rt->rt_rmx.rmx_rtt && i) 6501541Srgrimes /* 6511541Srgrimes * filter this update to half the old & half 6521541Srgrimes * the new values, converting scale. 6531541Srgrimes * See route.h and tcp_var.h for a 6541541Srgrimes * description of the scaling constants. 
6551541Srgrimes */ 6561541Srgrimes rt->rt_rmx.rmx_rtt = 6571541Srgrimes (rt->rt_rmx.rmx_rtt + i) / 2; 6581541Srgrimes else 6591541Srgrimes rt->rt_rmx.rmx_rtt = i; 6609263Swollman tcpstat.tcps_cachedrtt++; 6611541Srgrimes } 6621541Srgrimes if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { 6631541Srgrimes i = tp->t_rttvar * 66450673Sjlemon (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE)); 6651541Srgrimes if (rt->rt_rmx.rmx_rttvar && i) 6661541Srgrimes rt->rt_rmx.rmx_rttvar = 6671541Srgrimes (rt->rt_rmx.rmx_rttvar + i) / 2; 6681541Srgrimes else 6691541Srgrimes rt->rt_rmx.rmx_rttvar = i; 6709263Swollman tcpstat.tcps_cachedrttvar++; 6711541Srgrimes } 6721541Srgrimes /* 67322719Swollman * The old comment here said: 6741541Srgrimes * update the pipelimit (ssthresh) if it has been updated 6751541Srgrimes * already or if a pipesize was specified & the threshhold 6761541Srgrimes * got below half the pipesize. I.e., wait for bad news 6771541Srgrimes * before we start updating, then update on both good 6781541Srgrimes * and bad news. 67922719Swollman * 68022719Swollman * But we want to save the ssthresh even if no pipesize is 68122719Swollman * specified explicitly in the route, because such 68222719Swollman * connections still have an implicit pipesize specified 68322719Swollman * by the global tcp_sendspace. In the absence of a reliable 68422719Swollman * way to calculate the pipesize, it will have to do. 6851541Srgrimes */ 68622719Swollman i = tp->snd_ssthresh; 68722719Swollman if (rt->rt_rmx.rmx_sendpipe != 0) 68822719Swollman dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); 68922719Swollman else 69022719Swollman dosavessthresh = (i < so->so_snd.sb_hiwat / 2); 6913444Sphk if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && 69222719Swollman i != 0 && rt->rt_rmx.rmx_ssthresh != 0) 69322719Swollman || dosavessthresh) { 6941541Srgrimes /* 6951541Srgrimes * convert the limit from user data bytes to 6961541Srgrimes * packets then to packet data bytes. 
6971541Srgrimes */ 6981541Srgrimes i = (i + tp->t_maxseg / 2) / tp->t_maxseg; 6991541Srgrimes if (i < 2) 7001541Srgrimes i = 2; 70155679Sshin i *= (u_long)(tp->t_maxseg + 70255679Sshin#ifdef INET6 70355679Sshin (isipv6 ? sizeof (struct ip6_hdr) + 70455679Sshin sizeof (struct tcphdr) : 70555679Sshin#endif 70655679Sshin sizeof (struct tcpiphdr) 70755679Sshin#ifdef INET6 70855679Sshin ) 70955679Sshin#endif 71055679Sshin ); 7111541Srgrimes if (rt->rt_rmx.rmx_ssthresh) 7121541Srgrimes rt->rt_rmx.rmx_ssthresh = 7131541Srgrimes (rt->rt_rmx.rmx_ssthresh + i) / 2; 7141541Srgrimes else 7151541Srgrimes rt->rt_rmx.rmx_ssthresh = i; 7169263Swollman tcpstat.tcps_cachedssthresh++; 7171541Srgrimes } 7181541Srgrimes } 71955679Sshin no_valid_rt: 7201541Srgrimes /* free the reassembly queue, if any */ 72155679Sshin while((q = LIST_FIRST(&tp->t_segq)) != NULL) { 72255679Sshin LIST_REMOVE(q, tqe_q); 72355679Sshin m_freem(q->tqe_m); 72455679Sshin FREE(q, M_TSEGQ); 7251541Srgrimes } 72632821Sdg inp->inp_ppcb = NULL; 7271541Srgrimes soisdisconnected(so); 72855679Sshin#ifdef INET6 72955679Sshin if (INP_CHECK_SOCKAF(so, AF_INET6)) 73055679Sshin in6_pcbdetach(inp); 73155679Sshin else 73255679Sshin#endif /* INET6 */ 7331541Srgrimes in_pcbdetach(inp); 7341541Srgrimes tcpstat.tcps_closed++; 7351541Srgrimes return ((struct tcpcb *)0); 7361541Srgrimes} 7371541Srgrimes 7381541Srgrimesvoid 7391541Srgrimestcp_drain() 7401541Srgrimes{ 74155198Smsmith if (do_tcpdrain) 74255198Smsmith { 74355198Smsmith struct inpcb *inpb; 74455198Smsmith struct tcpcb *tcpb; 74555679Sshin struct tseg_qent *te; 7461541Srgrimes 74755198Smsmith /* 74855198Smsmith * Walk the tcpbs, if existing, and flush the reassembly queue, 74955198Smsmith * if there is one... 75055198Smsmith * XXX: The "Net/3" implementation doesn't imply that the TCP 75155198Smsmith * reassembly queue should be flushed, but in a situation 75255198Smsmith * where we're really low on mbufs, this is potentially 75355198Smsmith * usefull. 
75455198Smsmith */ 75574362Sphk LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) { 75674362Sphk if ((tcpb = intotcpcb(inpb))) { 75774362Sphk while ((te = LIST_FIRST(&tcpb->t_segq)) 75874362Sphk != NULL) { 75955679Sshin LIST_REMOVE(te, tqe_q); 76055679Sshin m_freem(te->tqe_m); 76155679Sshin FREE(te, M_TSEGQ); 76255198Smsmith } 76355198Smsmith } 76455198Smsmith } 76555198Smsmith } 7661541Srgrimes} 7671541Srgrimes 7681541Srgrimes/* 7691541Srgrimes * Notify a tcp user of an asynchronous error; 7701541Srgrimes * store error as soft error, but wake up user 7711541Srgrimes * (for now, won't do anything until can select for soft error). 77272960Sjlemon * 77372960Sjlemon * Do not wake up user since there currently is no mechanism for 77472960Sjlemon * reporting soft errors (yet - a kqueue filter may be added). 7751541Srgrimes */ 77612296Sphkstatic void 7771541Srgrimestcp_notify(inp, error) 7781541Srgrimes struct inpcb *inp; 7791541Srgrimes int error; 7801541Srgrimes{ 78172960Sjlemon struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; 7821541Srgrimes 7831541Srgrimes /* 7841541Srgrimes * Ignore some errors if we are hooked up. 7851541Srgrimes * If connection hasn't completed, has retransmitted several times, 7861541Srgrimes * and receives a second error, give up now. This is better 7871541Srgrimes * than waiting a long time to establish a connection that 7881541Srgrimes * can never complete. 
7891541Srgrimes */ 7901541Srgrimes if (tp->t_state == TCPS_ESTABLISHED && 7911541Srgrimes (error == EHOSTUNREACH || error == ENETUNREACH || 7921541Srgrimes error == EHOSTDOWN)) { 7931541Srgrimes return; 7941541Srgrimes } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && 7951541Srgrimes tp->t_softerror) 79672960Sjlemon tcp_drop(tp, error); 7978876Srgrimes else 7981541Srgrimes tp->t_softerror = error; 79972960Sjlemon#if 0 8001541Srgrimes wakeup((caddr_t) &so->so_timeo); 8011541Srgrimes sorwakeup(so); 8021541Srgrimes sowwakeup(so); 80372960Sjlemon#endif 8041541Srgrimes} 8051541Srgrimes 80636079Swollmanstatic int 80762573Sphktcp_pcblist(SYSCTL_HANDLER_ARGS) 80836079Swollman{ 80936079Swollman int error, i, n, s; 81036079Swollman struct inpcb *inp, **inp_list; 81136079Swollman inp_gen_t gencnt; 81236079Swollman struct xinpgen xig; 81336079Swollman 81436079Swollman /* 81536079Swollman * The process of preparing the TCB list is too time-consuming and 81636079Swollman * resource-intensive to repeat twice on every request. 81736079Swollman */ 81836079Swollman if (req->oldptr == 0) { 81936079Swollman n = tcbinfo.ipi_count; 82036079Swollman req->oldidx = 2 * (sizeof xig) 82136079Swollman + (n + n/8) * sizeof(struct xtcpcb); 82236079Swollman return 0; 82336079Swollman } 82436079Swollman 82536079Swollman if (req->newptr != 0) 82636079Swollman return EPERM; 82736079Swollman 82836079Swollman /* 82936079Swollman * OK, now we're committed to doing something. 
83036079Swollman */ 83136079Swollman s = splnet(); 83236079Swollman gencnt = tcbinfo.ipi_gencnt; 83336079Swollman n = tcbinfo.ipi_count; 83436079Swollman splx(s); 83536079Swollman 83636079Swollman xig.xig_len = sizeof xig; 83736079Swollman xig.xig_count = n; 83836079Swollman xig.xig_gen = gencnt; 83936079Swollman xig.xig_sogen = so_gencnt; 84036079Swollman error = SYSCTL_OUT(req, &xig, sizeof xig); 84136079Swollman if (error) 84236079Swollman return error; 84336079Swollman 84436079Swollman inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 84536079Swollman if (inp_list == 0) 84636079Swollman return ENOMEM; 84736079Swollman 84836079Swollman s = splnet(); 84971999Sphk for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n; 85071999Sphk inp = LIST_NEXT(inp, inp_list)) { 85184736Srwatson if (inp->inp_gencnt <= gencnt) { 85291406Sjhb if (cr_cansee(req->td->td_ucred, 85384736Srwatson inp->inp_socket->so_cred)) 85484527Sps continue; 85536079Swollman inp_list[i++] = inp; 85684527Sps } 85736079Swollman } 85836079Swollman splx(s); 85936079Swollman n = i; 86036079Swollman 86136079Swollman error = 0; 86236079Swollman for (i = 0; i < n; i++) { 86336079Swollman inp = inp_list[i]; 86436079Swollman if (inp->inp_gencnt <= gencnt) { 86536079Swollman struct xtcpcb xt; 86647960Stegge caddr_t inp_ppcb; 86736079Swollman xt.xt_len = sizeof xt; 86836079Swollman /* XXX should avoid extra copy */ 86936079Swollman bcopy(inp, &xt.xt_inp, sizeof *inp); 87047960Stegge inp_ppcb = inp->inp_ppcb; 87147960Stegge if (inp_ppcb != NULL) 87247960Stegge bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); 87347960Stegge else 87447960Stegge bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); 87536079Swollman if (inp->inp_socket) 87636079Swollman sotoxsocket(inp->inp_socket, &xt.xt_socket); 87736079Swollman error = SYSCTL_OUT(req, &xt, sizeof xt); 87836079Swollman } 87936079Swollman } 88036079Swollman if (!error) { 88136079Swollman /* 88236079Swollman * Give the user an updated idea of our state. 
88336079Swollman * If the generation differs from what we told 88436079Swollman * her before, she knows that something happened 88536079Swollman * while we were processing this request, and it 88636079Swollman * might be necessary to retry. 88736079Swollman */ 88836079Swollman s = splnet(); 88936079Swollman xig.xig_gen = tcbinfo.ipi_gencnt; 89036079Swollman xig.xig_sogen = so_gencnt; 89136079Swollman xig.xig_count = tcbinfo.ipi_count; 89236079Swollman splx(s); 89336079Swollman error = SYSCTL_OUT(req, &xig, sizeof xig); 89436079Swollman } 89536079Swollman free(inp_list, M_TEMP); 89636079Swollman return error; 89736079Swollman} 89836079Swollman 89936079SwollmanSYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, 90036079Swollman tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); 90136079Swollman 90248758Sgreenstatic int 90362573Sphktcp_getcred(SYSCTL_HANDLER_ARGS) 90448758Sgreen{ 90572650Sgreen struct xucred xuc; 90648758Sgreen struct sockaddr_in addrs[2]; 90748758Sgreen struct inpcb *inp; 90848758Sgreen int error, s; 90948758Sgreen 91086183Srwatson error = suser_xxx(0, req->td->td_proc, PRISON_ROOT); 91148758Sgreen if (error) 91248758Sgreen return (error); 91348758Sgreen error = SYSCTL_IN(req, addrs, sizeof(addrs)); 91448758Sgreen if (error) 91548758Sgreen return (error); 91648758Sgreen s = splnet(); 91748758Sgreen inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, 91854263Sshin addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); 91951381Sgreen if (inp == NULL || inp->inp_socket == NULL) { 92048758Sgreen error = ENOENT; 92148758Sgreen goto out; 92248758Sgreen } 92391406Sjhb error = cr_cansee(req->td->td_ucred, inp->inp_socket->so_cred); 92478697Sdwmalone if (error) 92578697Sdwmalone goto out; 92691354Sdd cru2x(inp->inp_socket->so_cred, &xuc); 92772650Sgreen error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); 92848758Sgreenout: 92948758Sgreen splx(s); 93048758Sgreen return (error); 93148758Sgreen} 93248758Sgreen 
93378697SdwmaloneSYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, 93478697Sdwmalone CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, 93578697Sdwmalone tcp_getcred, "S,xucred", "Get the xucred of a TCP connection"); 93648758Sgreen 93755679Sshin#ifdef INET6 93855679Sshinstatic int 93962573Sphktcp6_getcred(SYSCTL_HANDLER_ARGS) 94055679Sshin{ 94172650Sgreen struct xucred xuc; 94255679Sshin struct sockaddr_in6 addrs[2]; 94355679Sshin struct inpcb *inp; 94455679Sshin int error, s, mapped = 0; 94555679Sshin 94686183Srwatson error = suser_xxx(0, req->td->td_proc, PRISON_ROOT); 94755679Sshin if (error) 94855679Sshin return (error); 94955679Sshin error = SYSCTL_IN(req, addrs, sizeof(addrs)); 95055679Sshin if (error) 95155679Sshin return (error); 95255679Sshin if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { 95355679Sshin if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) 95455679Sshin mapped = 1; 95555679Sshin else 95655679Sshin return (EINVAL); 95755679Sshin } 95855679Sshin s = splnet(); 95955679Sshin if (mapped == 1) 96055679Sshin inp = in_pcblookup_hash(&tcbinfo, 96155679Sshin *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], 96255679Sshin addrs[1].sin6_port, 96355679Sshin *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], 96455679Sshin addrs[0].sin6_port, 96555679Sshin 0, NULL); 96655679Sshin else 96755679Sshin inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr, 96855679Sshin addrs[1].sin6_port, 96955679Sshin &addrs[0].sin6_addr, addrs[0].sin6_port, 97055679Sshin 0, NULL); 97155679Sshin if (inp == NULL || inp->inp_socket == NULL) { 97255679Sshin error = ENOENT; 97355679Sshin goto out; 97455679Sshin } 97591406Sjhb error = cr_cansee(req->td->td_ucred, inp->inp_socket->so_cred); 97678697Sdwmalone if (error) 97778697Sdwmalone goto out; 97891354Sdd cru2x(inp->inp_socket->so_cred, &xuc); 97972650Sgreen error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); 98055679Sshinout: 98155679Sshin splx(s); 98255679Sshin return (error); 98355679Sshin} 98455679Sshin 
98578697SdwmaloneSYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, 98678697Sdwmalone CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, 98778697Sdwmalone tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); 98855679Sshin#endif 98955679Sshin 99055679Sshin 9911541Srgrimesvoid 99212881Sbdetcp_ctlinput(cmd, sa, vip) 9931541Srgrimes int cmd; 9941541Srgrimes struct sockaddr *sa; 99512881Sbde void *vip; 9961541Srgrimes{ 99772959Sjlemon struct ip *ip = vip; 99872959Sjlemon struct tcphdr *th; 99973109Sjlemon struct in_addr faddr; 100073109Sjlemon struct inpcb *inp; 100173109Sjlemon struct tcpcb *tp; 100292723Salfred void (*notify)(struct inpcb *, int) = tcp_notify; 100373109Sjlemon tcp_seq icmp_seq; 100473109Sjlemon int s; 10051541Srgrimes 100673109Sjlemon faddr = ((struct sockaddr_in *)sa)->sin_addr; 100773109Sjlemon if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 100873109Sjlemon return; 100973109Sjlemon 10101541Srgrimes if (cmd == PRC_QUENCH) 10111541Srgrimes notify = tcp_quench; 101274937Sjesper else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || 101374937Sjesper cmd == PRC_UNREACH_PORT) && ip) 101472959Sjlemon notify = tcp_drop_syn_sent; 101573109Sjlemon else if (cmd == PRC_MSGSIZE) 101610881Swollman notify = tcp_mtudisc; 101772922Sjesper else if (PRC_IS_REDIRECT(cmd)) { 101872922Sjesper ip = 0; 101972922Sjesper notify = in_rtchange; 102072922Sjesper } else if (cmd == PRC_HOSTDEAD) 102172922Sjesper ip = 0; 102272922Sjesper else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) 10231541Srgrimes return; 10241541Srgrimes if (ip) { 102573109Sjlemon s = splnet(); 102617269Swollman th = (struct tcphdr *)((caddr_t)ip 102717269Swollman + (IP_VHL_HL(ip->ip_vhl) << 2)); 102873109Sjlemon inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport, 102973109Sjlemon ip->ip_src, th->th_sport, 0, NULL); 103073109Sjlemon if (inp != NULL && inp->inp_socket != NULL) { 103173109Sjlemon icmp_seq = htonl(th->th_seq); 103273109Sjlemon tp = intotcpcb(inp); 
103373109Sjlemon if (SEQ_GEQ(icmp_seq, tp->snd_una) && 103473109Sjlemon SEQ_LT(icmp_seq, tp->snd_max)) 103573109Sjlemon (*notify)(inp, inetctlerrmap[cmd]); 103686764Sjlemon } else { 103786764Sjlemon struct in_conninfo inc; 103886764Sjlemon 103986764Sjlemon inc.inc_fport = th->th_dport; 104086764Sjlemon inc.inc_lport = th->th_sport; 104186764Sjlemon inc.inc_faddr = faddr; 104286764Sjlemon inc.inc_laddr = ip->ip_src; 104386764Sjlemon#ifdef INET6 104486764Sjlemon inc.inc_isipv6 = 0; 104586764Sjlemon#endif 104686764Sjlemon syncache_unreach(&inc, th); 104773109Sjlemon } 104873109Sjlemon splx(s); 10491541Srgrimes } else 105073109Sjlemon in_pcbnotifyall(&tcb, faddr, inetctlerrmap[cmd], notify); 10511541Srgrimes} 10521541Srgrimes 105355679Sshin#ifdef INET6 105455679Sshinvoid 105555679Sshintcp6_ctlinput(cmd, sa, d) 105655679Sshin int cmd; 105755679Sshin struct sockaddr *sa; 105855679Sshin void *d; 105955679Sshin{ 106055679Sshin struct tcphdr th; 106192723Salfred void (*notify)(struct inpcb *, int) = tcp_notify; 106255679Sshin struct ip6_hdr *ip6; 106355679Sshin struct mbuf *m; 106478064Sume struct ip6ctlparam *ip6cp = NULL; 106578064Sume const struct sockaddr_in6 *sa6_src = NULL; 106655679Sshin int off; 106778064Sume struct tcp_portonly { 106878064Sume u_int16_t th_sport; 106978064Sume u_int16_t th_dport; 107078064Sume } *thp; 107155679Sshin 107255679Sshin if (sa->sa_family != AF_INET6 || 107355679Sshin sa->sa_len != sizeof(struct sockaddr_in6)) 107455679Sshin return; 107555679Sshin 107655679Sshin if (cmd == PRC_QUENCH) 107755679Sshin notify = tcp_quench; 107855679Sshin else if (cmd == PRC_MSGSIZE) 107955679Sshin notify = tcp_mtudisc; 108055679Sshin else if (!PRC_IS_REDIRECT(cmd) && 108155679Sshin ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) 108255679Sshin return; 108355679Sshin 108455679Sshin /* if the parameter is from icmp6, decode it. 
*/ 108555679Sshin if (d != NULL) { 108678064Sume ip6cp = (struct ip6ctlparam *)d; 108755679Sshin m = ip6cp->ip6c_m; 108855679Sshin ip6 = ip6cp->ip6c_ip6; 108955679Sshin off = ip6cp->ip6c_off; 109078064Sume sa6_src = ip6cp->ip6c_src; 109155679Sshin } else { 109255679Sshin m = NULL; 109355679Sshin ip6 = NULL; 109467456Sitojun off = 0; /* fool gcc */ 109578064Sume sa6_src = &sa6_any; 109655679Sshin } 109755679Sshin 109855679Sshin if (ip6) { 109986764Sjlemon struct in_conninfo inc; 110055679Sshin /* 110155679Sshin * XXX: We assume that when IPV6 is non NULL, 110255679Sshin * M and OFF are valid. 110355679Sshin */ 110455679Sshin 110567456Sitojun /* check if we can safely examine src and dst ports */ 110678064Sume if (m->m_pkthdr.len < off + sizeof(*thp)) 110767456Sitojun return; 110867456Sitojun 110978064Sume bzero(&th, sizeof(th)); 111078064Sume m_copydata(m, off, sizeof(*thp), (caddr_t)&th); 111178064Sume 111278064Sume in6_pcbnotify(&tcb, sa, th.th_dport, 111378064Sume (struct sockaddr *)ip6cp->ip6c_src, 111478064Sume th.th_sport, cmd, notify); 111586764Sjlemon 111686764Sjlemon inc.inc_fport = th.th_dport; 111786764Sjlemon inc.inc_lport = th.th_sport; 111886764Sjlemon inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; 111986764Sjlemon inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; 112086764Sjlemon inc.inc_isipv6 = 1; 112186764Sjlemon syncache_unreach(&inc, &th); 112255679Sshin } else 112391357Salfred in6_pcbnotify(&tcb, sa, 0, (const struct sockaddr *)sa6_src, 112455679Sshin 0, cmd, notify); 112555679Sshin} 112655679Sshin#endif /* INET6 */ 112755679Sshin 112880428Speter 112982122Ssilby/* 113082122Ssilby * Following is where TCP initial sequence number generation occurs. 113182122Ssilby * 113282122Ssilby * There are two places where we must use initial sequence numbers: 113382122Ssilby * 1. In SYN-ACK packets. 113482122Ssilby * 2. In SYN packets. 
113582122Ssilby * 113682122Ssilby * The ISNs in SYN-ACK packets have no monotonicity requirement, 113782122Ssilby * and should be as unpredictable as possible to avoid the possibility 113882122Ssilby * of spoofing and/or connection hijacking. To satisfy this 113982122Ssilby * requirement, SYN-ACK ISNs are generated via the arc4random() 114082122Ssilby * function. If exact RFC 1948 compliance is requested via sysctl, 114182122Ssilby * these ISNs will be generated just like those in SYN packets. 114282122Ssilby * 114382122Ssilby * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling 114482122Ssilby * depends on this property. In addition, these ISNs should be 114582122Ssilby * unguessable so as to prevent connection hijacking. To satisfy 114682122Ssilby * the requirements of this situation, the algorithm outlined in 114782122Ssilby * RFC 1948 is used to generate sequence numbers. 114882122Ssilby * 114982122Ssilby * For more information on the theory of operation, please see 115082122Ssilby * RFC 1948. 115182122Ssilby * 115282122Ssilby * Implementation details: 115382122Ssilby * 115482122Ssilby * Time is based off the system timer, and is corrected so that it 115582122Ssilby * increases by one megabyte per second. This allows for proper 115682122Ssilby * recycling on high speed LANs while still leaving over an hour 115782122Ssilby * before rollover. 115882122Ssilby * 115982122Ssilby * Two sysctls control the generation of ISNs: 116082122Ssilby * 116182122Ssilby * net.inet.tcp.isn_reseed_interval controls the number of seconds 116282122Ssilby * between seeding of isn_secret. This is normally set to zero, 116382122Ssilby * as reseeding should not be necessary. 116482122Ssilby * 116582122Ssilby * net.inet.tcp.strict_rfc1948 controls whether RFC 1948 is followed 116682122Ssilby * strictly. When strict compliance is requested, reseeding is 116782122Ssilby * disabled and SYN-ACKs will be generated in the same manner as 116882122Ssilby * SYNs. 
Strict mode is disabled by default. 116982122Ssilby * 117082122Ssilby */ 117179413Ssilby 117282122Ssilby#define ISN_BYTES_PER_SECOND 1048576 117379413Ssilby 117482122Ssilbyu_char isn_secret[32]; 117582122Ssilbyint isn_last_reseed; 117682122SsilbyMD5_CTX isn_ctx; 117775619Skris 117875619Skristcp_seq 117982122Ssilbytcp_new_isn(tp) 118082122Ssilby struct tcpcb *tp; 118175619Skris{ 118282122Ssilby u_int32_t md5_buffer[4]; 118382122Ssilby tcp_seq new_isn; 118475619Skris 118582122Ssilby /* Use arc4random for SYN-ACKs when not in exact RFC1948 mode. */ 118682122Ssilby if (((tp->t_state == TCPS_LISTEN) || (tp->t_state == TCPS_TIME_WAIT)) 118782122Ssilby && tcp_strict_rfc1948 == 0) 118882122Ssilby return arc4random(); 118975619Skris 119082122Ssilby /* Seed if this is the first use, reseed if requested. */ 119182122Ssilby if ((isn_last_reseed == 0) || 119282122Ssilby ((tcp_strict_rfc1948 == 0) && (tcp_isn_reseed_interval > 0) && 119382122Ssilby (((u_int)isn_last_reseed + (u_int)tcp_isn_reseed_interval*hz) 119482122Ssilby < (u_int)ticks))) { 119582122Ssilby read_random(&isn_secret, sizeof(isn_secret)); 119682122Ssilby isn_last_reseed = ticks; 119782122Ssilby } 119882122Ssilby 119982122Ssilby /* Compute the md5 hash and return the ISN. 
*/ 120082122Ssilby MD5Init(&isn_ctx); 120182122Ssilby MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); 120282122Ssilby MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); 120382122Ssilby#ifdef INET6 120482122Ssilby if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { 120582122Ssilby MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, 120682122Ssilby sizeof(struct in6_addr)); 120782122Ssilby MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, 120882122Ssilby sizeof(struct in6_addr)); 120982122Ssilby } else 121082122Ssilby#endif 121182122Ssilby { 121282122Ssilby MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, 121382122Ssilby sizeof(struct in_addr)); 121482122Ssilby MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, 121582122Ssilby sizeof(struct in_addr)); 121682122Ssilby } 121782122Ssilby MD5Update(&isn_ctx, (u_char *) &isn_secret, sizeof(isn_secret)); 121882122Ssilby MD5Final((u_char *) &md5_buffer, &isn_ctx); 121982122Ssilby new_isn = (tcp_seq) md5_buffer[0]; 122082122Ssilby new_isn += ticks * (ISN_BYTES_PER_SECOND / hz); 122182122Ssilby return new_isn; 122275619Skris} 122375619Skris 12241541Srgrimes/* 12251541Srgrimes * When a source quench is received, close congestion window 12261541Srgrimes * to one segment. We will gradually open it again as we proceed. 12271541Srgrimes */ 12281541Srgrimesvoid 12291541Srgrimestcp_quench(inp, errno) 12301541Srgrimes struct inpcb *inp; 12311541Srgrimes int errno; 12321541Srgrimes{ 12331541Srgrimes struct tcpcb *tp = intotcpcb(inp); 12341541Srgrimes 12351541Srgrimes if (tp) 12361541Srgrimes tp->snd_cwnd = tp->t_maxseg; 12371541Srgrimes} 12386283Swollman 12396283Swollman/* 124072959Sjlemon * When a specific ICMP unreachable message is received and the 124172959Sjlemon * connection state is SYN-SENT, drop the connection. This behavior 124272959Sjlemon * is controlled by the icmp_may_rst sysctl. 
124370103Sphk */ 124470103Sphkvoid 124570103Sphktcp_drop_syn_sent(inp, errno) 124670103Sphk struct inpcb *inp; 124770103Sphk int errno; 124870103Sphk{ 124970103Sphk struct tcpcb *tp = intotcpcb(inp); 125070103Sphk 125172959Sjlemon if (tp && tp->t_state == TCPS_SYN_SENT) 125272638Sphk tcp_drop(tp, errno); 125372638Sphk} 125472638Sphk 125572638Sphk/* 125610881Swollman * When `need fragmentation' ICMP is received, update our idea of the MSS 125710881Swollman * based on the new value in the route. Also nudge TCP to send something, 125810881Swollman * since we know the packet we just sent was dropped. 125910930Swollman * This duplicates some code in the tcp_mss() function in tcp_input.c. 126010881Swollman */ 126111537Swollmanvoid 126210881Swollmantcp_mtudisc(inp, errno) 126310881Swollman struct inpcb *inp; 126410881Swollman int errno; 126510881Swollman{ 126610881Swollman struct tcpcb *tp = intotcpcb(inp); 126710930Swollman struct rtentry *rt; 126810930Swollman struct rmxp_tao *taop; 126910930Swollman struct socket *so = inp->inp_socket; 127010930Swollman int offered; 127110930Swollman int mss; 127255679Sshin#ifdef INET6 127355679Sshin int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; 127455679Sshin#endif /* INET6 */ 127510881Swollman 127610930Swollman if (tp) { 127755679Sshin#ifdef INET6 127855679Sshin if (isipv6) 127986764Sjlemon rt = tcp_rtlookup6(&inp->inp_inc); 128055679Sshin else 128155679Sshin#endif /* INET6 */ 128286764Sjlemon rt = tcp_rtlookup(&inp->inp_inc); 128310930Swollman if (!rt || !rt->rt_rmx.rmx_mtu) { 128455679Sshin tp->t_maxopd = tp->t_maxseg = 128555679Sshin#ifdef INET6 128655679Sshin isipv6 ? tcp_v6mssdflt : 128755679Sshin#endif /* INET6 */ 128855679Sshin tcp_mssdflt; 128910930Swollman return; 129010930Swollman } 129110930Swollman taop = rmx_taop(rt->rt_rmx); 129210930Swollman offered = taop->tao_mssopt; 129355679Sshin mss = rt->rt_rmx.rmx_mtu - 129455679Sshin#ifdef INET6 129555679Sshin (isipv6 ? 
129655679Sshin sizeof(struct ip6_hdr) + sizeof(struct tcphdr) : 129755679Sshin#endif /* INET6 */ 129855679Sshin sizeof(struct tcpiphdr) 129955679Sshin#ifdef INET6 130055679Sshin ) 130155679Sshin#endif /* INET6 */ 130255679Sshin ; 130355679Sshin 130412939Swollman if (offered) 130512939Swollman mss = min(mss, offered); 130612939Swollman /* 130712939Swollman * XXX - The above conditional probably violates the TCP 130812939Swollman * spec. The problem is that, since we don't know the 130912939Swollman * other end's MSS, we are supposed to use a conservative 131012939Swollman * default. But, if we do that, then MTU discovery will 131112939Swollman * never actually take place, because the conservative 131212939Swollman * default is much less than the MTUs typically seen 131312939Swollman * on the Internet today. For the moment, we'll sweep 131412939Swollman * this under the carpet. 131512939Swollman * 131612939Swollman * The conservative default might not actually be a problem 131712939Swollman * if the only case this occurs is when sending an initial 131812939Swollman * SYN with options and data to a host we've never talked 131912939Swollman * to before. Then, they will reply with an MSS value which 132012939Swollman * will get recorded and the new parameters should get 132112939Swollman * recomputed. For Further Study. 
132212939Swollman */ 132311415Swollman if (tp->t_maxopd <= mss) 132411415Swollman return; 132510930Swollman tp->t_maxopd = mss; 132610930Swollman 132710930Swollman if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 132810930Swollman (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) 132910930Swollman mss -= TCPOLEN_TSTAMP_APPA; 133010930Swollman if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && 133110930Swollman (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC) 133210930Swollman mss -= TCPOLEN_CC_APPA; 133310930Swollman#if (MCLBYTES & (MCLBYTES - 1)) == 0 133410930Swollman if (mss > MCLBYTES) 133510930Swollman mss &= ~(MCLBYTES-1); 133610930Swollman#else 133710930Swollman if (mss > MCLBYTES) 133810930Swollman mss = mss / MCLBYTES * MCLBYTES; 133910881Swollman#endif 134010930Swollman if (so->so_snd.sb_hiwat < mss) 134110930Swollman mss = so->so_snd.sb_hiwat; 134210930Swollman 134310930Swollman tp->t_maxseg = mss; 134410930Swollman 134511450Swollman tcpstat.tcps_mturesent++; 134650673Sjlemon tp->t_rtttime = 0; 134711450Swollman tp->snd_nxt = tp->snd_una; 134811450Swollman tcp_output(tp); 134910930Swollman } 135010881Swollman} 135110881Swollman 135210881Swollman/* 13536283Swollman * Look-up the routing entry to the peer of this inpcb. If no route 13546283Swollman * is found and it cannot be allocated the return NULL. This routine 13556283Swollman * is called by TCP routines that access the rmx structure and by tcp_mss 13566283Swollman * to get the interface MTU. 
13576283Swollman */ 13586283Swollmanstruct rtentry * 135986764Sjlemontcp_rtlookup(inc) 136086764Sjlemon struct in_conninfo *inc; 13616283Swollman{ 13626283Swollman struct route *ro; 13636283Swollman struct rtentry *rt; 13646283Swollman 136586764Sjlemon ro = &inc->inc_route; 13666283Swollman rt = ro->ro_rt; 13676283Swollman if (rt == NULL || !(rt->rt_flags & RTF_UP)) { 13686283Swollman /* No route yet, so try to acquire one */ 136986764Sjlemon if (inc->inc_faddr.s_addr != INADDR_ANY) { 13706283Swollman ro->ro_dst.sa_family = AF_INET; 137178492Sume ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 13726283Swollman ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 137386764Sjlemon inc->inc_faddr; 13746283Swollman rtalloc(ro); 13756283Swollman rt = ro->ro_rt; 13766283Swollman } 13776283Swollman } 13786283Swollman return rt; 13796283Swollman} 13806283Swollman 138155679Sshin#ifdef INET6 138255679Sshinstruct rtentry * 138386764Sjlemontcp_rtlookup6(inc) 138486764Sjlemon struct in_conninfo *inc; 138555679Sshin{ 138655679Sshin struct route_in6 *ro6; 138755679Sshin struct rtentry *rt; 138855679Sshin 138986764Sjlemon ro6 = &inc->inc6_route; 139055679Sshin rt = ro6->ro_rt; 139155679Sshin if (rt == NULL || !(rt->rt_flags & RTF_UP)) { 139255679Sshin /* No route yet, so try to acquire one */ 139386764Sjlemon if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { 139486764Sjlemon ro6->ro_dst.sin6_family = AF_INET6; 139586764Sjlemon ro6->ro_dst.sin6_len = sizeof(struct sockaddr_in6); 139686764Sjlemon ro6->ro_dst.sin6_addr = inc->inc6_faddr; 139755679Sshin rtalloc((struct route *)ro6); 139855679Sshin rt = ro6->ro_rt; 139955679Sshin } 140055679Sshin } 140155679Sshin return rt; 140255679Sshin} 140355679Sshin#endif /* INET6 */ 140455679Sshin 140555679Sshin#ifdef IPSEC 140655679Sshin/* compute ESP/AH header size for TCP, including outer IP header. 
*/ 140755679Sshinsize_t 140855679Sshinipsec_hdrsiz_tcp(tp) 140955679Sshin struct tcpcb *tp; 141055679Sshin{ 141155679Sshin struct inpcb *inp; 141255679Sshin struct mbuf *m; 141355679Sshin size_t hdrsiz; 141455679Sshin struct ip *ip; 141555679Sshin#ifdef INET6 141655679Sshin struct ip6_hdr *ip6; 141755679Sshin#endif /* INET6 */ 141855679Sshin struct tcphdr *th; 141955679Sshin 142078642Ssilby if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL)) 142155679Sshin return 0; 142255679Sshin MGETHDR(m, M_DONTWAIT, MT_DATA); 142355679Sshin if (!m) 142455679Sshin return 0; 142555679Sshin 142655679Sshin#ifdef INET6 142755679Sshin if ((inp->inp_vflag & INP_IPV6) != 0) { 142855679Sshin ip6 = mtod(m, struct ip6_hdr *); 142955679Sshin th = (struct tcphdr *)(ip6 + 1); 143055679Sshin m->m_pkthdr.len = m->m_len = 143155679Sshin sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 143278642Ssilby tcp_fillheaders(tp, ip6, th); 143355679Sshin hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 143455679Sshin } else 143555679Sshin#endif /* INET6 */ 143655679Sshin { 143755679Sshin ip = mtod(m, struct ip *); 143855679Sshin th = (struct tcphdr *)(ip + 1); 143955679Sshin m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); 144078642Ssilby tcp_fillheaders(tp, ip, th); 144155679Sshin hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 144255679Sshin } 144355679Sshin 144455679Sshin m_free(m); 144555679Sshin return hdrsiz; 144655679Sshin} 144755679Sshin#endif /*IPSEC*/ 144855679Sshin 14496283Swollman/* 14506283Swollman * Return a pointer to the cached information about the remote host. 14516283Swollman * The cached information is stored in the protocol specific part of 14526283Swollman * the route metrics. 
14536283Swollman */ 14546283Swollmanstruct rmxp_tao * 145586764Sjlemontcp_gettaocache(inc) 145686764Sjlemon struct in_conninfo *inc; 14576283Swollman{ 145855679Sshin struct rtentry *rt; 14596283Swollman 146055679Sshin#ifdef INET6 146186764Sjlemon if (inc->inc_isipv6) 146286764Sjlemon rt = tcp_rtlookup6(inc); 146355679Sshin else 146455679Sshin#endif /* INET6 */ 146586764Sjlemon rt = tcp_rtlookup(inc); 146655679Sshin 14676283Swollman /* Make sure this is a host route and is up. */ 14686283Swollman if (rt == NULL || 14696283Swollman (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) 14706283Swollman return NULL; 14716283Swollman 14726283Swollman return rmx_taop(rt->rt_rmx); 14736283Swollman} 14746283Swollman 14756283Swollman/* 14766283Swollman * Clear all the TAO cache entries, called from tcp_init. 14776283Swollman * 14786283Swollman * XXX 14796283Swollman * This routine is just an empty one, because we assume that the routing 14806283Swollman * routing tables are initialized at the same time when TCP, so there is 14816283Swollman * nothing in the cache left over. 14826283Swollman */ 14836283Swollmanstatic void 148429506Sbdetcp_cleartaocache() 148529506Sbde{ 148629506Sbde} 1487