1/* (C) 1999-2001 Paul `Rusty' Russell 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 */ 8 9#include <linux/types.h> 10#include <linux/timer.h> 11#include <linux/module.h> 12#include <linux/in.h> 13#include <linux/tcp.h> 14#include <linux/spinlock.h> 15#include <linux/skbuff.h> 16#include <linux/ipv6.h> 17#include <net/ip6_checksum.h> 18#include <asm/unaligned.h> 19 20#include <net/tcp.h> 21 22#include <linux/netfilter.h> 23#include <linux/netfilter_ipv4.h> 24#include <linux/netfilter_ipv6.h> 25#include <net/netfilter/nf_conntrack.h> 26#include <net/netfilter/nf_conntrack_l4proto.h> 27#include <net/netfilter/nf_conntrack_ecache.h> 28#include <net/netfilter/nf_log.h> 29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 31 32#ifdef HNDCTF 33#include <ctf/hndctf.h> 34extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout); 35#else 36#define BCMFASTPATH_HOST 37#endif /* HNDCTF */ 38 39/* "Be conservative in what you do, 40 be liberal in what you accept from others." 41 If it's non-zero, we mark only out of window RST segments as INVALID. */ 42static int nf_ct_tcp_be_liberal __read_mostly = 0; 43 44/* If it is set to zero, we disable picking up already established 45 connections. */ 46static int nf_ct_tcp_loose __read_mostly = 1; 47 48/* Max number of the retransmitted packets without receiving an (acceptable) 49 ACK from the destination. If this number is reached, a shorter timer 50 will be started. */ 51static int nf_ct_tcp_max_retrans __read_mostly = 3; 52 53 54static const char *const tcp_conntrack_names[] = { 55 "NONE", 56 "SYN_SENT", 57 "SYN_RECV", 58 "ESTABLISHED", 59 "FIN_WAIT", 60 "CLOSE_WAIT", 61 "LAST_ACK", 62 "TIME_WAIT", 63 "CLOSE", 64 "SYN_SENT2", 65}; 66 67#define SECS * HZ 68#define MINS * 60 SECS 69#define HOURS * 60 MINS 70#define DAYS * 24 HOURS 71 72/* RFC1122 says the R2 limit should be at least 100 seconds. 73 Linux uses 15 packets as limit, which corresponds 74 to ~13-30min depending on RTO. */ 75static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; 76static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS; 77 78static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { 79 [TCP_CONNTRACK_SYN_SENT] = 2 MINS, 80 [TCP_CONNTRACK_SYN_RECV] = 60 SECS, 81 [TCP_CONNTRACK_ESTABLISHED] = 40 MINS, /* was 5 DAYS, no less then tcp_keepalive_time + tcp_keepalive_probes * tcp_keepalive_intvl */ 82 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS, 83 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS, 84 [TCP_CONNTRACK_LAST_ACK] = 30 SECS, 85 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, 86 [TCP_CONNTRACK_CLOSE] = 10 SECS, 87 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, 88}; 89 90#define sNO TCP_CONNTRACK_NONE 91#define sSS TCP_CONNTRACK_SYN_SENT 92#define sSR TCP_CONNTRACK_SYN_RECV 93#define sES TCP_CONNTRACK_ESTABLISHED 94#define sFW TCP_CONNTRACK_FIN_WAIT 95#define sCW TCP_CONNTRACK_CLOSE_WAIT 96#define sLA TCP_CONNTRACK_LAST_ACK 97#define sTW TCP_CONNTRACK_TIME_WAIT 98#define sCL TCP_CONNTRACK_CLOSE 99#define sS2 TCP_CONNTRACK_SYN_SENT2 100#define sIV TCP_CONNTRACK_MAX 101#define sIG TCP_CONNTRACK_IGNORE 102 103/* What TCP flags are set from RST/SYN/FIN/ACK. */ 104enum tcp_bit_set { 105 TCP_SYN_SET, 106 TCP_SYNACK_SET, 107 TCP_FIN_SET, 108 TCP_ACK_SET, 109 TCP_RST_SET, 110 TCP_NONE_SET, 111}; 112 113/* 114 * The TCP state transition table needs a few words... 115 * 116 * We are the man in the middle. All the packets go through us 117 * but might get lost in transit to the destination. 118 * It is assumed that the destinations can't receive segments 119 * we haven't seen. 120 * 121 * The checked segment is in window, but our windows are *not* 122 * equivalent with the ones of the sender/receiver. We always 123 * try to guess the state of the current sender. 124 * 125 * The meaning of the states are: 126 * 127 * NONE: initial state 128 * SYN_SENT: SYN-only packet seen 129 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open 130 * SYN_RECV: SYN-ACK packet seen 131 * ESTABLISHED: ACK packet seen 132 * FIN_WAIT: FIN packet seen 133 * CLOSE_WAIT: ACK seen (after FIN) 134 * LAST_ACK: FIN seen (after FIN) 135 * TIME_WAIT: last ACK seen 136 * CLOSE: closed connection (RST) 137 * 138 * Packets marked as IGNORED (sIG): 139 * if they may be either invalid or valid 140 * and the receiver may send back a connection 141 * closing RST or a SYN/ACK. 142 * 143 * Packets marked as INVALID (sIV): 144 * if we regard them as truly invalid packets 145 */ 146static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { 147 { 148/* ORIGINAL */ 149/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 150/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, 151/* 152 * sNO -> sSS Initialize a new connection 153 * sSS -> sSS Retransmitted SYN 154 * sS2 -> sS2 Late retransmitted SYN 155 * sSR -> sIG 156 * sES -> sIG Error: SYNs in window outside the SYN_SENT state 157 * are errors. Receiver will reply with RST 158 * and close the connection. 159 * Or we are not in sync and hold a dead connection. 160 * sFW -> sIG 161 * sCW -> sIG 162 * sLA -> sIG 163 * sTW -> sSS Reopened connection (RFC 1122). 164 * sCL -> sSS 165 */ 166/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 167/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, 168/* 169 * sNO -> sIV Too late and no reason to do anything 170 * sSS -> sIV Client can't send SYN and then SYN/ACK 171 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open 172 * sSR -> sIG 173 * sES -> sIG Error: SYNs in window outside the SYN_SENT state 174 * are errors. Receiver will reply with RST 175 * and close the connection. 176 * Or we are not in sync and hold a dead connection. 177 * sFW -> sIG 178 * sCW -> sIG 179 * sLA -> sIG 180 * sTW -> sIG 181 * sCL -> sIG 182 */ 183/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 184/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 185/* 186 * sNO -> sIV Too late and no reason to do anything... 187 * sSS -> sIV Client migth not send FIN in this state: 188 * we enforce waiting for a SYN/ACK reply first. 189 * sS2 -> sIV 190 * sSR -> sFW Close started. 191 * sES -> sFW 192 * sFW -> sLA FIN seen in both directions, waiting for 193 * the last ACK. 194 * Migth be a retransmitted FIN as well... 195 * sCW -> sLA 196 * sLA -> sLA Retransmitted FIN. Remain in the same state. 197 * sTW -> sTW 198 * sCL -> sCL 199 */ 200/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 201/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, 202/* 203 * sNO -> sES Assumed. 204 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. 205 * sS2 -> sIV 206 * sSR -> sES Established state is reached. 207 * sES -> sES :-) 208 * sFW -> sCW Normal close request answered by ACK. 209 * sCW -> sCW 210 * sLA -> sTW Last ACK detected. 211 * sTW -> sTW Retransmitted last ACK. Remain in the same state. 212 * sCL -> sCL 213 */ 214/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 215/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, 216/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 217 }, 218 { 219/* REPLY */ 220/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 221/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, 222/* 223 * sNO -> sIV Never reached. 224 * sSS -> sS2 Simultaneous open 225 * sS2 -> sS2 Retransmitted simultaneous SYN 226 * sSR -> sIV Invalid SYN packets sent by the server 227 * sES -> sIV 228 * sFW -> sIV 229 * sCW -> sIV 230 * sLA -> sIV 231 * sTW -> sIV Reopened connection, but server may not do it. 232 * sCL -> sIV 233 */ 234/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 235/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, 236/* 237 * sSS -> sSR Standard open. 238 * sS2 -> sSR Simultaneous open 239 * sSR -> sSR Retransmitted SYN/ACK. 240 * sES -> sIG Late retransmitted SYN/ACK? 241 * sFW -> sIG Might be SYN/ACK answering ignored SYN 242 * sCW -> sIG 243 * sLA -> sIG 244 * sTW -> sIG 245 * sCL -> sIG 246 */ 247/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 248/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 249/* 250 * sSS -> sIV Server might not send FIN in this state. 251 * sS2 -> sIV 252 * sSR -> sFW Close started. 253 * sES -> sFW 254 * sFW -> sLA FIN seen in both directions. 255 * sCW -> sLA 256 * sLA -> sLA Retransmitted FIN. 257 * sTW -> sTW 258 * sCL -> sCL 259 */ 260/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 261/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, 262/* 263 * sSS -> sIG Might be a half-open connection. 264 * sS2 -> sIG 265 * sSR -> sSR Might answer late resent SYN. 266 * sES -> sES :-) 267 * sFW -> sCW Normal close request answered by ACK. 268 * sCW -> sCW 269 * sLA -> sTW Last ACK detected. 270 * sTW -> sTW Retransmitted last ACK. 271 * sCL -> sCL 272 */ 273/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 274/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, 275/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 276 } 277}; 278 279static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 280 struct nf_conntrack_tuple *tuple) 281{ 282 const struct tcphdr *hp; 283 struct tcphdr _hdr; 284 285 /* Actually only need first 8 bytes. */ 286 hp = skb_header_pointer(skb, dataoff, 8, &_hdr); 287 if (hp == NULL) 288 return false; 289 290 tuple->src.u.tcp.port = hp->source; 291 tuple->dst.u.tcp.port = hp->dest; 292 293 return true; 294} 295 296static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, 297 const struct nf_conntrack_tuple *orig) 298{ 299 tuple->src.u.tcp.port = orig->dst.u.tcp.port; 300 tuple->dst.u.tcp.port = orig->src.u.tcp.port; 301 return true; 302} 303 304/* Print out the per-protocol part of the tuple. */ 305static int tcp_print_tuple(struct seq_file *s, 306 const struct nf_conntrack_tuple *tuple) 307{ 308 return seq_printf(s, "sport=%hu dport=%hu ", 309 ntohs(tuple->src.u.tcp.port), 310 ntohs(tuple->dst.u.tcp.port)); 311} 312 313/* Print out the private part of the conntrack. */ 314static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) 315{ 316 enum tcp_conntrack state; 317 318 spin_lock_bh(&ct->lock); 319 state = ct->proto.tcp.state; 320 spin_unlock_bh(&ct->lock); 321 322 return seq_printf(s, "%s ", tcp_conntrack_names[state]); 323} 324 325static unsigned int get_conntrack_index(const struct tcphdr *tcph) 326{ 327 if (tcph->rst) return TCP_RST_SET; 328 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); 329 else if (tcph->fin) return TCP_FIN_SET; 330 else if (tcph->ack) return TCP_ACK_SET; 331 else return TCP_NONE_SET; 332} 333 334/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering 335 in IP Filter' by Guido van Rooij. 336 337 http://www.nluug.nl/events/sane2000/papers.html 338 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz 339 340 The boundaries and the conditions are changed according to RFC793: 341 the packet must intersect the window (i.e. segments may be 342 after the right or before the left edge) and thus receivers may ACK 343 segments after the right edge of the window. 344 345 td_maxend = max(sack + max(win,1)) seen in reply packets 346 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets 347 td_maxwin += seq + len - sender.td_maxend 348 if seq + len > sender.td_maxend 349 td_end = max(seq + len) seen in sent packets 350 351 I. Upper bound for valid data: seq <= sender.td_maxend 352 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin 353 III. Upper bound for valid (s)ack: sack <= receiver.td_end 354 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW 355 356 where sack is the highest right edge of sack block found in the packet 357 or ack in the case of packet without SACK option. 358 359 The upper bound limit for a valid (s)ack is not ignored - 360 we doesn't have to deal with fragments. 361*/ 362 363static inline __u32 segment_seq_plus_len(__u32 seq, 364 size_t len, 365 unsigned int dataoff, 366 const struct tcphdr *tcph) 367{ 368 return (seq + len - dataoff - tcph->doff*4 369 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); 370} 371 372#define MAXACKWINCONST 66000 373#define MAXACKWINDOW(sender) \ 374 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ 375 : MAXACKWINCONST) 376 377/* 378 * Simplified tcp_parse_options routine from tcp_input.c 379 */ 380static void tcp_options(const struct sk_buff *skb, 381 unsigned int dataoff, 382 const struct tcphdr *tcph, 383 struct ip_ct_tcp_state *state) 384{ 385 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 386 const unsigned char *ptr; 387 int length = (tcph->doff*4) - sizeof(struct tcphdr); 388 389 if (!length) 390 return; 391 392 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), 393 length, buff); 394 BUG_ON(ptr == NULL); 395 396 state->td_scale = 397 state->flags = 0; 398 399 while (length > 0) { 400 int opcode=*ptr++; 401 int opsize; 402 403 switch (opcode) { 404 case TCPOPT_EOL: 405 return; 406 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 407 length--; 408 continue; 409 default: 410 opsize=*ptr++; 411 if (opsize < 2) /* "silly options" */ 412 return; 413 if (opsize > length) 414 break; /* don't parse partial options */ 415 416 if (opcode == TCPOPT_SACK_PERM 417 && opsize == TCPOLEN_SACK_PERM) 418 state->flags |= IP_CT_TCP_FLAG_SACK_PERM; 419 else if (opcode == TCPOPT_WINDOW 420 && opsize == TCPOLEN_WINDOW) { 421 state->td_scale = *(u_int8_t *)ptr; 422 423 if (state->td_scale > 14) { 424 /* See RFC1323 */ 425 state->td_scale = 14; 426 } 427 state->flags |= 428 IP_CT_TCP_FLAG_WINDOW_SCALE; 429 } 430 ptr += opsize - 2; 431 length -= opsize; 432 } 433 } 434} 435 436static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, 437 const struct tcphdr *tcph, __u32 *sack) 438{ 439 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 440 const unsigned char *ptr; 441 int length = (tcph->doff*4) - sizeof(struct tcphdr); 442 __u32 tmp; 443 444 if (!length) 445 return; 446 447 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), 448 length, buff); 449 BUG_ON(ptr == NULL); 450 451 /* Fast path for timestamp-only option */ 452 if (length == TCPOLEN_TSTAMP_ALIGNED*4 453 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) 454 | (TCPOPT_NOP << 16) 455 | (TCPOPT_TIMESTAMP << 8) 456 | TCPOLEN_TIMESTAMP)) 457 return; 458 459 while (length > 0) { 460 int opcode = *ptr++; 461 int opsize, i; 462 463 switch (opcode) { 464 case TCPOPT_EOL: 465 return; 466 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 467 length--; 468 continue; 469 default: 470 opsize = *ptr++; 471 if (opsize < 2) /* "silly options" */ 472 return; 473 if (opsize > length) 474 break; /* don't parse partial options */ 475 476 if (opcode == TCPOPT_SACK 477 && opsize >= (TCPOLEN_SACK_BASE 478 + TCPOLEN_SACK_PERBLOCK) 479 && !((opsize - TCPOLEN_SACK_BASE) 480 % TCPOLEN_SACK_PERBLOCK)) { 481 for (i = 0; 482 i < (opsize - TCPOLEN_SACK_BASE); 483 i += TCPOLEN_SACK_PERBLOCK) { 484 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1); 485 486 if (after(tmp, *sack)) 487 *sack = tmp; 488 } 489 return; 490 } 491 ptr += opsize - 2; 492 length -= opsize; 493 } 494 } 495} 496 497#ifdef CONFIG_NF_NAT_NEEDED 498static inline s16 nat_offset(const struct nf_conn *ct, 499 enum ip_conntrack_dir dir, 500 u32 seq) 501{ 502 typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); 503 504 return get_offset != NULL ? get_offset(ct, dir, seq) : 0; 505} 506#define NAT_OFFSET(pf, ct, dir, seq) \ 507 (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) 508#else 509#define NAT_OFFSET(pf, ct, dir, seq) 0 510#endif 511 512static bool tcp_in_window(const struct nf_conn *ct, 513 struct ip_ct_tcp *state, 514 enum ip_conntrack_dir dir, 515 unsigned int index, 516 const struct sk_buff *skb, 517 unsigned int dataoff, 518 const struct tcphdr *tcph, 519 u_int8_t pf) 520{ 521 struct net *net = nf_ct_net(ct); 522 struct ip_ct_tcp_state *sender = &state->seen[dir]; 523 struct ip_ct_tcp_state *receiver = &state->seen[!dir]; 524 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; 525 __u32 seq, ack, sack, end, win, swin; 526 s16 receiver_offset; 527 bool res; 528 529 /* 530 * Get the required data from the packet. 531 */ 532 seq = ntohl(tcph->seq); 533 ack = sack = ntohl(tcph->ack_seq); 534 win = ntohs(tcph->window); 535 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); 536 537 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) 538 tcp_sack(skb, dataoff, tcph, &sack); 539 540 /* Take into account NAT sequence number mangling */ 541 receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); 542 ack -= receiver_offset; 543 sack -= receiver_offset; 544 545 pr_debug("tcp_in_window: START\n"); 546 pr_debug("tcp_in_window: "); 547 nf_ct_dump_tuple(tuple); 548 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", 549 seq, ack, receiver_offset, sack, receiver_offset, win, end); 550 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 551 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 552 sender->td_end, sender->td_maxend, sender->td_maxwin, 553 sender->td_scale, 554 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 555 receiver->td_scale); 556 557 if (sender->td_maxwin == 0) { 558 /* 559 * Initialize sender data. 560 */ 561 if (tcph->syn) { 562 /* 563 * SYN-ACK in reply to a SYN 564 * or SYN from reply direction in simultaneous open. 565 */ 566 sender->td_end = 567 sender->td_maxend = end; 568 sender->td_maxwin = (win == 0 ? 1 : win); 569 570 tcp_options(skb, dataoff, tcph, sender); 571 /* 572 * RFC 1323: 573 * Both sides must send the Window Scale option 574 * to enable window scaling in either direction. 575 */ 576 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE 577 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) 578 sender->td_scale = 579 receiver->td_scale = 0; 580 if (!tcph->ack) 581 /* Simultaneous open */ 582 return true; 583 } else { 584 /* 585 * We are in the middle of a connection, 586 * its history is lost for us. 587 * Let's try to use the data from the packet. 588 */ 589 sender->td_end = end; 590 win <<= sender->td_scale; 591 sender->td_maxwin = (win == 0 ? 1 : win); 592 sender->td_maxend = end + sender->td_maxwin; 593 /* 594 * We haven't seen traffic in the other direction yet 595 * but we have to tweak window tracking to pass III 596 * and IV until that happens. 597 */ 598 if (receiver->td_maxwin == 0) 599 receiver->td_end = receiver->td_maxend = sack; 600 } 601 } else if (((state->state == TCP_CONNTRACK_SYN_SENT 602 && dir == IP_CT_DIR_ORIGINAL) 603 || (state->state == TCP_CONNTRACK_SYN_RECV 604 && dir == IP_CT_DIR_REPLY)) 605 && after(end, sender->td_end)) { 606 /* 607 * RFC 793: "if a TCP is reinitialized ... then it need 608 * not wait at all; it must only be sure to use sequence 609 * numbers larger than those recently used." 610 */ 611 sender->td_end = 612 sender->td_maxend = end; 613 sender->td_maxwin = (win == 0 ? 1 : win); 614 615 tcp_options(skb, dataoff, tcph, sender); 616 } 617 618 if (!(tcph->ack)) { 619 /* 620 * If there is no ACK, just pretend it was set and OK. 621 */ 622 ack = sack = receiver->td_end; 623 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == 624 (TCP_FLAG_ACK|TCP_FLAG_RST)) 625 && (ack == 0)) { 626 /* 627 * Broken TCP stacks, that set ACK in RST packets as well 628 * with zero ack value. 629 */ 630 ack = sack = receiver->td_end; 631 } 632 633 if (seq == end 634 && (!tcph->rst 635 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) 636 /* 637 * Packets contains no data: we assume it is valid 638 * and check the ack value only. 639 * However RST segments are always validated by their 640 * SEQ number, except when seq == 0 (reset sent answering 641 * SYN. 642 */ 643 seq = end = sender->td_end; 644 645 pr_debug("tcp_in_window: "); 646 nf_ct_dump_tuple(tuple); 647 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", 648 seq, ack, receiver_offset, sack, receiver_offset, win, end); 649 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 650 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 651 sender->td_end, sender->td_maxend, sender->td_maxwin, 652 sender->td_scale, 653 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 654 receiver->td_scale); 655 656 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", 657 before(seq, sender->td_maxend + 1), 658 after(end, sender->td_end - receiver->td_maxwin - 1), 659 before(sack, receiver->td_end + 1), 660 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); 661 662 if (before(seq, sender->td_maxend + 1) && 663 after(end, sender->td_end - receiver->td_maxwin - 1) && 664 before(sack, receiver->td_end + 1) && 665 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { 666 /* 667 * Take into account window scaling (RFC 1323). 668 */ 669 if (!tcph->syn) 670 win <<= sender->td_scale; 671 672 /* 673 * Update sender data. 674 */ 675 swin = win + (sack - ack); 676 if (sender->td_maxwin < swin) 677 sender->td_maxwin = swin; 678 if (after(end, sender->td_end)) { 679 sender->td_end = end; 680 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; 681 } 682 if (tcph->ack) { 683 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { 684 sender->td_maxack = ack; 685 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; 686 } else if (after(ack, sender->td_maxack)) 687 sender->td_maxack = ack; 688 } 689 690 /* 691 * Update receiver data. 692 */ 693 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) 694 receiver->td_maxwin += end - sender->td_maxend; 695 if (after(sack + win, receiver->td_maxend - 1)) { 696 receiver->td_maxend = sack + win; 697 if (win == 0) 698 receiver->td_maxend++; 699 } 700 if (ack == receiver->td_end) 701 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; 702 703 /* 704 * Check retransmissions. 705 */ 706 if (index == TCP_ACK_SET) { 707 if (state->last_dir == dir 708 && state->last_seq == seq 709 && state->last_ack == ack 710 && state->last_end == end 711 && state->last_win == win) 712 state->retrans++; 713 else { 714 state->last_dir = dir; 715 state->last_seq = seq; 716 state->last_ack = ack; 717 state->last_end = end; 718 state->last_win = win; 719 state->retrans = 0; 720 } 721 } 722 res = true; 723 } else { 724 res = false; 725 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || 726 nf_ct_tcp_be_liberal) 727 res = true; 728 if (!res && LOG_INVALID(net, IPPROTO_TCP)) 729 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 730 "nf_ct_tcp: %s ", 731 before(seq, sender->td_maxend + 1) ? 732 after(end, sender->td_end - receiver->td_maxwin - 1) ? 733 before(sack, receiver->td_end + 1) ? 734 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" 735 : "ACK is under the lower bound (possible overly delayed ACK)" 736 : "ACK is over the upper bound (ACKed data not seen yet)" 737 : "SEQ is under the lower bound (already ACKed data retransmitted)" 738 : "SEQ is over the upper bound (over the window of the receiver)"); 739 } 740 741 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " 742 "receiver end=%u maxend=%u maxwin=%u\n", 743 res, sender->td_end, sender->td_maxend, sender->td_maxwin, 744 receiver->td_end, receiver->td_maxend, receiver->td_maxwin); 745 746 return res; 747} 748 749/* table of valid flag combinations - PUSH, ECE and CWR are always valid */ 750static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| 751 TCPHDR_URG) + 1] = 752{ 753 [TCPHDR_SYN] = 1, 754 [TCPHDR_SYN|TCPHDR_URG] = 1, 755 [TCPHDR_SYN|TCPHDR_ACK] = 1, 756 [TCPHDR_RST] = 1, 757 [TCPHDR_RST|TCPHDR_ACK] = 1, 758 [TCPHDR_FIN|TCPHDR_ACK] = 1, 759 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1, 760 [TCPHDR_ACK] = 1, 761 [TCPHDR_ACK|TCPHDR_URG] = 1, 762}; 763 764/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ 765static int BCMFASTPATH_HOST tcp_error(struct net *net, struct nf_conn *tmpl, 766 struct sk_buff *skb, 767 unsigned int dataoff, 768 enum ip_conntrack_info *ctinfo, 769 u_int8_t pf, 770 unsigned int hooknum) 771{ 772 const struct tcphdr *th; 773 struct tcphdr _tcph; 774 unsigned int tcplen = skb->len - dataoff; 775 u_int8_t tcpflags; 776 777 /* Smaller that minimal TCP header? */ 778 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 779 if (th == NULL) { 780 if (LOG_INVALID(net, IPPROTO_TCP)) 781 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 782 "nf_ct_tcp: short packet "); 783 return -NF_ACCEPT; 784 } 785 786 /* Not whole TCP header or malformed packet */ 787 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { 788 if (LOG_INVALID(net, IPPROTO_TCP)) 789 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 790 "nf_ct_tcp: truncated/malformed packet "); 791 return -NF_ACCEPT; 792 } 793 794 /* Checksum invalid? Ignore. 795 * We skip checking packets on the outgoing path 796 * because the checksum is assumed to be correct. 797 */ 798 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 799 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { 800 if (LOG_INVALID(net, IPPROTO_TCP)) 801 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 802 "nf_ct_tcp: bad TCP checksum "); 803 return -NF_ACCEPT; 804 } 805 806 /* Check TCP flags. */ 807 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); 808 if (!tcp_valid_flags[tcpflags]) { 809 if (LOG_INVALID(net, IPPROTO_TCP)) 810 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 811 "nf_ct_tcp: invalid TCP flag combination "); 812 return -NF_ACCEPT; 813 } 814 815 return NF_ACCEPT; 816} 817 818/* Returns verdict for packet, or -1 for invalid. */ 819static int BCMFASTPATH_HOST tcp_packet(struct nf_conn *ct, 820 const struct sk_buff *skb, 821 unsigned int dataoff, 822 enum ip_conntrack_info ctinfo, 823 u_int8_t pf, 824 unsigned int hooknum) 825{ 826 struct net *net = nf_ct_net(ct); 827 struct nf_conntrack_tuple *tuple; 828 enum tcp_conntrack new_state, old_state; 829 enum ip_conntrack_dir dir; 830 const struct tcphdr *th; 831 struct tcphdr _tcph; 832 unsigned long timeout; 833 unsigned int index; 834 835 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 836 BUG_ON(th == NULL); 837 838 spin_lock_bh(&ct->lock); 839 old_state = ct->proto.tcp.state; 840 dir = CTINFO2DIR(ctinfo); 841 index = get_conntrack_index(th); 842 new_state = tcp_conntracks[dir][index][old_state]; 843 tuple = &ct->tuplehash[dir].tuple; 844 845 switch (new_state) { 846 case TCP_CONNTRACK_SYN_SENT: 847 if (old_state < TCP_CONNTRACK_TIME_WAIT) 848 break; 849 /* RFC 1122: "When a connection is closed actively, 850 * it MUST linger in TIME-WAIT state for a time 2xMSL 851 * (Maximum Segment Lifetime). However, it MAY accept 852 * a new SYN from the remote TCP to reopen the connection 853 * directly from TIME-WAIT state, if..." 854 * We ignore the conditions because we are in the 855 * TIME-WAIT state anyway. 856 * 857 * Handle aborted connections: we and the server 858 * think there is an existing connection but the client 859 * aborts it and starts a new one. 860 */ 861 if (((ct->proto.tcp.seen[dir].flags 862 | ct->proto.tcp.seen[!dir].flags) 863 & IP_CT_TCP_FLAG_CLOSE_INIT) 864 || (ct->proto.tcp.last_dir == dir 865 && ct->proto.tcp.last_index == TCP_RST_SET)) { 866 /* Attempt to reopen a closed/aborted connection. 867 * Delete this connection and look up again. */ 868 spin_unlock_bh(&ct->lock); 869 870 /* Only repeat if we can actually remove the timer. 871 * Destruction may already be in progress in process 872 * context and we must give it a chance to terminate. 873 */ 874 if (nf_ct_kill(ct)) 875 return -NF_REPEAT; 876 return NF_DROP; 877 } 878 /* Fall through */ 879 case TCP_CONNTRACK_IGNORE: 880 /* Ignored packets: 881 * 882 * Our connection entry may be out of sync, so ignore 883 * packets which may signal the real connection between 884 * the client and the server. 885 * 886 * a) SYN in ORIGINAL 887 * b) SYN/ACK in REPLY 888 * c) ACK in reply direction after initial SYN in original. 889 * 890 * If the ignored packet is invalid, the receiver will send 891 * a RST we'll catch below. 892 */ 893 if (index == TCP_SYNACK_SET 894 && ct->proto.tcp.last_index == TCP_SYN_SET 895 && ct->proto.tcp.last_dir != dir 896 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { 897 /* b) This SYN/ACK acknowledges a SYN that we earlier 898 * ignored as invalid. This means that the client and 899 * the server are both in sync, while the firewall is 900 * not. We get in sync from the previously annotated 901 * values. 902 */ 903 old_state = TCP_CONNTRACK_SYN_SENT; 904 new_state = TCP_CONNTRACK_SYN_RECV; 905 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = 906 ct->proto.tcp.last_end; 907 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = 908 ct->proto.tcp.last_end; 909 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = 910 ct->proto.tcp.last_win == 0 ? 911 1 : ct->proto.tcp.last_win; 912 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = 913 ct->proto.tcp.last_wscale; 914 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = 915 ct->proto.tcp.last_flags; 916 memset(&ct->proto.tcp.seen[dir], 0, 917 sizeof(struct ip_ct_tcp_state)); 918 break; 919 } 920 ct->proto.tcp.last_index = index; 921 ct->proto.tcp.last_dir = dir; 922 ct->proto.tcp.last_seq = ntohl(th->seq); 923 ct->proto.tcp.last_end = 924 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); 925 ct->proto.tcp.last_win = ntohs(th->window); 926 927 /* a) This is a SYN in ORIGINAL. The client and the server 928 * may be in sync but we are not. In that case, we annotate 929 * the TCP options and let the packet go through. If it is a 930 * valid SYN packet, the server will reply with a SYN/ACK, and 931 * then we'll get in sync. Otherwise, the server ignores it. */ 932 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { 933 struct ip_ct_tcp_state seen = {}; 934 935 ct->proto.tcp.last_flags = 936 ct->proto.tcp.last_wscale = 0; 937 tcp_options(skb, dataoff, th, &seen); 938 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { 939 ct->proto.tcp.last_flags |= 940 IP_CT_TCP_FLAG_WINDOW_SCALE; 941 ct->proto.tcp.last_wscale = seen.td_scale; 942 } 943 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { 944 ct->proto.tcp.last_flags |= 945 IP_CT_TCP_FLAG_SACK_PERM; 946 } 947 } 948 spin_unlock_bh(&ct->lock); 949 if (LOG_INVALID(net, IPPROTO_TCP)) 950 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 951 "nf_ct_tcp: invalid packet ignored "); 952 return NF_ACCEPT; 953 case TCP_CONNTRACK_MAX: 954 /* Invalid packet */ 955 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 956 dir, get_conntrack_index(th), old_state); 957 spin_unlock_bh(&ct->lock); 958 if (LOG_INVALID(net, IPPROTO_TCP)) 959 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 960 "nf_ct_tcp: invalid state "); 961 return -NF_ACCEPT; 962 case TCP_CONNTRACK_CLOSE: 963 if (index == TCP_RST_SET 964 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) 965 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { 966 /* Invalid RST */ 967 spin_unlock_bh(&ct->lock); 968 if (LOG_INVALID(net, IPPROTO_TCP)) 969 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 970 "nf_ct_tcp: invalid RST "); 971 return -NF_ACCEPT; 972 } 973 if (index == TCP_RST_SET 974 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) 975 && ct->proto.tcp.last_index == TCP_SYN_SET) 976 || (!test_bit(IPS_ASSURED_BIT, &ct->status) 977 && ct->proto.tcp.last_index == TCP_ACK_SET)) 978 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { 979 /* RST sent to invalid SYN or ACK we had let through 980 * at a) and c) above: 981 * 982 * a) SYN was in window then 983 * c) we hold a half-open connection. 984 * 985 * Delete our connection entry. 986 * We skip window checking, because packet might ACK 987 * segments we ignored. */ 988 goto in_window; 989 } 990 /* Just fall through */ 991 default: 992 /* Keep compilers happy. */ 993 break; 994 } 995 996#ifdef HNDCTF 997 /* Remove the ipc entries on receipt of FIN or RST */ 998 if (CTF_ENAB(kcih)) { 999 if (ct->ctf_flags & CTF_FLAGS_CACHED) { 1000 if (th->fin || th->rst) { 1001 ip_conntrack_ipct_delete(ct, 0); 1002 } 1003 goto in_window; 1004 } 1005 } 1006#endif /* HNDCTF */ 1007 1008 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, 1009 skb, dataoff, th, pf)) { 1010 spin_unlock_bh(&ct->lock); 1011 return -NF_ACCEPT; 1012 } 1013 in_window: 1014 /* From now on we have got in-window packets */ 1015 ct->proto.tcp.last_index = index; 1016 ct->proto.tcp.last_dir = dir; 1017 1018 pr_debug("tcp_conntracks: "); 1019 nf_ct_dump_tuple(tuple); 1020 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", 1021 (th->syn ? 1 : 0), (th->ack ? 1 : 0), 1022 (th->fin ? 1 : 0), (th->rst ? 1 : 0), 1023 old_state, new_state); 1024 1025 ct->proto.tcp.state = new_state; 1026 if (old_state != new_state 1027 && new_state == TCP_CONNTRACK_FIN_WAIT) 1028 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 1029 1030 if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && 1031 tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) 1032 timeout = nf_ct_tcp_timeout_max_retrans; 1033 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & 1034 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && 1035 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) 1036 timeout = nf_ct_tcp_timeout_unacknowledged; 1037 else 1038 timeout = tcp_timeouts[new_state]; 1039 spin_unlock_bh(&ct->lock); 1040 1041 if (new_state != old_state) 1042 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 1043 1044 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 1045 /* If only reply is a RST, we can consider ourselves not to 1046 have an established connection: this is a fairly common 1047 problem case, so we can delete the conntrack 1048 immediately. --RR */ 1049 if (th->rst) { 1050 nf_ct_kill_acct(ct, ctinfo, skb); 1051 return NF_ACCEPT; 1052 } 1053 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) 1054 && (old_state == TCP_CONNTRACK_SYN_RECV 1055 || old_state == TCP_CONNTRACK_ESTABLISHED) 1056 && new_state == TCP_CONNTRACK_ESTABLISHED) { 1057 /* Set ASSURED if we see see valid ack in ESTABLISHED 1058 after SYN_RECV or a valid answer for a picked up 1059 connection. */ 1060 set_bit(IPS_ASSURED_BIT, &ct->status); 1061 nf_conntrack_event_cache(IPCT_ASSURED, ct); 1062 } 1063 nf_ct_refresh_acct(ct, ctinfo, skb, timeout); 1064 1065 return NF_ACCEPT; 1066} 1067 1068/* Called when a new connection for this protocol found. */ 1069static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, 1070 unsigned int dataoff) 1071{ 1072 enum tcp_conntrack new_state; 1073 const struct tcphdr *th; 1074 struct tcphdr _tcph; 1075 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; 1076 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; 1077 1078 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 1079 BUG_ON(th == NULL); 1080 1081 /* Don't need lock here: this conntrack not in circulation yet */ 1082 new_state 1083 = tcp_conntracks[0][get_conntrack_index(th)] 1084 [TCP_CONNTRACK_NONE]; 1085 1086 /* Invalid: delete conntrack */ 1087 if (new_state >= TCP_CONNTRACK_MAX) { 1088 pr_debug("nf_ct_tcp: invalid new deleting.\n"); 1089 return false; 1090 } 1091 1092 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1093 /* SYN packet */ 1094 ct->proto.tcp.seen[0].td_end = 1095 segment_seq_plus_len(ntohl(th->seq), skb->len, 1096 dataoff, th); 1097 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1098 if (ct->proto.tcp.seen[0].td_maxwin == 0) 1099 ct->proto.tcp.seen[0].td_maxwin = 1; 1100 ct->proto.tcp.seen[0].td_maxend = 1101 ct->proto.tcp.seen[0].td_end; 1102 1103 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); 1104 ct->proto.tcp.seen[1].flags = 0; 1105 } else if (nf_ct_tcp_loose == 0) { 1106 /* Don't try to pick up connections. */ 1107 return false; 1108 } else { 1109 /* 1110 * We are in the middle of a connection, 1111 * its history is lost for us. 1112 * Let's try to use the data from the packet. 1113 */ 1114 ct->proto.tcp.seen[0].td_end = 1115 segment_seq_plus_len(ntohl(th->seq), skb->len, 1116 dataoff, th); 1117 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1118 if (ct->proto.tcp.seen[0].td_maxwin == 0) 1119 ct->proto.tcp.seen[0].td_maxwin = 1; 1120 ct->proto.tcp.seen[0].td_maxend = 1121 ct->proto.tcp.seen[0].td_end + 1122 ct->proto.tcp.seen[0].td_maxwin; 1123 ct->proto.tcp.seen[0].td_scale = 0; 1124 1125 /* We assume SACK and liberal window checking to handle 1126 * window scaling */ 1127 ct->proto.tcp.seen[0].flags = 1128 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | 1129 IP_CT_TCP_FLAG_BE_LIBERAL; 1130 } 1131 1132 ct->proto.tcp.seen[1].td_end = 0; 1133 ct->proto.tcp.seen[1].td_maxend = 0; 1134 ct->proto.tcp.seen[1].td_maxwin = 0; 1135 ct->proto.tcp.seen[1].td_scale = 0; 1136 1137 /* tcp_packet will set them */ 1138 ct->proto.tcp.state = TCP_CONNTRACK_NONE; 1139 ct->proto.tcp.last_index = TCP_NONE_SET; 1140 1141 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1142 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 1143 sender->td_end, sender->td_maxend, sender->td_maxwin, 1144 sender->td_scale, 1145 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 1146 receiver->td_scale); 1147 return true; 1148} 1149 1150#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1151 1152#include <linux/netfilter/nfnetlink.h> 1153#include <linux/netfilter/nfnetlink_conntrack.h> 1154 1155static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, 1156 struct nf_conn *ct) 1157{ 1158 struct nlattr *nest_parms; 1159 struct nf_ct_tcp_flags tmp = {}; 1160 1161 spin_lock_bh(&ct->lock); 1162 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); 1163 if (!nest_parms) 1164 goto nla_put_failure; 1165 1166 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state); 1167 1168 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, 1169 ct->proto.tcp.seen[0].td_scale); 1170 1171 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, 1172 ct->proto.tcp.seen[1].td_scale); 1173 1174 tmp.flags = ct->proto.tcp.seen[0].flags; 1175 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 1176 sizeof(struct nf_ct_tcp_flags), &tmp); 1177 1178 tmp.flags = ct->proto.tcp.seen[1].flags; 1179 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, 1180 sizeof(struct nf_ct_tcp_flags), &tmp); 1181 spin_unlock_bh(&ct->lock); 1182 1183 nla_nest_end(skb, nest_parms); 1184 1185 return 0; 1186 1187nla_put_failure: 1188 spin_unlock_bh(&ct->lock); 1189 return -1; 1190} 1191 1192static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { 1193 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, 1194 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, 1195 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, 1196 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, 1197 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, 1198}; 1199 1200static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) 1201{ 1202 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP]; 1203 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1]; 1204 int err; 1205 1206 /* updates could not contain anything about the private 1207 * protocol info, in that case skip the parsing */ 1208 if (!pattr) 1209 return 0; 1210 1211 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy); 1212 if (err < 0) 1213 return err; 1214 1215 if (tb[CTA_PROTOINFO_TCP_STATE] && 1216 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) 1217 return -EINVAL; 1218 1219 spin_lock_bh(&ct->lock); 1220 if (tb[CTA_PROTOINFO_TCP_STATE]) 1221 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); 1222 1223 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) { 1224 struct nf_ct_tcp_flags *attr = 1225 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]); 1226 ct->proto.tcp.seen[0].flags &= ~attr->mask; 1227 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask; 1228 } 1229 1230 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) { 1231 struct nf_ct_tcp_flags *attr = 1232 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]); 1233 ct->proto.tcp.seen[1].flags &= ~attr->mask; 1234 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask; 1235 } 1236 1237 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] && 1238 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] && 1239 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && 1240 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { 1241 ct->proto.tcp.seen[0].td_scale = 1242 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); 1243 ct->proto.tcp.seen[1].td_scale = 1244 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); 1245 } 1246 spin_unlock_bh(&ct->lock); 1247 1248 return 0; 1249} 1250 1251static int tcp_nlattr_size(void) 1252{ 1253 return nla_total_size(0) /* CTA_PROTOINFO_TCP */ 1254 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); 1255} 1256 1257static int tcp_nlattr_tuple_size(void) 1258{ 1259 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1260} 1261#endif 1262 1263#ifdef CONFIG_SYSCTL 1264static unsigned int tcp_sysctl_table_users; 1265static struct ctl_table_header *tcp_sysctl_header; 1266static struct ctl_table tcp_sysctl_table[] = { 1267 { 1268 .procname = "nf_conntrack_tcp_timeout_syn_sent", 1269 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], 1270 .maxlen = sizeof(unsigned int), 1271 .mode = 0644, 1272 .proc_handler = proc_dointvec_jiffies, 1273 }, 1274 { 1275 .procname = "nf_conntrack_tcp_timeout_syn_recv", 1276 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], 1277 .maxlen = sizeof(unsigned int), 1278 .mode = 0644, 1279 .proc_handler = proc_dointvec_jiffies, 1280 }, 1281 { 1282 .procname = "nf_conntrack_tcp_timeout_established", 1283 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], 1284 .maxlen = sizeof(unsigned int), 1285 .mode = 0644, 1286 .proc_handler = proc_dointvec_jiffies, 1287 }, 1288 { 1289 .procname = "nf_conntrack_tcp_timeout_fin_wait", 1290 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], 1291 .maxlen = sizeof(unsigned int), 1292 .mode = 0644, 1293 .proc_handler = proc_dointvec_jiffies, 1294 }, 1295 { 1296 .procname = "nf_conntrack_tcp_timeout_close_wait", 1297 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], 1298 .maxlen = sizeof(unsigned int), 1299 .mode = 0644, 1300 .proc_handler = proc_dointvec_jiffies, 1301 }, 1302 { 1303 .procname = "nf_conntrack_tcp_timeout_last_ack", 1304 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], 1305 .maxlen = sizeof(unsigned int), 1306 .mode = 0644, 1307 .proc_handler = proc_dointvec_jiffies, 1308 }, 1309 { 1310 .procname = "nf_conntrack_tcp_timeout_time_wait", 1311 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], 1312 .maxlen = sizeof(unsigned int), 1313 .mode = 0644, 1314 .proc_handler = proc_dointvec_jiffies, 1315 }, 1316 { 1317 .procname = "nf_conntrack_tcp_timeout_close", 1318 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], 1319 .maxlen = sizeof(unsigned int), 1320 .mode = 0644, 1321 .proc_handler = proc_dointvec_jiffies, 1322 }, 1323 { 1324 .procname = "nf_conntrack_tcp_timeout_max_retrans", 1325 .data = &nf_ct_tcp_timeout_max_retrans, 1326 .maxlen = sizeof(unsigned int), 1327 .mode = 0644, 1328 .proc_handler = proc_dointvec_jiffies, 1329 }, 1330 { 1331 .procname = "nf_conntrack_tcp_timeout_unacknowledged", 1332 .data = &nf_ct_tcp_timeout_unacknowledged, 1333 .maxlen = sizeof(unsigned int), 1334 .mode = 0644, 1335 .proc_handler = proc_dointvec_jiffies, 1336 }, 1337 { 1338 .procname = "nf_conntrack_tcp_loose", 1339 .data = &nf_ct_tcp_loose, 1340 .maxlen = sizeof(unsigned int), 1341 .mode = 0644, 1342 .proc_handler = proc_dointvec, 1343 }, 1344 { 1345 .procname = "nf_conntrack_tcp_be_liberal", 1346 .data = &nf_ct_tcp_be_liberal, 1347 .maxlen = sizeof(unsigned int), 1348 .mode = 0644, 1349 .proc_handler = proc_dointvec, 1350 }, 1351 { 1352 .procname = "nf_conntrack_tcp_max_retrans", 1353 .data = &nf_ct_tcp_max_retrans, 1354 .maxlen = sizeof(unsigned int), 1355 .mode = 0644, 1356 .proc_handler = proc_dointvec, 1357 }, 1358 { } 1359}; 1360 1361#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT 1362static struct ctl_table tcp_compat_sysctl_table[] = { 1363 { 1364 .procname = "ip_conntrack_tcp_timeout_syn_sent", 1365 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], 1366 .maxlen = sizeof(unsigned int), 1367 .mode = 0644, 1368 .proc_handler = proc_dointvec_jiffies, 1369 }, 1370 { 1371 .procname = "ip_conntrack_tcp_timeout_syn_sent2", 1372 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], 1373 .maxlen = sizeof(unsigned int), 1374 .mode = 0644, 1375 .proc_handler = proc_dointvec_jiffies, 1376 }, 1377 { 1378 .procname = "ip_conntrack_tcp_timeout_syn_recv", 1379 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], 1380 .maxlen = sizeof(unsigned int), 1381 .mode = 0644, 1382 .proc_handler = proc_dointvec_jiffies, 1383 }, 1384 { 1385 .procname = "ip_conntrack_tcp_timeout_established", 1386 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], 1387 .maxlen = sizeof(unsigned int), 1388 .mode = 0644, 1389 .proc_handler = proc_dointvec_jiffies, 1390 }, 1391 { 1392 .procname = "ip_conntrack_tcp_timeout_fin_wait", 1393 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], 1394 .maxlen = sizeof(unsigned int), 1395 .mode = 0644, 1396 .proc_handler = proc_dointvec_jiffies, 1397 }, 1398 { 1399 .procname = "ip_conntrack_tcp_timeout_close_wait", 1400 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], 1401 .maxlen = sizeof(unsigned int), 1402 .mode = 0644, 1403 .proc_handler = proc_dointvec_jiffies, 1404 }, 1405 { 1406 .procname = "ip_conntrack_tcp_timeout_last_ack", 1407 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], 1408 .maxlen = sizeof(unsigned int), 1409 .mode = 0644, 1410 .proc_handler = proc_dointvec_jiffies, 1411 }, 1412 { 1413 .procname = "ip_conntrack_tcp_timeout_time_wait", 1414 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], 1415 .maxlen = sizeof(unsigned int), 1416 .mode = 0644, 1417 .proc_handler = proc_dointvec_jiffies, 1418 }, 1419 { 1420 .procname = "ip_conntrack_tcp_timeout_close", 1421 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], 1422 .maxlen = sizeof(unsigned int), 1423 .mode = 0644, 1424 .proc_handler = proc_dointvec_jiffies, 1425 }, 1426 { 1427 .procname = "ip_conntrack_tcp_timeout_max_retrans", 1428 .data = &nf_ct_tcp_timeout_max_retrans, 1429 .maxlen = sizeof(unsigned int), 1430 .mode = 0644, 1431 .proc_handler = proc_dointvec_jiffies, 1432 }, 1433 { 1434 .procname = "ip_conntrack_tcp_loose", 1435 .data = &nf_ct_tcp_loose, 1436 .maxlen = sizeof(unsigned int), 1437 .mode = 0644, 1438 .proc_handler = proc_dointvec, 1439 }, 1440 { 1441 .procname = "ip_conntrack_tcp_be_liberal", 1442 .data = &nf_ct_tcp_be_liberal, 1443 .maxlen = sizeof(unsigned int), 1444 .mode = 0644, 1445 .proc_handler = proc_dointvec, 1446 }, 1447 { 1448 .procname = "ip_conntrack_tcp_max_retrans", 1449 .data = &nf_ct_tcp_max_retrans, 1450 .maxlen = sizeof(unsigned int), 1451 .mode = 0644, 1452 .proc_handler = proc_dointvec, 1453 }, 1454 { } 1455}; 1456#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ 1457#endif /* CONFIG_SYSCTL */ 1458 1459struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = 1460{ 1461 .l3proto = PF_INET, 1462 .l4proto = IPPROTO_TCP, 1463 .name = "tcp", 1464 .pkt_to_tuple = tcp_pkt_to_tuple, 1465 .invert_tuple = tcp_invert_tuple, 1466 .print_tuple = tcp_print_tuple, 1467 .print_conntrack = tcp_print_conntrack, 1468 .packet = tcp_packet, 1469 .new = tcp_new, 1470 .error = tcp_error, 1471#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1472 .to_nlattr = tcp_to_nlattr, 1473 .nlattr_size = tcp_nlattr_size, 1474 .from_nlattr = nlattr_to_tcp, 1475 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1476 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1477 .nlattr_tuple_size = tcp_nlattr_tuple_size, 1478 .nla_policy = nf_ct_port_nla_policy, 1479#endif 1480#ifdef CONFIG_SYSCTL 1481 .ctl_table_users = &tcp_sysctl_table_users, 1482 .ctl_table_header = &tcp_sysctl_header, 1483 .ctl_table = tcp_sysctl_table, 1484#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT 1485 .ctl_compat_table = tcp_compat_sysctl_table, 1486#endif 1487#endif 1488}; 1489EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); 1490 1491struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = 1492{ 1493 .l3proto = PF_INET6, 1494 .l4proto = IPPROTO_TCP, 1495 .name = "tcp", 1496 .pkt_to_tuple = tcp_pkt_to_tuple, 1497 .invert_tuple = tcp_invert_tuple, 1498 .print_tuple = tcp_print_tuple, 1499 .print_conntrack = tcp_print_conntrack, 1500 .packet = tcp_packet, 1501 .new = tcp_new, 1502 .error = tcp_error, 1503#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1504 .to_nlattr = tcp_to_nlattr, 1505 .nlattr_size = tcp_nlattr_size, 1506 .from_nlattr = nlattr_to_tcp, 1507 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1508 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1509 .nlattr_tuple_size = tcp_nlattr_tuple_size, 1510 .nla_policy = nf_ct_port_nla_policy, 1511#endif 1512#ifdef CONFIG_SYSCTL 1513 .ctl_table_users = &tcp_sysctl_table_users, 1514 .ctl_table_header = &tcp_sysctl_header, 1515 .ctl_table = tcp_sysctl_table, 1516#endif 1517}; 1518EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); 1519