1/* (C) 1999-2001 Paul `Rusty' Russell 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 */ 8 9#include <linux/types.h> 10#include <linux/timer.h> 11#include <linux/netfilter.h> 12#include <linux/module.h> 13#include <linux/in.h> 14#include <linux/tcp.h> 15#include <linux/spinlock.h> 16#include <linux/skbuff.h> 17#include <linux/ipv6.h> 18#include <net/ip6_checksum.h> 19 20#include <net/tcp.h> 21 22#include <linux/netfilter.h> 23#include <linux/netfilter_ipv4.h> 24#include <linux/netfilter_ipv6.h> 25#include <net/netfilter/nf_conntrack.h> 26#include <net/netfilter/nf_conntrack_l4proto.h> 27#include <net/netfilter/nf_conntrack_ecache.h> 28 29#define DEBUGP(format, args...) 30 31#ifdef HNDCTF 32#include <ctf/hndctf.h> 33extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout); 34#endif /* HNDCTF */ 35 36/* Protects conntrack->proto.tcp */ 37static DEFINE_RWLOCK(tcp_lock); 38 39/* "Be conservative in what you do, 40 be liberal in what you accept from others." 41 If it's non-zero, we mark only out of window RST segments as INVALID. */ 42static int nf_ct_tcp_be_liberal __read_mostly = 0; 43 44/* If it is set to zero, we disable picking up already established 45 connections. */ 46static int nf_ct_tcp_loose __read_mostly = 1; 47 48/* Max number of the retransmitted packets without receiving an (acceptable) 49 ACK from the destination. If this number is reached, a shorter timer 50 will be started. 
*/ 51static int nf_ct_tcp_max_retrans __read_mostly = 3; 52 53 54static const char *tcp_conntrack_names[] = { 55 "NONE", 56 "SYN_SENT", 57 "SYN_RECV", 58 "ESTABLISHED", 59 "FIN_WAIT", 60 "CLOSE_WAIT", 61 "LAST_ACK", 62 "TIME_WAIT", 63 "CLOSE", 64 "LISTEN" 65}; 66 67#define SECS * HZ 68#define MINS * 60 SECS 69#define HOURS * 60 MINS 70#define DAYS * 24 HOURS 71 72static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; 73static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; 74static unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; 75static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; 76static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; 77static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; 78static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; 79static unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; 80 81/* RFC1122 says the R2 limit should be at least 100 seconds. 82 Linux uses 15 packets as limit, which corresponds 83 to ~13-30min depending on RTO. 
*/ 84static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; 85 86static unsigned int * tcp_timeouts[] = { 87 NULL, /* TCP_CONNTRACK_NONE */ 88 &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ 89 &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ 90 &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ 91 &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ 92 &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ 93 &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ 94 &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ 95 &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ 96 NULL, /* TCP_CONNTRACK_LISTEN */ 97 }; 98 99#define sNO TCP_CONNTRACK_NONE 100#define sSS TCP_CONNTRACK_SYN_SENT 101#define sSR TCP_CONNTRACK_SYN_RECV 102#define sES TCP_CONNTRACK_ESTABLISHED 103#define sFW TCP_CONNTRACK_FIN_WAIT 104#define sCW TCP_CONNTRACK_CLOSE_WAIT 105#define sLA TCP_CONNTRACK_LAST_ACK 106#define sTW TCP_CONNTRACK_TIME_WAIT 107#define sCL TCP_CONNTRACK_CLOSE 108#define sLI TCP_CONNTRACK_LISTEN 109#define sIV TCP_CONNTRACK_MAX 110#define sIG TCP_CONNTRACK_IGNORE 111 112/* What TCP flags are set from RST/SYN/FIN/ACK. */ 113enum tcp_bit_set { 114 TCP_SYN_SET, 115 TCP_SYNACK_SET, 116 TCP_FIN_SET, 117 TCP_ACK_SET, 118 TCP_RST_SET, 119 TCP_NONE_SET, 120}; 121 122/* 123 * The TCP state transition table needs a few words... 124 * 125 * We are the man in the middle. All the packets go through us 126 * but might get lost in transit to the destination. 127 * It is assumed that the destinations can't receive segments 128 * we haven't seen. 129 * 130 * The checked segment is in window, but our windows are *not* 131 * equivalent with the ones of the sender/receiver. We always 132 * try to guess the state of the current sender. 
133 * 134 * The meaning of the states are: 135 * 136 * NONE: initial state 137 * SYN_SENT: SYN-only packet seen 138 * SYN_RECV: SYN-ACK packet seen 139 * ESTABLISHED: ACK packet seen 140 * FIN_WAIT: FIN packet seen 141 * CLOSE_WAIT: ACK seen (after FIN) 142 * LAST_ACK: FIN seen (after FIN) 143 * TIME_WAIT: last ACK seen 144 * CLOSE: closed connection 145 * 146 * LISTEN state is not used. 147 * 148 * Packets marked as IGNORED (sIG): 149 * if they may be either invalid or valid 150 * and the receiver may send back a connection 151 * closing RST or a SYN/ACK. 152 * 153 * Packets marked as INVALID (sIV): 154 * if they are invalid 155 * or we do not support the request (simultaneous open) 156 */ 157static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { 158 { 159/* ORIGINAL */ 160/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 161/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, 162/* 163 * sNO -> sSS Initialize a new connection 164 * sSS -> sSS Retransmitted SYN 165 * sSR -> sIG Late retransmitted SYN? 166 * sES -> sIG Error: SYNs in window outside the SYN_SENT state 167 * are errors. Receiver will reply with RST 168 * and close the connection. 169 * Or we are not in sync and hold a dead connection. 170 * sFW -> sIG 171 * sCW -> sIG 172 * sLA -> sIG 173 * sTW -> sSS Reopened connection (RFC 1122). 174 * sCL -> sSS 175 */ 176/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 177/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, 178/* 179 * A SYN/ACK from the client is always invalid: 180 * - either it tries to set up a simultaneous open, which is 181 * not supported; 182 * - or the firewall has just been inserted between the two hosts 183 * during the session set-up. The SYN will be retransmitted 184 * by the true client (or it'll time out). 
185 */ 186/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 187/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 188/* 189 * sNO -> sIV Too late and no reason to do anything... 190 * sSS -> sIV Client migth not send FIN in this state: 191 * we enforce waiting for a SYN/ACK reply first. 192 * sSR -> sFW Close started. 193 * sES -> sFW 194 * sFW -> sLA FIN seen in both directions, waiting for 195 * the last ACK. 196 * Migth be a retransmitted FIN as well... 197 * sCW -> sLA 198 * sLA -> sLA Retransmitted FIN. Remain in the same state. 199 * sTW -> sTW 200 * sCL -> sCL 201 */ 202/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 203/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, 204/* 205 * sNO -> sES Assumed. 206 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. 207 * sSR -> sES Established state is reached. 208 * sES -> sES :-) 209 * sFW -> sCW Normal close request answered by ACK. 210 * sCW -> sCW 211 * sLA -> sTW Last ACK detected. 212 * sTW -> sTW Retransmitted last ACK. Remain in the same state. 213 * sCL -> sCL 214 */ 215/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 216/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, 217/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 218 }, 219 { 220/* REPLY */ 221/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 222/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, 223/* 224 * sNO -> sIV Never reached. 225 * sSS -> sIV Simultaneous open, not supported 226 * sSR -> sIV Simultaneous open, not supported. 227 * sES -> sIV Server may not initiate a connection. 228 * sFW -> sIV 229 * sCW -> sIV 230 * sLA -> sIV 231 * sTW -> sIV Reopened connection, but server may not do it. 232 * sCL -> sIV 233 */ 234/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 235/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, 236/* 237 * sSS -> sSR Standard open. 238 * sSR -> sSR Retransmitted SYN/ACK. 
239 * sES -> sIG Late retransmitted SYN/ACK? 240 * sFW -> sIG Might be SYN/ACK answering ignored SYN 241 * sCW -> sIG 242 * sLA -> sIG 243 * sTW -> sIG 244 * sCL -> sIG 245 */ 246/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 247/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 248/* 249 * sSS -> sIV Server might not send FIN in this state. 250 * sSR -> sFW Close started. 251 * sES -> sFW 252 * sFW -> sLA FIN seen in both directions. 253 * sCW -> sLA 254 * sLA -> sLA Retransmitted FIN. 255 * sTW -> sTW 256 * sCL -> sCL 257 */ 258/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 259/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, 260/* 261 * sSS -> sIG Might be a half-open connection. 262 * sSR -> sSR Might answer late resent SYN. 263 * sES -> sES :-) 264 * sFW -> sCW Normal close request answered by ACK. 265 * sCW -> sCW 266 * sLA -> sTW Last ACK detected. 267 * sTW -> sTW Retransmitted last ACK. 268 * sCL -> sCL 269 */ 270/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 271/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, 272/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 273 } 274}; 275 276static int tcp_pkt_to_tuple(const struct sk_buff *skb, 277 unsigned int dataoff, 278 struct nf_conntrack_tuple *tuple) 279{ 280 struct tcphdr _hdr, *hp; 281 282 /* Actually only need first 8 bytes. */ 283 hp = skb_header_pointer(skb, dataoff, 8, &_hdr); 284 if (hp == NULL) 285 return 0; 286 287 tuple->src.u.tcp.port = hp->source; 288 tuple->dst.u.tcp.port = hp->dest; 289 290 return 1; 291} 292 293static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple, 294 const struct nf_conntrack_tuple *orig) 295{ 296 tuple->src.u.tcp.port = orig->dst.u.tcp.port; 297 tuple->dst.u.tcp.port = orig->src.u.tcp.port; 298 return 1; 299} 300 301/* Print out the per-protocol part of the tuple. 
*/ 302static int tcp_print_tuple(struct seq_file *s, 303 const struct nf_conntrack_tuple *tuple) 304{ 305 return seq_printf(s, "sport=%hu dport=%hu ", 306 ntohs(tuple->src.u.tcp.port), 307 ntohs(tuple->dst.u.tcp.port)); 308} 309 310/* Print out the private part of the conntrack. */ 311static int tcp_print_conntrack(struct seq_file *s, 312 const struct nf_conn *conntrack) 313{ 314 enum tcp_conntrack state; 315 316 read_lock_bh(&tcp_lock); 317 state = conntrack->proto.tcp.state; 318 read_unlock_bh(&tcp_lock); 319 320 return seq_printf(s, "%s ", tcp_conntrack_names[state]); 321} 322 323static unsigned int get_conntrack_index(const struct tcphdr *tcph) 324{ 325 if (tcph->rst) return TCP_RST_SET; 326 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); 327 else if (tcph->fin) return TCP_FIN_SET; 328 else if (tcph->ack) return TCP_ACK_SET; 329 else return TCP_NONE_SET; 330} 331 332/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering 333 in IP Filter' by Guido van Rooij. 334 335 http://www.nluug.nl/events/sane2000/papers.html 336 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz 337 338 The boundaries and the conditions are changed according to RFC793: 339 the packet must intersect the window (i.e. segments may be 340 after the right or before the left edge) and thus receivers may ACK 341 segments after the right edge of the window. 342 343 td_maxend = max(sack + max(win,1)) seen in reply packets 344 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets 345 td_maxwin += seq + len - sender.td_maxend 346 if seq + len > sender.td_maxend 347 td_end = max(seq + len) seen in sent packets 348 349 I. Upper bound for valid data: seq <= sender.td_maxend 350 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin 351 III. Upper bound for valid ack: sack <= receiver.td_end 352 IV. 
Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW 353 354 where sack is the highest right edge of sack block found in the packet. 355 356 The upper bound limit for a valid ack is not ignored - 357 we doesn't have to deal with fragments. 358*/ 359 360static inline __u32 segment_seq_plus_len(__u32 seq, 361 size_t len, 362 unsigned int dataoff, 363 struct tcphdr *tcph) 364{ 365 return (seq + len - dataoff - tcph->doff*4 366 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); 367} 368 369#define MAXACKWINCONST 66000 370#define MAXACKWINDOW(sender) \ 371 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ 372 : MAXACKWINCONST) 373 374/* 375 * Simplified tcp_parse_options routine from tcp_input.c 376 */ 377static void tcp_options(const struct sk_buff *skb, 378 unsigned int dataoff, 379 struct tcphdr *tcph, 380 struct ip_ct_tcp_state *state) 381{ 382 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 383 unsigned char *ptr; 384 int length = (tcph->doff*4) - sizeof(struct tcphdr); 385 386 if (!length) 387 return; 388 389 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), 390 length, buff); 391 BUG_ON(ptr == NULL); 392 393 state->td_scale = 394 state->flags = 0; 395 396 while (length > 0) { 397 int opcode=*ptr++; 398 int opsize; 399 400 switch (opcode) { 401 case TCPOPT_EOL: 402 return; 403 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 404 length--; 405 continue; 406 default: 407 opsize=*ptr++; 408 if (opsize < 2) /* "silly options" */ 409 return; 410 if (opsize > length) 411 break; /* don't parse partial options */ 412 413 if (opcode == TCPOPT_SACK_PERM 414 && opsize == TCPOLEN_SACK_PERM) 415 state->flags |= IP_CT_TCP_FLAG_SACK_PERM; 416 else if (opcode == TCPOPT_WINDOW 417 && opsize == TCPOLEN_WINDOW) { 418 state->td_scale = *(u_int8_t *)ptr; 419 420 if (state->td_scale > 14) { 421 /* See RFC1323 */ 422 state->td_scale = 14; 423 } 424 state->flags |= 425 IP_CT_TCP_FLAG_WINDOW_SCALE; 426 } 427 ptr += opsize - 2; 428 length -= opsize; 429 
} 430 } 431} 432 433static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, 434 struct tcphdr *tcph, __u32 *sack) 435{ 436 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 437 unsigned char *ptr; 438 int length = (tcph->doff*4) - sizeof(struct tcphdr); 439 __u32 tmp; 440 441 if (!length) 442 return; 443 444 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), 445 length, buff); 446 BUG_ON(ptr == NULL); 447 448 /* Fast path for timestamp-only option */ 449 if (length == TCPOLEN_TSTAMP_ALIGNED*4 450 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) 451 | (TCPOPT_NOP << 16) 452 | (TCPOPT_TIMESTAMP << 8) 453 | TCPOLEN_TIMESTAMP)) 454 return; 455 456 while (length > 0) { 457 int opcode = *ptr++; 458 int opsize, i; 459 460 switch (opcode) { 461 case TCPOPT_EOL: 462 return; 463 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 464 length--; 465 continue; 466 default: 467 opsize = *ptr++; 468 if (opsize < 2) /* "silly options" */ 469 return; 470 if (opsize > length) 471 break; /* don't parse partial options */ 472 473 if (opcode == TCPOPT_SACK 474 && opsize >= (TCPOLEN_SACK_BASE 475 + TCPOLEN_SACK_PERBLOCK) 476 && !((opsize - TCPOLEN_SACK_BASE) 477 % TCPOLEN_SACK_PERBLOCK)) { 478 for (i = 0; 479 i < (opsize - TCPOLEN_SACK_BASE); 480 i += TCPOLEN_SACK_PERBLOCK) { 481 tmp = ntohl(*((__be32 *)(ptr+i)+1)); 482 483 if (after(tmp, *sack)) 484 *sack = tmp; 485 } 486 return; 487 } 488 ptr += opsize - 2; 489 length -= opsize; 490 } 491 } 492} 493 494static int tcp_in_window(struct ip_ct_tcp *state, 495 enum ip_conntrack_dir dir, 496 unsigned int index, 497 const struct sk_buff *skb, 498 unsigned int dataoff, 499 struct tcphdr *tcph, 500 int pf) 501{ 502 struct ip_ct_tcp_state *sender = &state->seen[dir]; 503 struct ip_ct_tcp_state *receiver = &state->seen[!dir]; 504 __u32 seq, ack, sack, end, win, swin; 505 int res; 506 507 /* 508 * Get the required data from the packet. 
509 */ 510 seq = ntohl(tcph->seq); 511 ack = sack = ntohl(tcph->ack_seq); 512 win = ntohs(tcph->window); 513 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); 514 515 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) 516 tcp_sack(skb, dataoff, tcph, &sack); 517 518 DEBUGP("tcp_in_window: START\n"); 519 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " 520 "seq=%u ack=%u sack=%u win=%u end=%u\n", 521 NIPQUAD(iph->saddr), ntohs(tcph->source), 522 NIPQUAD(iph->daddr), ntohs(tcph->dest), 523 seq, ack, sack, win, end); 524 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 525 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 526 sender->td_end, sender->td_maxend, sender->td_maxwin, 527 sender->td_scale, 528 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 529 receiver->td_scale); 530 531 if (sender->td_end == 0) { 532 /* 533 * Initialize sender data. 534 */ 535 if (tcph->syn && tcph->ack) { 536 /* 537 * Outgoing SYN-ACK in reply to a SYN. 538 */ 539 sender->td_end = 540 sender->td_maxend = end; 541 sender->td_maxwin = (win == 0 ? 1 : win); 542 543 tcp_options(skb, dataoff, tcph, sender); 544 /* 545 * RFC 1323: 546 * Both sides must send the Window Scale option 547 * to enable window scaling in either direction. 548 */ 549 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE 550 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) 551 sender->td_scale = 552 receiver->td_scale = 0; 553 } else { 554 /* 555 * We are in the middle of a connection, 556 * its history is lost for us. 557 * Let's try to use the data from the packet. 558 */ 559 sender->td_end = end; 560 sender->td_maxwin = (win == 0 ? 1 : win); 561 sender->td_maxend = end + sender->td_maxwin; 562 } 563 } else if (((state->state == TCP_CONNTRACK_SYN_SENT 564 && dir == IP_CT_DIR_ORIGINAL) 565 || (state->state == TCP_CONNTRACK_SYN_RECV 566 && dir == IP_CT_DIR_REPLY)) 567 && after(end, sender->td_end)) { 568 /* 569 * RFC 793: "if a TCP is reinitialized ... 
then it need 570 * not wait at all; it must only be sure to use sequence 571 * numbers larger than those recently used." 572 */ 573 sender->td_end = 574 sender->td_maxend = end; 575 sender->td_maxwin = (win == 0 ? 1 : win); 576 577 tcp_options(skb, dataoff, tcph, sender); 578 } 579 580 if (!(tcph->ack)) { 581 /* 582 * If there is no ACK, just pretend it was set and OK. 583 */ 584 ack = sack = receiver->td_end; 585 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == 586 (TCP_FLAG_ACK|TCP_FLAG_RST)) 587 && (ack == 0)) { 588 /* 589 * Broken TCP stacks, that set ACK in RST packets as well 590 * with zero ack value. 591 */ 592 ack = sack = receiver->td_end; 593 } 594 595 if (seq == end 596 && (!tcph->rst 597 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) 598 /* 599 * Packets contains no data: we assume it is valid 600 * and check the ack value only. 601 * However RST segments are always validated by their 602 * SEQ number, except when seq == 0 (reset sent answering 603 * SYN. 
604 */ 605 seq = end = sender->td_end; 606 607 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " 608 "seq=%u ack=%u sack =%u win=%u end=%u\n", 609 NIPQUAD(iph->saddr), ntohs(tcph->source), 610 NIPQUAD(iph->daddr), ntohs(tcph->dest), 611 seq, ack, sack, win, end); 612 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 613 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 614 sender->td_end, sender->td_maxend, sender->td_maxwin, 615 sender->td_scale, 616 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 617 receiver->td_scale); 618 619 DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", 620 before(seq, sender->td_maxend + 1), 621 after(end, sender->td_end - receiver->td_maxwin - 1), 622 before(sack, receiver->td_end + 1), 623 after(ack, receiver->td_end - MAXACKWINDOW(sender))); 624 625 if (before(seq, sender->td_maxend + 1) && 626 after(end, sender->td_end - receiver->td_maxwin - 1) && 627 before(sack, receiver->td_end + 1) && 628 after(ack, receiver->td_end - MAXACKWINDOW(sender))) { 629 /* 630 * Take into account window scaling (RFC 1323). 631 */ 632 if (!tcph->syn) 633 win <<= sender->td_scale; 634 635 /* 636 * Update sender data. 637 */ 638 swin = win + (sack - ack); 639 if (sender->td_maxwin < swin) 640 sender->td_maxwin = swin; 641 if (after(end, sender->td_end)) 642 sender->td_end = end; 643 /* 644 * Update receiver data. 645 */ 646 if (after(end, sender->td_maxend)) 647 receiver->td_maxwin += end - sender->td_maxend; 648 if (after(sack + win, receiver->td_maxend - 1)) { 649 receiver->td_maxend = sack + win; 650 if (win == 0) 651 receiver->td_maxend++; 652 } 653 654 /* 655 * Check retransmissions. 
656 */ 657 if (index == TCP_ACK_SET) { 658 if (state->last_dir == dir 659 && state->last_seq == seq 660 && state->last_ack == ack 661 && state->last_end == end 662 && state->last_win == win) 663 state->retrans++; 664 else { 665 state->last_dir = dir; 666 state->last_seq = seq; 667 state->last_ack = ack; 668 state->last_end = end; 669 state->last_win = win; 670 state->retrans = 0; 671 } 672 } 673 res = 1; 674 } else { 675 res = 0; 676 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || 677 nf_ct_tcp_be_liberal) 678 res = 1; 679 if (!res && LOG_INVALID(IPPROTO_TCP)) 680 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 681 "nf_ct_tcp: %s ", 682 before(seq, sender->td_maxend + 1) ? 683 after(end, sender->td_end - receiver->td_maxwin - 1) ? 684 before(sack, receiver->td_end + 1) ? 685 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" 686 : "ACK is under the lower bound (possible overly delayed ACK)" 687 : "ACK is over the upper bound (ACKed data not seen yet)" 688 : "SEQ is under the lower bound (already ACKed data retransmitted)" 689 : "SEQ is over the upper bound (over the window of the receiver)"); 690 } 691 692 DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " 693 "receiver end=%u maxend=%u maxwin=%u\n", 694 res, sender->td_end, sender->td_maxend, sender->td_maxwin, 695 receiver->td_end, receiver->td_maxend, receiver->td_maxwin); 696 697 return res; 698} 699 700#ifdef CONFIG_NF_NAT_NEEDED 701/* Update sender->td_end after NAT successfully mangled the packet */ 702/* Caller must linearize skb at tcp header. 
*/ 703void nf_conntrack_tcp_update(struct sk_buff *skb, 704 unsigned int dataoff, 705 struct nf_conn *conntrack, 706 int dir) 707{ 708 struct tcphdr *tcph = (void *)skb->data + dataoff; 709 __u32 end; 710#ifdef DEBUGP_VARS 711 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; 712 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; 713#endif 714 715 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); 716 717 write_lock_bh(&tcp_lock); 718 /* 719 * We have to worry for the ack in the reply packet only... 720 */ 721 if (after(end, conntrack->proto.tcp.seen[dir].td_end)) 722 conntrack->proto.tcp.seen[dir].td_end = end; 723 conntrack->proto.tcp.last_end = end; 724 write_unlock_bh(&tcp_lock); 725 DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " 726 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 727 sender->td_end, sender->td_maxend, sender->td_maxwin, 728 sender->td_scale, 729 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 730 receiver->td_scale); 731} 732EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); 733#endif 734 735#define TH_FIN 0x01 736#define TH_SYN 0x02 737#define TH_RST 0x04 738#define TH_PUSH 0x08 739#define TH_ACK 0x10 740#define TH_URG 0x20 741#define TH_ECE 0x40 742#define TH_CWR 0x80 743 744/* table of valid flag combinations - PUSH, ECE and CWR are always valid */ 745static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = 746{ 747 [TH_SYN] = 1, 748 [TH_SYN|TH_URG] = 1, 749 [TH_SYN|TH_ACK] = 1, 750 [TH_RST] = 1, 751 [TH_RST|TH_ACK] = 1, 752 [TH_FIN|TH_ACK] = 1, 753 [TH_FIN|TH_ACK|TH_URG] = 1, 754 [TH_ACK] = 1, 755 [TH_ACK|TH_URG] = 1, 756}; 757 758/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. 
*/
/*
 * Sanity-check the TCP header at @dataoff before the packet is tracked.
 * Checks, in order: the fixed header is readable, the data offset is
 * sane and fits in the packet, the checksum (only on the PRE_ROUTING
 * hooks and only when nf_conntrack_checksum is set), and the flag
 * combination.
 *
 * Returns NF_ACCEPT when all checks pass and -NF_ACCEPT otherwise.
 */
static int tcp_error(struct sk_buff *skb,
		     unsigned int dataoff,
		     enum ip_conntrack_info *ctinfo,
		     int pf,
		     unsigned int hooknum)
{
	struct tcphdr hdr_buf;
	struct tcphdr *th;
	unsigned int tcplen = skb->len - dataoff;
	unsigned int hdrlen;
	int on_prerouting;
	u_int8_t tcpflags;

	/* Smaller than the minimal TCP header? */
	th = skb_header_pointer(skb, dataoff, sizeof(hdr_buf), &hdr_buf);
	if (th == NULL) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
				"nf_ct_tcp: short packet ");
		return -NF_ACCEPT;
	}

	/* Not whole TCP header or malformed packet */
	hdrlen = th->doff * 4;
	if (hdrlen < sizeof(struct tcphdr) || tcplen < hdrlen) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
				"nf_ct_tcp: truncated/malformed packet ");
		return -NF_ACCEPT;
	}

	/* Checksum invalid? Ignore.
	 * We skip checking packets on the outgoing path
	 * because the checksum is assumed to be correct.
	 */
	on_prerouting = (pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
			(pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING);
	if (nf_conntrack_checksum && on_prerouting &&
	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
				  "nf_ct_tcp: bad TCP checksum ");
		return -NF_ACCEPT;
	}

	/*
	 * Check TCP flags.  Keeping only FIN/SYN/RST/ACK/URG is the same
	 * as masking off ECE, CWR and PUSH, and bounds the table index.
	 */
	tcpflags = ((u_int8_t *)th)[13] &
		   (TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG);
	if (!tcp_valid_flags[tcpflags]) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
				  "nf_ct_tcp: invalid TCP flag combination ");
		return -NF_ACCEPT;
	}

	return NF_ACCEPT;
}

/* Returns verdict for packet, or -1 for invalid.
*/ 813static int tcp_packet(struct nf_conn *conntrack, 814 const struct sk_buff *skb, 815 unsigned int dataoff, 816 enum ip_conntrack_info ctinfo, 817 int pf, 818 unsigned int hooknum) 819{ 820 enum tcp_conntrack new_state, old_state; 821 enum ip_conntrack_dir dir; 822 struct tcphdr *th, _tcph; 823 unsigned long timeout; 824 unsigned int index; 825 826 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 827 BUG_ON(th == NULL); 828 829 write_lock_bh(&tcp_lock); 830 old_state = conntrack->proto.tcp.state; 831 dir = CTINFO2DIR(ctinfo); 832 index = get_conntrack_index(th); 833 new_state = tcp_conntracks[dir][index][old_state]; 834 835 switch (new_state) { 836 case TCP_CONNTRACK_IGNORE: 837 /* Ignored packets: 838 * 839 * a) SYN in ORIGINAL 840 * b) SYN/ACK in REPLY 841 * c) ACK in reply direction after initial SYN in original. 842 */ 843 if (index == TCP_SYNACK_SET 844 && conntrack->proto.tcp.last_index == TCP_SYN_SET 845 && conntrack->proto.tcp.last_dir != dir 846 && ntohl(th->ack_seq) == 847 conntrack->proto.tcp.last_end) { 848 /* This SYN/ACK acknowledges a SYN that we earlier 849 * ignored as invalid. This means that the client and 850 * the server are both in sync, while the firewall is 851 * not. We kill this session and block the SYN/ACK so 852 * that the client cannot but retransmit its SYN and 853 * thus initiate a clean new session. 
854 */ 855 write_unlock_bh(&tcp_lock); 856 if (LOG_INVALID(IPPROTO_TCP)) 857 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 858 "nf_ct_tcp: killing out of sync session "); 859 if (del_timer(&conntrack->timeout)) 860 conntrack->timeout.function((unsigned long) 861 conntrack); 862 return -NF_DROP; 863 } 864 conntrack->proto.tcp.last_index = index; 865 conntrack->proto.tcp.last_dir = dir; 866 conntrack->proto.tcp.last_seq = ntohl(th->seq); 867 conntrack->proto.tcp.last_end = 868 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); 869 870 write_unlock_bh(&tcp_lock); 871 if (LOG_INVALID(IPPROTO_TCP)) 872 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 873 "nf_ct_tcp: invalid packed ignored "); 874 return NF_ACCEPT; 875 case TCP_CONNTRACK_MAX: 876 /* Invalid packet */ 877 DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 878 dir, get_conntrack_index(th), 879 old_state); 880 write_unlock_bh(&tcp_lock); 881 if (LOG_INVALID(IPPROTO_TCP)) 882 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 883 "nf_ct_tcp: invalid state "); 884 return -NF_ACCEPT; 885 case TCP_CONNTRACK_SYN_SENT: 886 if (old_state < TCP_CONNTRACK_TIME_WAIT) 887 break; 888 if ((conntrack->proto.tcp.seen[dir].flags & 889 IP_CT_TCP_FLAG_CLOSE_INIT) 890 || after(ntohl(th->seq), 891 conntrack->proto.tcp.seen[dir].td_end)) { 892 /* Attempt to reopen a closed connection. 893 * Delete this connection and look up again. 
*/ 894 write_unlock_bh(&tcp_lock); 895 if (del_timer(&conntrack->timeout)) 896 conntrack->timeout.function((unsigned long) 897 conntrack); 898 return -NF_REPEAT; 899 } else { 900 write_unlock_bh(&tcp_lock); 901 if (LOG_INVALID(IPPROTO_TCP)) 902 nf_log_packet(pf, 0, skb, NULL, NULL, 903 NULL, "nf_ct_tcp: invalid SYN"); 904 return -NF_ACCEPT; 905 } 906 case TCP_CONNTRACK_CLOSE: 907 if (index == TCP_RST_SET 908 && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) 909 && conntrack->proto.tcp.last_index == TCP_SYN_SET) 910 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 911 && conntrack->proto.tcp.last_index == TCP_ACK_SET)) 912 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { 913 /* RST sent to invalid SYN or ACK we had let through 914 * at a) and c) above: 915 * 916 * a) SYN was in window then 917 * c) we hold a half-open connection. 918 * 919 * Delete our connection entry. 920 * We skip window checking, because packet might ACK 921 * segments we ignored. */ 922 goto in_window; 923 } 924 /* Just fall through */ 925 default: 926 /* Keep compilers happy. */ 927 break; 928 } 929 930#ifdef HNDCTF 931 /* Remove the ipc entries on receipt of FIN or RST */ 932 if (CTF_ENAB(kcih)) { 933 if (conntrack->ctf_flags & CTF_FLAGS_CACHED) { 934 if (th->fin || th->rst) { 935 ip_conntrack_ipct_delete(conntrack, 0); 936 } 937 goto in_window; 938 } 939 } 940#endif /* HNDCTF */ 941 942 if (!tcp_in_window(&conntrack->proto.tcp, dir, index, 943 skb, dataoff, th, pf)) { 944 write_unlock_bh(&tcp_lock); 945 return -NF_ACCEPT; 946 } 947 in_window: 948 /* From now on we have got in-window packets */ 949 conntrack->proto.tcp.last_index = index; 950 951 DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " 952 "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", 953 NIPQUAD(iph->saddr), ntohs(th->source), 954 NIPQUAD(iph->daddr), ntohs(th->dest), 955 (th->syn ? 1 : 0), (th->ack ? 1 : 0), 956 (th->fin ? 1 : 0), (th->rst ? 
1 : 0), 957 old_state, new_state); 958 959 conntrack->proto.tcp.state = new_state; 960 if (old_state != new_state 961 && (new_state == TCP_CONNTRACK_FIN_WAIT 962 || new_state == TCP_CONNTRACK_CLOSE)) 963 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 964 timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans 965 && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans 966 ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; 967 write_unlock_bh(&tcp_lock); 968 969 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); 970 if (new_state != old_state) 971 nf_conntrack_event_cache(IPCT_PROTOINFO, skb); 972 973 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { 974 /* If only reply is a RST, we can consider ourselves not to 975 have an established connection: this is a fairly common 976 problem case, so we can delete the conntrack 977 immediately. --RR */ 978 if (th->rst) { 979 if (del_timer(&conntrack->timeout)) 980 conntrack->timeout.function((unsigned long) 981 conntrack); 982 return NF_ACCEPT; 983 } 984 } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 985 && (old_state == TCP_CONNTRACK_SYN_RECV 986 || old_state == TCP_CONNTRACK_ESTABLISHED) 987 && new_state == TCP_CONNTRACK_ESTABLISHED) { 988 /* Set ASSURED if we see see valid ack in ESTABLISHED 989 after SYN_RECV or a valid answer for a picked up 990 connection. */ 991 set_bit(IPS_ASSURED_BIT, &conntrack->status); 992 nf_conntrack_event_cache(IPCT_STATUS, skb); 993 } 994 nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); 995 996 return NF_ACCEPT; 997} 998 999/* Called when a new connection for this protocol found. 
*/ 1000static int tcp_new(struct nf_conn *conntrack, 1001 const struct sk_buff *skb, 1002 unsigned int dataoff) 1003{ 1004 enum tcp_conntrack new_state; 1005 struct tcphdr *th, _tcph; 1006#ifdef DEBUGP_VARS 1007 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; 1008 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; 1009#endif 1010 1011 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 1012 BUG_ON(th == NULL); 1013 1014 /* Don't need lock here: this conntrack not in circulation yet */ 1015 new_state 1016 = tcp_conntracks[0][get_conntrack_index(th)] 1017 [TCP_CONNTRACK_NONE]; 1018 1019 /* Invalid: delete conntrack */ 1020 if (new_state >= TCP_CONNTRACK_MAX) { 1021 DEBUGP("nf_ct_tcp: invalid new deleting.\n"); 1022 return 0; 1023 } 1024 1025 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1026 /* SYN packet */ 1027 conntrack->proto.tcp.seen[0].td_end = 1028 segment_seq_plus_len(ntohl(th->seq), skb->len, 1029 dataoff, th); 1030 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1031 if (conntrack->proto.tcp.seen[0].td_maxwin == 0) 1032 conntrack->proto.tcp.seen[0].td_maxwin = 1; 1033 conntrack->proto.tcp.seen[0].td_maxend = 1034 conntrack->proto.tcp.seen[0].td_end; 1035 1036 tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); 1037 conntrack->proto.tcp.seen[1].flags = 0; 1038 } else if (nf_ct_tcp_loose == 0) { 1039 /* Don't try to pick up connections. */ 1040 return 0; 1041 } else { 1042 /* 1043 * We are in the middle of a connection, 1044 * its history is lost for us. 1045 * Let's try to use the data from the packet. 
1046 */ 1047 conntrack->proto.tcp.seen[0].td_end = 1048 segment_seq_plus_len(ntohl(th->seq), skb->len, 1049 dataoff, th); 1050 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1051 if (conntrack->proto.tcp.seen[0].td_maxwin == 0) 1052 conntrack->proto.tcp.seen[0].td_maxwin = 1; 1053 conntrack->proto.tcp.seen[0].td_maxend = 1054 conntrack->proto.tcp.seen[0].td_end + 1055 conntrack->proto.tcp.seen[0].td_maxwin; 1056 conntrack->proto.tcp.seen[0].td_scale = 0; 1057 1058 /* We assume SACK and liberal window checking to handle 1059 * window scaling */ 1060 conntrack->proto.tcp.seen[0].flags = 1061 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | 1062 IP_CT_TCP_FLAG_BE_LIBERAL; 1063 } 1064 1065 conntrack->proto.tcp.seen[1].td_end = 0; 1066 conntrack->proto.tcp.seen[1].td_maxend = 0; 1067 conntrack->proto.tcp.seen[1].td_maxwin = 1; 1068 conntrack->proto.tcp.seen[1].td_scale = 0; 1069 1070 /* tcp_packet will set them */ 1071 conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; 1072 conntrack->proto.tcp.last_index = TCP_NONE_SET; 1073 1074 DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1075 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 1076 sender->td_end, sender->td_maxend, sender->td_maxwin, 1077 sender->td_scale, 1078 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 1079 receiver->td_scale); 1080 return 1; 1081} 1082 1083#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1084 1085#include <linux/netfilter/nfnetlink.h> 1086#include <linux/netfilter/nfnetlink_conntrack.h> 1087 1088static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, 1089 const struct nf_conn *ct) 1090{ 1091 struct nfattr *nest_parms; 1092 struct nf_ct_tcp_flags tmp = {}; 1093 1094 read_lock_bh(&tcp_lock); 1095 nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); 1096 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), 1097 &ct->proto.tcp.state); 1098 1099 NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t), 
1100 &ct->proto.tcp.seen[0].td_scale); 1101 1102 NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t), 1103 &ct->proto.tcp.seen[1].td_scale); 1104 1105 tmp.flags = ct->proto.tcp.seen[0].flags; 1106 NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 1107 sizeof(struct nf_ct_tcp_flags), &tmp); 1108 1109 tmp.flags = ct->proto.tcp.seen[1].flags; 1110 NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, 1111 sizeof(struct nf_ct_tcp_flags), &tmp); 1112 read_unlock_bh(&tcp_lock); 1113 1114 NFA_NEST_END(skb, nest_parms); 1115 1116 return 0; 1117 1118nfattr_failure: 1119 read_unlock_bh(&tcp_lock); 1120 return -1; 1121} 1122 1123static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { 1124 [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), 1125 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t), 1126 [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t), 1127 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1] = sizeof(struct nf_ct_tcp_flags), 1128 [CTA_PROTOINFO_TCP_FLAGS_REPLY-1] = sizeof(struct nf_ct_tcp_flags) 1129}; 1130 1131static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) 1132{ 1133 struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; 1134 struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; 1135 1136 /* updates could not contain anything about the private 1137 * protocol info, in that case skip the parsing */ 1138 if (!attr) 1139 return 0; 1140 1141 nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); 1142 1143 if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) 1144 return -EINVAL; 1145 1146 if (!tb[CTA_PROTOINFO_TCP_STATE-1]) 1147 return -EINVAL; 1148 1149 write_lock_bh(&tcp_lock); 1150 ct->proto.tcp.state = 1151 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); 1152 1153 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) { 1154 struct nf_ct_tcp_flags *attr = 1155 NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]); 1156 ct->proto.tcp.seen[0].flags &= ~attr->mask; 1157 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask; 1158 } 1159 1160 if 
(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) { 1161 struct nf_ct_tcp_flags *attr = 1162 NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]); 1163 ct->proto.tcp.seen[1].flags &= ~attr->mask; 1164 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask; 1165 } 1166 1167 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] && 1168 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] && 1169 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && 1170 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { 1171 ct->proto.tcp.seen[0].td_scale = *(u_int8_t *) 1172 NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]); 1173 ct->proto.tcp.seen[1].td_scale = *(u_int8_t *) 1174 NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]); 1175 } 1176 write_unlock_bh(&tcp_lock); 1177 1178 return 0; 1179} 1180#endif 1181 1182#ifdef CONFIG_SYSCTL 1183static unsigned int tcp_sysctl_table_users; 1184static struct ctl_table_header *tcp_sysctl_header; 1185static struct ctl_table tcp_sysctl_table[] = { 1186 { 1187 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, 1188 .procname = "nf_conntrack_tcp_timeout_syn_sent", 1189 .data = &nf_ct_tcp_timeout_syn_sent, 1190 .maxlen = sizeof(unsigned int), 1191 .mode = 0644, 1192 .proc_handler = &proc_dointvec_jiffies, 1193 }, 1194 { 1195 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, 1196 .procname = "nf_conntrack_tcp_timeout_syn_recv", 1197 .data = &nf_ct_tcp_timeout_syn_recv, 1198 .maxlen = sizeof(unsigned int), 1199 .mode = 0644, 1200 .proc_handler = &proc_dointvec_jiffies, 1201 }, 1202 { 1203 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, 1204 .procname = "nf_conntrack_tcp_timeout_established", 1205 .data = &nf_ct_tcp_timeout_established, 1206 .maxlen = sizeof(unsigned int), 1207 .mode = 0644, 1208 .proc_handler = &proc_dointvec_jiffies, 1209 }, 1210 { 1211 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, 1212 .procname = "nf_conntrack_tcp_timeout_fin_wait", 1213 .data = &nf_ct_tcp_timeout_fin_wait, 1214 .maxlen = sizeof(unsigned int), 1215 .mode = 0644, 1216 
.proc_handler = &proc_dointvec_jiffies, 1217 }, 1218 { 1219 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, 1220 .procname = "nf_conntrack_tcp_timeout_close_wait", 1221 .data = &nf_ct_tcp_timeout_close_wait, 1222 .maxlen = sizeof(unsigned int), 1223 .mode = 0644, 1224 .proc_handler = &proc_dointvec_jiffies, 1225 }, 1226 { 1227 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, 1228 .procname = "nf_conntrack_tcp_timeout_last_ack", 1229 .data = &nf_ct_tcp_timeout_last_ack, 1230 .maxlen = sizeof(unsigned int), 1231 .mode = 0644, 1232 .proc_handler = &proc_dointvec_jiffies, 1233 }, 1234 { 1235 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, 1236 .procname = "nf_conntrack_tcp_timeout_time_wait", 1237 .data = &nf_ct_tcp_timeout_time_wait, 1238 .maxlen = sizeof(unsigned int), 1239 .mode = 0644, 1240 .proc_handler = &proc_dointvec_jiffies, 1241 }, 1242 { 1243 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, 1244 .procname = "nf_conntrack_tcp_timeout_close", 1245 .data = &nf_ct_tcp_timeout_close, 1246 .maxlen = sizeof(unsigned int), 1247 .mode = 0644, 1248 .proc_handler = &proc_dointvec_jiffies, 1249 }, 1250 { 1251 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, 1252 .procname = "nf_conntrack_tcp_timeout_max_retrans", 1253 .data = &nf_ct_tcp_timeout_max_retrans, 1254 .maxlen = sizeof(unsigned int), 1255 .mode = 0644, 1256 .proc_handler = &proc_dointvec_jiffies, 1257 }, 1258 { 1259 .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, 1260 .procname = "nf_conntrack_tcp_loose", 1261 .data = &nf_ct_tcp_loose, 1262 .maxlen = sizeof(unsigned int), 1263 .mode = 0644, 1264 .proc_handler = &proc_dointvec, 1265 }, 1266 { 1267 .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, 1268 .procname = "nf_conntrack_tcp_be_liberal", 1269 .data = &nf_ct_tcp_be_liberal, 1270 .maxlen = sizeof(unsigned int), 1271 .mode = 0644, 1272 .proc_handler = &proc_dointvec, 1273 }, 1274 { 1275 .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, 1276 .procname = "nf_conntrack_tcp_max_retrans", 1277 .data = 
&nf_ct_tcp_max_retrans, 1278 .maxlen = sizeof(unsigned int), 1279 .mode = 0644, 1280 .proc_handler = &proc_dointvec, 1281 }, 1282 { 1283 .ctl_name = 0 1284 } 1285}; 1286 1287#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT 1288static struct ctl_table tcp_compat_sysctl_table[] = { 1289 { 1290 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, 1291 .procname = "ip_conntrack_tcp_timeout_syn_sent", 1292 .data = &nf_ct_tcp_timeout_syn_sent, 1293 .maxlen = sizeof(unsigned int), 1294 .mode = 0644, 1295 .proc_handler = &proc_dointvec_jiffies, 1296 }, 1297 { 1298 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, 1299 .procname = "ip_conntrack_tcp_timeout_syn_recv", 1300 .data = &nf_ct_tcp_timeout_syn_recv, 1301 .maxlen = sizeof(unsigned int), 1302 .mode = 0644, 1303 .proc_handler = &proc_dointvec_jiffies, 1304 }, 1305 { 1306 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, 1307 .procname = "ip_conntrack_tcp_timeout_established", 1308 .data = &nf_ct_tcp_timeout_established, 1309 .maxlen = sizeof(unsigned int), 1310 .mode = 0644, 1311 .proc_handler = &proc_dointvec_jiffies, 1312 }, 1313 { 1314 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, 1315 .procname = "ip_conntrack_tcp_timeout_fin_wait", 1316 .data = &nf_ct_tcp_timeout_fin_wait, 1317 .maxlen = sizeof(unsigned int), 1318 .mode = 0644, 1319 .proc_handler = &proc_dointvec_jiffies, 1320 }, 1321 { 1322 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, 1323 .procname = "ip_conntrack_tcp_timeout_close_wait", 1324 .data = &nf_ct_tcp_timeout_close_wait, 1325 .maxlen = sizeof(unsigned int), 1326 .mode = 0644, 1327 .proc_handler = &proc_dointvec_jiffies, 1328 }, 1329 { 1330 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, 1331 .procname = "ip_conntrack_tcp_timeout_last_ack", 1332 .data = &nf_ct_tcp_timeout_last_ack, 1333 .maxlen = sizeof(unsigned int), 1334 .mode = 0644, 1335 .proc_handler = &proc_dointvec_jiffies, 1336 }, 1337 { 1338 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, 1339 
.procname = "ip_conntrack_tcp_timeout_time_wait", 1340 .data = &nf_ct_tcp_timeout_time_wait, 1341 .maxlen = sizeof(unsigned int), 1342 .mode = 0644, 1343 .proc_handler = &proc_dointvec_jiffies, 1344 }, 1345 { 1346 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, 1347 .procname = "ip_conntrack_tcp_timeout_close", 1348 .data = &nf_ct_tcp_timeout_close, 1349 .maxlen = sizeof(unsigned int), 1350 .mode = 0644, 1351 .proc_handler = &proc_dointvec_jiffies, 1352 }, 1353 { 1354 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, 1355 .procname = "ip_conntrack_tcp_timeout_max_retrans", 1356 .data = &nf_ct_tcp_timeout_max_retrans, 1357 .maxlen = sizeof(unsigned int), 1358 .mode = 0644, 1359 .proc_handler = &proc_dointvec_jiffies, 1360 }, 1361 { 1362 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, 1363 .procname = "ip_conntrack_tcp_loose", 1364 .data = &nf_ct_tcp_loose, 1365 .maxlen = sizeof(unsigned int), 1366 .mode = 0644, 1367 .proc_handler = &proc_dointvec, 1368 }, 1369 { 1370 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, 1371 .procname = "ip_conntrack_tcp_be_liberal", 1372 .data = &nf_ct_tcp_be_liberal, 1373 .maxlen = sizeof(unsigned int), 1374 .mode = 0644, 1375 .proc_handler = &proc_dointvec, 1376 }, 1377 { 1378 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, 1379 .procname = "ip_conntrack_tcp_max_retrans", 1380 .data = &nf_ct_tcp_max_retrans, 1381 .maxlen = sizeof(unsigned int), 1382 .mode = 0644, 1383 .proc_handler = &proc_dointvec, 1384 }, 1385 { 1386 .ctl_name = 0 1387 } 1388}; 1389#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ 1390#endif /* CONFIG_SYSCTL */ 1391 1392struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = 1393{ 1394 .l3proto = PF_INET, 1395 .l4proto = IPPROTO_TCP, 1396 .name = "tcp", 1397 .pkt_to_tuple = tcp_pkt_to_tuple, 1398 .invert_tuple = tcp_invert_tuple, 1399 .print_tuple = tcp_print_tuple, 1400 .print_conntrack = tcp_print_conntrack, 1401 .packet = tcp_packet, 1402 .new = tcp_new, 1403 .error = tcp_error, 1404#if 
defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1405 .to_nfattr = tcp_to_nfattr, 1406 .from_nfattr = nfattr_to_tcp, 1407 .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, 1408 .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, 1409#endif 1410#ifdef CONFIG_SYSCTL 1411 .ctl_table_users = &tcp_sysctl_table_users, 1412 .ctl_table_header = &tcp_sysctl_header, 1413 .ctl_table = tcp_sysctl_table, 1414#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT 1415 .ctl_compat_table = tcp_compat_sysctl_table, 1416#endif 1417#endif 1418}; 1419EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); 1420 1421struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = 1422{ 1423 .l3proto = PF_INET6, 1424 .l4proto = IPPROTO_TCP, 1425 .name = "tcp", 1426 .pkt_to_tuple = tcp_pkt_to_tuple, 1427 .invert_tuple = tcp_invert_tuple, 1428 .print_tuple = tcp_print_tuple, 1429 .print_conntrack = tcp_print_conntrack, 1430 .packet = tcp_packet, 1431 .new = tcp_new, 1432 .error = tcp_error, 1433#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1434 .to_nfattr = tcp_to_nfattr, 1435 .from_nfattr = nfattr_to_tcp, 1436 .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, 1437 .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, 1438#endif 1439#ifdef CONFIG_SYSCTL 1440 .ctl_table_users = &tcp_sysctl_table_users, 1441 .ctl_table_header = &tcp_sysctl_header, 1442 .ctl_table = tcp_sysctl_table, 1443#endif 1444}; 1445EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); 1446