1 2/* (C) 1999-2001 Paul `Rusty' Russell 3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License version 2 as 7 * published by the Free Software Foundation. 8 */ 9 10#include <linux/types.h> 11#include <linux/ip.h> 12#include <linux/netfilter.h> 13#include <linux/module.h> 14#include <linux/skbuff.h> 15#include <linux/icmp.h> 16#include <linux/sysctl.h> 17#include <net/route.h> 18#include <net/ip.h> 19 20#include <linux/netfilter_ipv4.h> 21#include <net/netfilter/nf_conntrack.h> 22#include <net/netfilter/nf_conntrack_helper.h> 23#include <net/netfilter/nf_conntrack_l4proto.h> 24#include <net/netfilter/nf_conntrack_l3proto.h> 25#include <net/netfilter/nf_conntrack_zones.h> 26#include <net/netfilter/nf_conntrack_core.h> 27#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 28#include <net/netfilter/nf_nat_helper.h> 29#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 30#include <net/netfilter/nf_log.h> 31 32int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, 33 struct nf_conn *ct, 34 enum ip_conntrack_info ctinfo); 35EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); 36 37static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 38 struct nf_conntrack_tuple *tuple) 39{ 40 const __be32 *ap; 41 __be32 _addrs[2]; 42 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 43 sizeof(u_int32_t) * 2, _addrs); 44 if (ap == NULL) 45 return false; 46 47 tuple->src.u3.ip = ap[0]; 48 tuple->dst.u3.ip = ap[1]; 49 50 return true; 51} 52 53static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, 54 const struct nf_conntrack_tuple *orig) 55{ 56 tuple->src.u3.ip = orig->dst.u3.ip; 57 tuple->dst.u3.ip = orig->src.u3.ip; 58 59 return true; 60} 61 62static int ipv4_print_tuple(struct seq_file *s, 63 const struct nf_conntrack_tuple *tuple) 64{ 65 return seq_printf(s, "src=%pI4 dst=%pI4 ", 66 &tuple->src.u3.ip, &tuple->dst.u3.ip); 67} 68 69static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 70 unsigned int *dataoff, u_int8_t *protonum) 71{ 72 const struct iphdr *iph; 73 struct iphdr _iph; 74 75 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 76 if (iph == NULL) 77 return -NF_DROP; 78 79 /* Conntrack defragments packets, we might still see fragments 80 * inside ICMP packets though. */ 81 if (iph->frag_off & htons(IP_OFFSET)) 82 return -NF_DROP; 83 84 *dataoff = nhoff + (iph->ihl << 2); 85 *protonum = iph->protocol; 86 87 return NF_ACCEPT; 88} 89 90static unsigned int ipv4_confirm(unsigned int hooknum, 91 struct sk_buff *skb, 92 const struct net_device *in, 93 const struct net_device *out, 94 int (*okfn)(struct sk_buff *)) 95{ 96 struct nf_conn *ct; 97 enum ip_conntrack_info ctinfo; 98 const struct nf_conn_help *help; 99 const struct nf_conntrack_helper *helper; 100 unsigned int ret; 101 102 /* This is where we call the helper: as the packet goes out. */ 103 ct = nf_ct_get(skb, &ctinfo); 104 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) 105 goto out; 106 107 help = nfct_help(ct); 108 if (!help) 109 goto out; 110 111 /* rcu_read_lock()ed by nf_hook_slow */ 112 helper = rcu_dereference(help->helper); 113 if (!helper) 114 goto out; 115 116 ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 117 ct, ctinfo); 118 if (ret != NF_ACCEPT) { 119 nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, 120 "nf_ct_%s: dropping packet", helper->name); 121 return ret; 122 } 123 124 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { 125 typeof(nf_nat_seq_adjust_hook) seq_adjust; 126 127 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); 128 if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { 129 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 130 return NF_DROP; 131 } 132 } 133out: 134 /* We've seen it coming out the other side: confirm it */ 135 return nf_conntrack_confirm(skb); 136} 137 138static unsigned int ipv4_conntrack_in(unsigned int hooknum, 139 struct sk_buff *skb, 140 const struct net_device *in, 141 const struct net_device *out, 142 int (*okfn)(struct sk_buff *)) 143{ 144 return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); 145} 146 147static unsigned int ipv4_conntrack_local(unsigned int hooknum, 148 struct sk_buff *skb, 149 const struct net_device *in, 150 const struct net_device *out, 151 int (*okfn)(struct sk_buff *)) 152{ 153 /* root is playing with raw sockets. */ 154 if (skb->len < sizeof(struct iphdr) || 155 ip_hdrlen(skb) < sizeof(struct iphdr)) 156 return NF_ACCEPT; 157 return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); 158} 159 160/* Connection tracking may drop packets, but never alters them, so 161 make it the first hook. */ 162static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { 163 { 164 .hook = ipv4_conntrack_in, 165 .owner = THIS_MODULE, 166 .pf = NFPROTO_IPV4, 167 .hooknum = NF_INET_PRE_ROUTING, 168 .priority = NF_IP_PRI_CONNTRACK, 169 }, 170 { 171 .hook = ipv4_conntrack_local, 172 .owner = THIS_MODULE, 173 .pf = NFPROTO_IPV4, 174 .hooknum = NF_INET_LOCAL_OUT, 175 .priority = NF_IP_PRI_CONNTRACK, 176 }, 177 { 178 .hook = ipv4_confirm, 179 .owner = THIS_MODULE, 180 .pf = NFPROTO_IPV4, 181 .hooknum = NF_INET_POST_ROUTING, 182 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 183 }, 184 { 185 .hook = ipv4_confirm, 186 .owner = THIS_MODULE, 187 .pf = NFPROTO_IPV4, 188 .hooknum = NF_INET_LOCAL_IN, 189 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 190 }, 191}; 192 193#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 194static int log_invalid_proto_min = 0; 195static int log_invalid_proto_max = 255; 196 197static ctl_table ip_ct_sysctl_table[] = { 198 { 199 .procname = "ip_conntrack_max", 200 .data = &nf_conntrack_max, 201 .maxlen = sizeof(int), 202 .mode = 0644, 203 .proc_handler = proc_dointvec, 204 }, 205 { 206 .procname = "ip_conntrack_count", 207 .data = &init_net.ct.count, 208 .maxlen = sizeof(int), 209 .mode = 0444, 210 .proc_handler = proc_dointvec, 211 }, 212 { 213 .procname = "ip_conntrack_buckets", 214 .data = &init_net.ct.htable_size, 215 .maxlen = sizeof(unsigned int), 216 .mode = 0444, 217 .proc_handler = proc_dointvec, 218 }, 219 { 220 .procname = "ip_conntrack_checksum", 221 .data = &init_net.ct.sysctl_checksum, 222 .maxlen = sizeof(int), 223 .mode = 0644, 224 .proc_handler = proc_dointvec, 225 }, 226 { 227 .procname = "ip_conntrack_log_invalid", 228 .data = &init_net.ct.sysctl_log_invalid, 229 .maxlen = sizeof(unsigned int), 230 .mode = 0644, 231 .proc_handler = proc_dointvec_minmax, 232 .extra1 = &log_invalid_proto_min, 233 .extra2 = &log_invalid_proto_max, 234 }, 235 { } 236}; 237#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ 238 239/* Fast function for those who don't want to parse /proc (and I don't 240 blame them). */ 241/* Reversing the socket's dst/src point of view gives us the reply 242 mapping. */ 243static int 244getorigdst(struct sock *sk, int optval, void __user *user, int *len) 245{ 246 const struct inet_sock *inet = inet_sk(sk); 247 const struct nf_conntrack_tuple_hash *h; 248 struct nf_conntrack_tuple tuple; 249 250 memset(&tuple, 0, sizeof(tuple)); 251 tuple.src.u3.ip = inet->inet_rcv_saddr; 252 tuple.src.u.tcp.port = inet->inet_sport; 253 tuple.dst.u3.ip = inet->inet_daddr; 254 tuple.dst.u.tcp.port = inet->inet_dport; 255 tuple.src.l3num = PF_INET; 256 tuple.dst.protonum = sk->sk_protocol; 257 258 /* We only do TCP and SCTP at the moment: is there a better way? */ 259 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { 260 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 261 return -ENOPROTOOPT; 262 } 263 264 if ((unsigned int) *len < sizeof(struct sockaddr_in)) { 265 pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n", 266 *len, sizeof(struct sockaddr_in)); 267 return -EINVAL; 268 } 269 270 h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); 271 if (h) { 272 struct sockaddr_in sin; 273 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 274 275 sin.sin_family = AF_INET; 276 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 277 .tuple.dst.u.tcp.port; 278 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 279 .tuple.dst.u3.ip; 280 memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 281 282 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", 283 &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 284 nf_ct_put(ct); 285 if (copy_to_user(user, &sin, sizeof(sin)) != 0) 286 return -EFAULT; 287 else 288 return 0; 289 } 290 pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", 291 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), 292 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); 293 return -ENOENT; 294} 295 296#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 297 298#include <linux/netfilter/nfnetlink.h> 299#include <linux/netfilter/nfnetlink_conntrack.h> 300 301static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 302 const struct nf_conntrack_tuple *tuple) 303{ 304 NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip); 305 NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip); 306 return 0; 307 308nla_put_failure: 309 return -1; 310} 311 312static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { 313 [CTA_IP_V4_SRC] = { .type = NLA_U32 }, 314 [CTA_IP_V4_DST] = { .type = NLA_U32 }, 315}; 316 317static int ipv4_nlattr_to_tuple(struct nlattr *tb[], 318 struct nf_conntrack_tuple *t) 319{ 320 if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) 321 return -EINVAL; 322 323 t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]); 324 t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]); 325 326 return 0; 327} 328 329static int ipv4_nlattr_tuple_size(void) 330{ 331 return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); 332} 333#endif 334 335static struct nf_sockopt_ops so_getorigdst = { 336 .pf = PF_INET, 337 .get_optmin = SO_ORIGINAL_DST, 338 .get_optmax = SO_ORIGINAL_DST+1, 339 .get = &getorigdst, 340 .owner = THIS_MODULE, 341}; 342 343struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { 344 .l3proto = PF_INET, 345 .name = "ipv4", 346 .pkt_to_tuple = ipv4_pkt_to_tuple, 347 .invert_tuple = ipv4_invert_tuple, 348 .print_tuple = ipv4_print_tuple, 349 .get_l4proto = ipv4_get_l4proto, 350#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 351 .tuple_to_nlattr = ipv4_tuple_to_nlattr, 352 .nlattr_tuple_size = ipv4_nlattr_tuple_size, 353 .nlattr_to_tuple = ipv4_nlattr_to_tuple, 354 .nla_policy = ipv4_nla_policy, 355#endif 356#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 357 .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, 358 .ctl_table = ip_ct_sysctl_table, 359#endif 360 .me = THIS_MODULE, 361}; 362 363module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 364 &nf_conntrack_htable_size, 0600); 365 366MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 367MODULE_ALIAS("ip_conntrack"); 368MODULE_LICENSE("GPL"); 369 370static int __init nf_conntrack_l3proto_ipv4_init(void) 371{ 372 int ret = 0; 373 374 need_conntrack(); 375 nf_defrag_ipv4_enable(); 376 377 ret = nf_register_sockopt(&so_getorigdst); 378 if (ret < 0) { 379 printk(KERN_ERR "Unable to register netfilter socket option\n"); 380 return ret; 381 } 382 383 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); 384 if (ret < 0) { 385 pr_err("nf_conntrack_ipv4: can't register tcp.\n"); 386 goto cleanup_sockopt; 387 } 388 389 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); 390 if (ret < 0) { 391 pr_err("nf_conntrack_ipv4: can't register udp.\n"); 392 goto cleanup_tcp; 393 } 394 395 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); 396 if (ret < 0) { 397 pr_err("nf_conntrack_ipv4: can't register icmp.\n"); 398 goto cleanup_udp; 399 } 400 401 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); 402 if (ret < 0) { 403 pr_err("nf_conntrack_ipv4: can't register ipv4\n"); 404 goto cleanup_icmp; 405 } 406 407 ret = nf_register_hooks(ipv4_conntrack_ops, 408 ARRAY_SIZE(ipv4_conntrack_ops)); 409 if (ret < 0) { 410 pr_err("nf_conntrack_ipv4: can't register hooks.\n"); 411 goto cleanup_ipv4; 412 } 413#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 414 ret = nf_conntrack_ipv4_compat_init(); 415 if (ret < 0) 416 goto cleanup_hooks; 417#endif 418 return ret; 419#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 420 cleanup_hooks: 421 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); 422#endif 423 cleanup_ipv4: 424 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 425 cleanup_icmp: 426 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); 427 cleanup_udp: 428 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); 429 cleanup_tcp: 430 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); 431 cleanup_sockopt: 432 nf_unregister_sockopt(&so_getorigdst); 433 return ret; 434} 435 436static void __exit nf_conntrack_l3proto_ipv4_fini(void) 437{ 438 synchronize_net(); 439#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 440 nf_conntrack_ipv4_compat_fini(); 441#endif 442 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); 443 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 444 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); 445 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); 446 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); 447 nf_unregister_sockopt(&so_getorigdst); 448} 449 450module_init(nf_conntrack_l3proto_ipv4_init); 451module_exit(nf_conntrack_l3proto_ipv4_fini); 452 453void need_ipv4_conntrack(void) 454{ 455 return; 456} 457EXPORT_SYMBOL_GPL(need_ipv4_conntrack); 458