1/* NAT for netfilter; shared with compatibility layer. */ 2 3/* (C) 1999-2001 Paul `Rusty' Russell 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 11#include <linux/module.h> 12#include <linux/types.h> 13#include <linux/timer.h> 14#include <linux/skbuff.h> 15#include <linux/gfp.h> 16#include <net/checksum.h> 17#include <net/icmp.h> 18#include <net/ip.h> 19#include <net/tcp.h> /* For tcp_prot in getorigdst */ 20#include <linux/icmp.h> 21#include <linux/udp.h> 22#include <linux/jhash.h> 23 24#include <linux/netfilter_ipv4.h> 25#include <net/netfilter/nf_conntrack.h> 26#include <net/netfilter/nf_conntrack_core.h> 27#include <net/netfilter/nf_nat.h> 28#include <net/netfilter/nf_nat_protocol.h> 29#include <net/netfilter/nf_nat_core.h> 30#include <net/netfilter/nf_nat_helper.h> 31#include <net/netfilter/nf_conntrack_helper.h> 32#include <net/netfilter/nf_conntrack_l3proto.h> 33#include <net/netfilter/nf_conntrack_l4proto.h> 34#include <net/netfilter/nf_conntrack_zones.h> 35#ifdef CONFIG_IP_NF_TARGET_CONE 36#include <linux/netfilter_ipv4/ipt_cone.h> 37#endif /* CONFIG_IP_NF_TARGET_CONE */ 38#ifdef HNDCTF 39#include <linux/if.h> 40#include <linux/if_vlan.h> 41#include <typedefs.h> 42#include <osl.h> 43#include <ctf/hndctf.h> 44 45#define NFC_CTF_ENABLED (1 << 31) 46#endif /* HNDCTF */ 47 48static DEFINE_SPINLOCK(nf_nat_lock); 49 50static struct nf_conntrack_l3proto *l3proto __read_mostly; 51 52#define MAX_IP_NAT_PROTO 256 53static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] 54 __read_mostly; 55 56static inline const struct nf_nat_protocol * 57__nf_nat_proto_find(u_int8_t protonum) 58{ 59 return rcu_dereference(nf_nat_protos[protonum]); 60} 61 62const struct nf_nat_protocol * 63nf_nat_proto_find_get(u_int8_t protonum) 64{ 65 const struct nf_nat_protocol *p; 66 67 rcu_read_lock(); 68 p = __nf_nat_proto_find(protonum); 69 if (!try_module_get(p->me)) 70 p = &nf_nat_unknown_protocol; 71 rcu_read_unlock(); 72 73 return p; 74} 75EXPORT_SYMBOL_GPL(nf_nat_proto_find_get); 76 77void 78nf_nat_proto_put(const struct nf_nat_protocol *p) 79{ 80 module_put(p->me); 81} 82EXPORT_SYMBOL_GPL(nf_nat_proto_put); 83 84/* We keep an extra hash for each conntrack, for fast searching. */ 85static inline unsigned int 86hash_by_src(const struct net *net, u16 zone, 87 const struct nf_conntrack_tuple *tuple) 88{ 89 unsigned int hash; 90 91 /* Original src, to ensure we map it consistently if poss. */ 92 hash = jhash_3words((__force u32)tuple->src.u3.ip, 93 (__force u32)tuple->src.u.all ^ zone, 94 tuple->dst.protonum, 0); 95 return ((u64)hash * net->ipv4.nat_htable_size) >> 32; 96} 97 98#ifdef HNDCTF 99extern void ip_conntrack_ipct_add(struct sk_buff *skb, u_int32_t hooknum, 100 struct nf_conn *ct, enum ip_conntrack_info ci, 101 struct nf_conntrack_tuple *manip); 102#endif /* HNDCTF */ 103 104/* Is this tuple already taken? (not by us) */ 105int 106nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, 107 const struct nf_conn *ignored_conntrack) 108{ 109 /* Conntrack tracking doesn't keep track of outgoing tuples; only 110 incoming ones. NAT means they don't have a fixed mapping, 111 so we invert the tuple and look for the incoming reply. 112 113 We could keep a separate hash if this proves too slow. */ 114 struct nf_conntrack_tuple reply; 115 116 nf_ct_invert_tuplepr(&reply, tuple); 117 return nf_conntrack_tuple_taken(&reply, ignored_conntrack); 118} 119EXPORT_SYMBOL(nf_nat_used_tuple); 120 121/* If we source map this tuple so reply looks like reply_tuple, will 122 * that meet the constraints of range. */ 123static int 124in_range(const struct nf_conntrack_tuple *tuple, 125 const struct nf_nat_range *range) 126{ 127 const struct nf_nat_protocol *proto; 128 int ret = 0; 129 130 /* If we are supposed to map IPs, then we must be in the 131 range specified, otherwise let this drag us onto a new src IP. */ 132 if (range->flags & IP_NAT_RANGE_MAP_IPS) { 133 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || 134 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) 135 return 0; 136 } 137 138 rcu_read_lock(); 139 proto = __nf_nat_proto_find(tuple->dst.protonum); 140 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 141 proto->in_range(tuple, IP_NAT_MANIP_SRC, 142 &range->min, &range->max)) 143 ret = 1; 144 rcu_read_unlock(); 145 146 return ret; 147} 148 149static inline int 150same_src(const struct nf_conn *ct, 151 const struct nf_conntrack_tuple *tuple) 152{ 153 const struct nf_conntrack_tuple *t; 154 155 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 156 return (t->dst.protonum == tuple->dst.protonum && 157 t->src.u3.ip == tuple->src.u3.ip && 158 t->src.u.all == tuple->src.u.all); 159} 160 161/* Only called for SRC manip */ 162static int 163find_appropriate_src(struct net *net, u16 zone, 164 const struct nf_conntrack_tuple *tuple, 165 struct nf_conntrack_tuple *result, 166 const struct nf_nat_range *range) 167{ 168 unsigned int h = hash_by_src(net, zone, tuple); 169 const struct nf_conn_nat *nat; 170 const struct nf_conn *ct; 171 const struct hlist_node *n; 172 173 rcu_read_lock(); 174 hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { 175 ct = nat->ct; 176 if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { 177 /* Copy source part from reply tuple. */ 178 nf_ct_invert_tuplepr(result, 179 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 180 result->dst = tuple->dst; 181 182 if (in_range(result, range)) { 183 rcu_read_unlock(); 184 return 1; 185 } 186 } 187 } 188 rcu_read_unlock(); 189 return 0; 190} 191 192/* For [FUTURE] fragmentation handling, we want the least-used 193 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus 194 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports 195 1-65535, we don't do pro-rata allocation based on ports; we choose 196 the ip with the lowest src-ip/dst-ip/proto usage. 197*/ 198static void 199find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, 200 const struct nf_nat_range *range, 201 const struct nf_conn *ct, 202 enum nf_nat_manip_type maniptype) 203{ 204 __be32 *var_ipp; 205 /* Host order */ 206 u_int32_t minip, maxip, j; 207 208 /* No IP mapping? Do nothing. */ 209 if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) 210 return; 211 212 if (maniptype == IP_NAT_MANIP_SRC) 213 var_ipp = &tuple->src.u3.ip; 214 else 215 var_ipp = &tuple->dst.u3.ip; 216 217 /* Fast path: only one choice. */ 218 if (range->min_ip == range->max_ip) { 219 *var_ipp = range->min_ip; 220 return; 221 } 222 223 /* Hashing source and destination IPs gives a fairly even 224 * spread in practice (if there are a small number of IPs 225 * involved, there usually aren't that many connections 226 * anyway). The consistency means that servers see the same 227 * client coming from the same IP (some Internet Banking sites 228 * like this), even across reboots. */ 229 minip = ntohl(range->min_ip); 230 maxip = ntohl(range->max_ip); 231 j = jhash_2words((__force u32)tuple->src.u3.ip, 232 range->flags & IP_NAT_RANGE_PERSISTENT ? 233 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); 234 j = ((u64)j * (maxip - minip + 1)) >> 32; 235 *var_ipp = htonl(minip + j); 236} 237 238/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, 239 * we change the source to map into the range. For NF_INET_PRE_ROUTING 240 * and NF_INET_LOCAL_OUT, we change the destination to map into the 241 * range. It might not be possible to get a unique tuple, but we try. 242 * At worst (or if we race), we will end up with a final duplicate in 243 * __ip_conntrack_confirm and drop the packet. */ 244static void 245get_unique_tuple(struct nf_conntrack_tuple *tuple, 246 const struct nf_conntrack_tuple *orig_tuple, 247 const struct nf_nat_range *range, 248 struct nf_conn *ct, 249 enum nf_nat_manip_type maniptype) 250{ 251 struct net *net = nf_ct_net(ct); 252 const struct nf_nat_protocol *proto; 253 u16 zone = nf_ct_zone(ct); 254 255 /* 1) If this srcip/proto/src-proto-part is currently mapped, 256 and that same mapping gives a unique tuple within the given 257 range, use that. 258 259 This is only required for source (ie. NAT/masq) mappings. 260 So far, we don't do local source mappings, so multiple 261 manips not an issue. */ 262 if (maniptype == IP_NAT_MANIP_SRC && 263 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { 264 if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { 265 pr_debug("get_unique_tuple: Found current src map\n"); 266 if (!nf_nat_used_tuple(tuple, ct)) 267 return; 268 } 269 } 270 271 /* 2) Select the least-used IP/proto combination in the given 272 range. */ 273 *tuple = *orig_tuple; 274 find_best_ips_proto(zone, tuple, range, ct, maniptype); 275 276 /* 3) The per-protocol part of the manip is made to map into 277 the range to make a unique tuple. */ 278 279 rcu_read_lock(); 280 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 281 282 /* Only bother mapping if it's not already in range and unique */ 283 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && 284 (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 285 proto->in_range(tuple, maniptype, &range->min, &range->max)) && 286 !nf_nat_used_tuple(tuple, ct)) 287 goto out; 288 289 /* Last change: get protocol to try to obtain unique tuple. */ 290 proto->unique_tuple(tuple, range, maniptype, ct); 291out: 292 rcu_read_unlock(); 293} 294 295unsigned int 296nf_nat_setup_info(struct nf_conn *ct, 297 const struct nf_nat_range *range, 298 enum nf_nat_manip_type maniptype) 299{ 300 struct net *net = nf_ct_net(ct); 301 struct nf_conntrack_tuple curr_tuple, new_tuple; 302 struct nf_conn_nat *nat; 303 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); 304 305 /* nat helper or nfctnetlink also setup binding */ 306 nat = nfct_nat(ct); 307 if (!nat) { 308 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); 309 if (nat == NULL) { 310 pr_debug("failed to add NAT extension\n"); 311 return NF_ACCEPT; 312 } 313 } 314 315 NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC || 316 maniptype == IP_NAT_MANIP_DST); 317 BUG_ON(nf_nat_initialized(ct, maniptype)); 318 319 /* What we've got will look like inverse of reply. Normally 320 this is what is in the conntrack, except for prior 321 manipulations (future optimization: if num_manips == 0, 322 orig_tp = 323 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ 324 nf_ct_invert_tuplepr(&curr_tuple, 325 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 326 327 get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); 328 329 if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { 330 struct nf_conntrack_tuple reply; 331 332 /* Alter conntrack table so will recognize replies. */ 333 nf_ct_invert_tuplepr(&reply, &new_tuple); 334 nf_conntrack_alter_reply(ct, &reply); 335 336 /* Non-atomic: we own this at the moment. */ 337 if (maniptype == IP_NAT_MANIP_SRC) 338 ct->status |= IPS_SRC_NAT; 339 else 340 ct->status |= IPS_DST_NAT; 341 } 342 343 /* Place in source hash if this is the first time. */ 344 if (have_to_hash) { 345 unsigned int srchash; 346 347 srchash = hash_by_src(net, nf_ct_zone(ct), 348 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 349 spin_lock_bh(&nf_nat_lock); 350 /* nf_conntrack_alter_reply might re-allocate exntension aera */ 351 nat = nfct_nat(ct); 352 nat->ct = ct; 353 hlist_add_head_rcu(&nat->bysource, 354 &net->ipv4.nat_bysource[srchash]); 355 spin_unlock_bh(&nf_nat_lock); 356 } 357 358 /* It's done. */ 359 if (maniptype == IP_NAT_MANIP_DST) 360 set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 361 else 362 set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 363 364 return NF_ACCEPT; 365} 366EXPORT_SYMBOL(nf_nat_setup_info); 367 368/* Returns true if succeeded. */ 369static bool 370manip_pkt(u_int16_t proto, 371 struct sk_buff *skb, 372 unsigned int iphdroff, 373 const struct nf_conntrack_tuple *target, 374 enum nf_nat_manip_type maniptype) 375{ 376 struct iphdr *iph; 377 const struct nf_nat_protocol *p; 378 379 if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) 380 return false; 381 382 iph = (void *)skb->data + iphdroff; 383 384 /* Manipulate protcol part. */ 385 386 /* rcu_read_lock()ed by nf_hook_slow */ 387 p = __nf_nat_proto_find(proto); 388 if (!p->manip_pkt(skb, iphdroff, target, maniptype)) 389 return false; 390 391 iph = (void *)skb->data + iphdroff; 392 393 if (maniptype == IP_NAT_MANIP_SRC) { 394 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); 395 iph->saddr = target->src.u3.ip; 396 } else { 397 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); 398 iph->daddr = target->dst.u3.ip; 399 } 400 return true; 401} 402 403/* Do packet manipulations according to nf_nat_setup_info. */ 404unsigned int nf_nat_packet(struct nf_conn *ct, 405 enum ip_conntrack_info ctinfo, 406 unsigned int hooknum, 407 struct sk_buff *skb) 408{ 409 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 410 unsigned long statusbit; 411 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); 412 413 if (mtype == IP_NAT_MANIP_SRC) 414 statusbit = IPS_SRC_NAT; 415 else 416 statusbit = IPS_DST_NAT; 417 418 /* Invert if this is reply dir. */ 419 if (dir == IP_CT_DIR_REPLY) 420 statusbit ^= IPS_NAT_MASK; 421 422 /* Non-atomic: these bits don't change. */ 423 if (ct->status & statusbit) { 424 struct nf_conntrack_tuple target; 425 426 /* We are aiming to look like inverse of other direction. */ 427 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); 428#ifdef HNDCTF 429 ip_conntrack_ipct_add(skb, hooknum, ct, ctinfo, &target); 430#endif /* HNDCTF */ 431 if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) 432 return NF_DROP; 433 } else { 434#ifdef HNDCTF 435#endif /* HNDCTF */ 436 } 437 438 return NF_ACCEPT; 439} 440EXPORT_SYMBOL_GPL(nf_nat_packet); 441 442/* Dir is direction ICMP is coming from (opposite to packet it contains) */ 443int nf_nat_icmp_reply_translation(struct nf_conn *ct, 444 enum ip_conntrack_info ctinfo, 445 unsigned int hooknum, 446 struct sk_buff *skb) 447{ 448 struct { 449 struct icmphdr icmp; 450 struct iphdr ip; 451 } *inside; 452 const struct nf_conntrack_l4proto *l4proto; 453 struct nf_conntrack_tuple inner, target; 454 int hdrlen = ip_hdrlen(skb); 455 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 456 unsigned long statusbit; 457 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); 458 459 if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) 460 return 0; 461 462 inside = (void *)skb->data + hdrlen; 463 464 /* We're actually going to mangle it beyond trivial checksum 465 adjustment, so make sure the current checksum is correct. */ 466 if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) 467 return 0; 468 469 /* Must be RELATED */ 470 NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || 471 skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); 472 473 /* Redirects on non-null nats must be dropped, else they'll 474 start talking to each other without our translation, and be 475 confused... --RR */ 476 if (inside->icmp.type == ICMP_REDIRECT) { 477 /* If NAT isn't finished, assume it and drop. */ 478 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) 479 return 0; 480 481 if (ct->status & IPS_NAT_MASK) 482 return 0; 483 } 484 485 pr_debug("icmp_reply_translation: translating error %p manip %u " 486 "dir %s\n", skb, manip, 487 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); 488 489 /* rcu_read_lock()ed by nf_hook_slow */ 490 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); 491 492 if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), 493 (hdrlen + 494 sizeof(struct icmphdr) + inside->ip.ihl * 4), 495 (u_int16_t)AF_INET, inside->ip.protocol, 496 &inner, l3proto, l4proto)) 497 return 0; 498 499 /* Change inner back to look like incoming packet. We do the 500 opposite manip on this hook to normal, because it might not 501 pass all hooks (locally-generated ICMP). Consider incoming 502 packet: PREROUTING (DST manip), routing produces ICMP, goes 503 through POSTROUTING (which must correct the DST manip). */ 504 if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), 505 &ct->tuplehash[!dir].tuple, !manip)) 506 return 0; 507 508 if (skb->ip_summed != CHECKSUM_PARTIAL) { 509 /* Reloading "inside" here since manip_pkt inner. */ 510 inside = (void *)skb->data + hdrlen; 511 inside->icmp.checksum = 0; 512 inside->icmp.checksum = 513 csum_fold(skb_checksum(skb, hdrlen, 514 skb->len - hdrlen, 0)); 515 } 516 517 /* Change outer to look the reply to an incoming packet 518 * (proto 0 means don't invert per-proto part). */ 519 if (manip == IP_NAT_MANIP_SRC) 520 statusbit = IPS_SRC_NAT; 521 else 522 statusbit = IPS_DST_NAT; 523 524 /* Invert if this is reply dir. */ 525 if (dir == IP_CT_DIR_REPLY) 526 statusbit ^= IPS_NAT_MASK; 527 528 if (ct->status & statusbit) { 529 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); 530 if (!manip_pkt(0, skb, 0, &target, manip)) 531 return 0; 532 } 533 534 return 1; 535} 536EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 537 538/* Protocol registration. */ 539int nf_nat_protocol_register(const struct nf_nat_protocol *proto) 540{ 541 int ret = 0; 542 543 spin_lock_bh(&nf_nat_lock); 544 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 545 ret = -EBUSY; 546 goto out; 547 } 548 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); 549 out: 550 spin_unlock_bh(&nf_nat_lock); 551 return ret; 552} 553EXPORT_SYMBOL(nf_nat_protocol_register); 554 555/* Noone stores the protocol anywhere; simply delete it. */ 556void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) 557{ 558 spin_lock_bh(&nf_nat_lock); 559 rcu_assign_pointer(nf_nat_protos[proto->protonum], 560 &nf_nat_unknown_protocol); 561 spin_unlock_bh(&nf_nat_lock); 562 synchronize_rcu(); 563} 564EXPORT_SYMBOL(nf_nat_protocol_unregister); 565 566/* Noone using conntrack by the time this called. */ 567static void nf_nat_cleanup_conntrack(struct nf_conn *ct) 568{ 569 struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); 570 571 if (nat == NULL || nat->ct == NULL) 572 return; 573 574 NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); 575 576 spin_lock_bh(&nf_nat_lock); 577 hlist_del_rcu(&nat->bysource); 578 spin_unlock_bh(&nf_nat_lock); 579#ifdef CONFIG_IP_NF_TARGET_CONE 580 /* Detach from cone list */ 581 ipt_cone_cleanup_conntrack(nat); 582#endif /* CONFIG_IP_NF_TARGET_CONE */ 583} 584 585static void nf_nat_move_storage(void *new, void *old) 586{ 587 struct nf_conn_nat *new_nat = new; 588 struct nf_conn_nat *old_nat = old; 589 struct nf_conn *ct = old_nat->ct; 590 591 if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) 592 return; 593 594 spin_lock_bh(&nf_nat_lock); 595 new_nat->ct = ct; 596 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); 597 spin_unlock_bh(&nf_nat_lock); 598} 599 600static struct nf_ct_ext_type nat_extend __read_mostly = { 601 .len = sizeof(struct nf_conn_nat), 602 .align = __alignof__(struct nf_conn_nat), 603 .destroy = nf_nat_cleanup_conntrack, 604 .move = nf_nat_move_storage, 605 .id = NF_CT_EXT_NAT, 606 .flags = NF_CT_EXT_F_PREALLOC, 607}; 608 609#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 610 611#include <linux/netfilter/nfnetlink.h> 612#include <linux/netfilter/nfnetlink_conntrack.h> 613 614static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { 615 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, 616 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, 617}; 618 619static int nfnetlink_parse_nat_proto(struct nlattr *attr, 620 const struct nf_conn *ct, 621 struct nf_nat_range *range) 622{ 623 struct nlattr *tb[CTA_PROTONAT_MAX+1]; 624 const struct nf_nat_protocol *npt; 625 int err; 626 627 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); 628 if (err < 0) 629 return err; 630 631 npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); 632 if (npt->nlattr_to_range) 633 err = npt->nlattr_to_range(tb, range); 634 nf_nat_proto_put(npt); 635 return err; 636} 637 638static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { 639 [CTA_NAT_MINIP] = { .type = NLA_U32 }, 640 [CTA_NAT_MAXIP] = { .type = NLA_U32 }, 641}; 642 643static int 644nfnetlink_parse_nat(const struct nlattr *nat, 645 const struct nf_conn *ct, struct nf_nat_range *range) 646{ 647 struct nlattr *tb[CTA_NAT_MAX+1]; 648 int err; 649 650 memset(range, 0, sizeof(*range)); 651 652 err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); 653 if (err < 0) 654 return err; 655 656 if (tb[CTA_NAT_MINIP]) 657 range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); 658 659 if (!tb[CTA_NAT_MAXIP]) 660 range->max_ip = range->min_ip; 661 else 662 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); 663 664 if (range->min_ip) 665 range->flags |= IP_NAT_RANGE_MAP_IPS; 666 667 if (!tb[CTA_NAT_PROTO]) 668 return 0; 669 670 err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); 671 if (err < 0) 672 return err; 673 674 return 0; 675} 676 677static int 678nfnetlink_parse_nat_setup(struct nf_conn *ct, 679 enum nf_nat_manip_type manip, 680 const struct nlattr *attr) 681{ 682 struct nf_nat_range range; 683 684 if (nfnetlink_parse_nat(attr, ct, &range) < 0) 685 return -EINVAL; 686 if (nf_nat_initialized(ct, manip)) 687 return -EEXIST; 688 689 return nf_nat_setup_info(ct, &range, manip); 690} 691#else 692static int 693nfnetlink_parse_nat_setup(struct nf_conn *ct, 694 enum nf_nat_manip_type manip, 695 const struct nlattr *attr) 696{ 697 return -EOPNOTSUPP; 698} 699#endif 700 701static int __net_init nf_nat_net_init(struct net *net) 702{ 703 /* Leave them the same for the moment. */ 704 net->ipv4.nat_htable_size = net->ct.htable_size; 705 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 706 &net->ipv4.nat_vmalloced, 0); 707 if (!net->ipv4.nat_bysource) 708 return -ENOMEM; 709 return 0; 710} 711 712/* Clear NAT section of all conntracks, in case we're loaded again. */ 713static int clean_nat(struct nf_conn *i, void *data) 714{ 715 struct nf_conn_nat *nat = nfct_nat(i); 716 717 if (!nat) 718 return 0; 719 memset(nat, 0, sizeof(*nat)); 720 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); 721 return 0; 722} 723 724static void __net_exit nf_nat_net_exit(struct net *net) 725{ 726 nf_ct_iterate_cleanup(net, &clean_nat, NULL); 727 synchronize_rcu(); 728 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, 729 net->ipv4.nat_htable_size); 730} 731 732static struct pernet_operations nf_nat_net_ops = { 733 .init = nf_nat_net_init, 734 .exit = nf_nat_net_exit, 735}; 736 737static int __init nf_nat_init(void) 738{ 739 size_t i; 740 int ret; 741 742 need_ipv4_conntrack(); 743 744 ret = nf_ct_extend_register(&nat_extend); 745 if (ret < 0) { 746 printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); 747 return ret; 748 } 749 750 ret = register_pernet_subsys(&nf_nat_net_ops); 751 if (ret < 0) 752 goto cleanup_extend; 753 754 /* Sew in builtin protocols. */ 755 spin_lock_bh(&nf_nat_lock); 756 for (i = 0; i < MAX_IP_NAT_PROTO; i++) 757 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); 758 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); 759 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); 760 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); 761 spin_unlock_bh(&nf_nat_lock); 762 763 /* Initialize fake conntrack so that NAT will skip it */ 764 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); 765 766 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 767 768 BUG_ON(nf_nat_seq_adjust_hook != NULL); 769 rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); 770 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); 771 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, 772 nfnetlink_parse_nat_setup); 773 BUG_ON(nf_ct_nat_offset != NULL); 774 rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); 775 return 0; 776 777 cleanup_extend: 778 nf_ct_extend_unregister(&nat_extend); 779 return ret; 780} 781 782static void __exit nf_nat_cleanup(void) 783{ 784 unregister_pernet_subsys(&nf_nat_net_ops); 785 nf_ct_l3proto_put(l3proto); 786 nf_ct_extend_unregister(&nat_extend); 787 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); 788 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); 789 rcu_assign_pointer(nf_ct_nat_offset, NULL); 790 synchronize_net(); 791} 792 793MODULE_LICENSE("GPL"); 794MODULE_ALIAS("nf-nat-ipv4"); 795 796module_init(nf_nat_init); 797module_exit(nf_nat_cleanup); 798