1// SPDX-License-Identifier: GPL-2.0-only 2/* (C) 1999-2001 Paul `Rusty' Russell 3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 */ 5 6#include <linux/types.h> 7#include <linux/export.h> 8#include <linux/init.h> 9#include <linux/udp.h> 10#include <linux/tcp.h> 11#include <linux/icmp.h> 12#include <linux/icmpv6.h> 13 14#include <linux/dccp.h> 15#include <linux/sctp.h> 16#include <net/sctp/checksum.h> 17 18#include <linux/netfilter.h> 19#include <net/netfilter/nf_nat.h> 20 21#include <linux/ipv6.h> 22#include <linux/netfilter_ipv6.h> 23#include <net/checksum.h> 24#include <net/ip6_checksum.h> 25#include <net/ip6_route.h> 26#include <net/xfrm.h> 27#include <net/ipv6.h> 28 29#include <net/netfilter/nf_conntrack_core.h> 30#include <net/netfilter/nf_conntrack.h> 31#include <linux/netfilter/nfnetlink_conntrack.h> 32 33static void nf_csum_update(struct sk_buff *skb, 34 unsigned int iphdroff, __sum16 *check, 35 const struct nf_conntrack_tuple *t, 36 enum nf_nat_manip_type maniptype); 37 38static void 39__udp_manip_pkt(struct sk_buff *skb, 40 unsigned int iphdroff, struct udphdr *hdr, 41 const struct nf_conntrack_tuple *tuple, 42 enum nf_nat_manip_type maniptype, bool do_csum) 43{ 44 __be16 *portptr, newport; 45 46 if (maniptype == NF_NAT_MANIP_SRC) { 47 /* Get rid of src port */ 48 newport = tuple->src.u.udp.port; 49 portptr = &hdr->source; 50 } else { 51 /* Get rid of dst port */ 52 newport = tuple->dst.u.udp.port; 53 portptr = &hdr->dest; 54 } 55 if (do_csum) { 56 nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); 57 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 58 false); 59 if (!hdr->check) 60 hdr->check = CSUM_MANGLED_0; 61 } 62 *portptr = newport; 63} 64 65static bool udp_manip_pkt(struct sk_buff *skb, 66 unsigned int iphdroff, unsigned int hdroff, 67 const struct nf_conntrack_tuple *tuple, 68 enum nf_nat_manip_type maniptype) 69{ 70 struct udphdr *hdr; 71 72 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) 73 return false; 74 75 hdr = (struct udphdr *)(skb->data + hdroff); 76 __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check); 77 78 return true; 79} 80 81static bool udplite_manip_pkt(struct sk_buff *skb, 82 unsigned int iphdroff, unsigned int hdroff, 83 const struct nf_conntrack_tuple *tuple, 84 enum nf_nat_manip_type maniptype) 85{ 86#ifdef CONFIG_NF_CT_PROTO_UDPLITE 87 struct udphdr *hdr; 88 89 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) 90 return false; 91 92 hdr = (struct udphdr *)(skb->data + hdroff); 93 __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true); 94#endif 95 return true; 96} 97 98static bool 99sctp_manip_pkt(struct sk_buff *skb, 100 unsigned int iphdroff, unsigned int hdroff, 101 const struct nf_conntrack_tuple *tuple, 102 enum nf_nat_manip_type maniptype) 103{ 104#ifdef CONFIG_NF_CT_PROTO_SCTP 105 struct sctphdr *hdr; 106 int hdrsize = 8; 107 108 /* This could be an inner header returned in imcp packet; in such 109 * cases we cannot update the checksum field since it is outside 110 * of the 8 bytes of transport layer headers we are guaranteed. 111 */ 112 if (skb->len >= hdroff + sizeof(*hdr)) 113 hdrsize = sizeof(*hdr); 114 115 if (skb_ensure_writable(skb, hdroff + hdrsize)) 116 return false; 117 118 hdr = (struct sctphdr *)(skb->data + hdroff); 119 120 if (maniptype == NF_NAT_MANIP_SRC) { 121 /* Get rid of src port */ 122 hdr->source = tuple->src.u.sctp.port; 123 } else { 124 /* Get rid of dst port */ 125 hdr->dest = tuple->dst.u.sctp.port; 126 } 127 128 if (hdrsize < sizeof(*hdr)) 129 return true; 130 131 if (skb->ip_summed != CHECKSUM_PARTIAL) { 132 hdr->checksum = sctp_compute_cksum(skb, hdroff); 133 skb->ip_summed = CHECKSUM_NONE; 134 } 135 136#endif 137 return true; 138} 139 140static bool 141tcp_manip_pkt(struct sk_buff *skb, 142 unsigned int iphdroff, unsigned int hdroff, 143 const struct nf_conntrack_tuple *tuple, 144 enum nf_nat_manip_type maniptype) 145{ 146 struct tcphdr *hdr; 147 __be16 *portptr, newport, oldport; 148 int hdrsize = 8; /* TCP connection tracking guarantees this much */ 149 150 /* this could be a inner header returned in icmp packet; in such 151 cases we cannot update the checksum field since it is outside of 152 the 8 bytes of transport layer headers we are guaranteed */ 153 if (skb->len >= hdroff + sizeof(struct tcphdr)) 154 hdrsize = sizeof(struct tcphdr); 155 156 if (skb_ensure_writable(skb, hdroff + hdrsize)) 157 return false; 158 159 hdr = (struct tcphdr *)(skb->data + hdroff); 160 161 if (maniptype == NF_NAT_MANIP_SRC) { 162 /* Get rid of src port */ 163 newport = tuple->src.u.tcp.port; 164 portptr = &hdr->source; 165 } else { 166 /* Get rid of dst port */ 167 newport = tuple->dst.u.tcp.port; 168 portptr = &hdr->dest; 169 } 170 171 oldport = *portptr; 172 *portptr = newport; 173 174 if (hdrsize < sizeof(*hdr)) 175 return true; 176 177 nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); 178 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); 179 return true; 180} 181 182static bool 183dccp_manip_pkt(struct sk_buff *skb, 184 unsigned int iphdroff, unsigned int hdroff, 185 const struct nf_conntrack_tuple *tuple, 186 enum nf_nat_manip_type maniptype) 187{ 188#ifdef CONFIG_NF_CT_PROTO_DCCP 189 struct dccp_hdr *hdr; 190 __be16 *portptr, oldport, newport; 191 int hdrsize = 8; /* DCCP connection tracking guarantees this much */ 192 193 if (skb->len >= hdroff + sizeof(struct dccp_hdr)) 194 hdrsize = sizeof(struct dccp_hdr); 195 196 if (skb_ensure_writable(skb, hdroff + hdrsize)) 197 return false; 198 199 hdr = (struct dccp_hdr *)(skb->data + hdroff); 200 201 if (maniptype == NF_NAT_MANIP_SRC) { 202 newport = tuple->src.u.dccp.port; 203 portptr = &hdr->dccph_sport; 204 } else { 205 newport = tuple->dst.u.dccp.port; 206 portptr = &hdr->dccph_dport; 207 } 208 209 oldport = *portptr; 210 *portptr = newport; 211 212 if (hdrsize < sizeof(*hdr)) 213 return true; 214 215 nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); 216 inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, 217 false); 218#endif 219 return true; 220} 221 222static bool 223icmp_manip_pkt(struct sk_buff *skb, 224 unsigned int iphdroff, unsigned int hdroff, 225 const struct nf_conntrack_tuple *tuple, 226 enum nf_nat_manip_type maniptype) 227{ 228 struct icmphdr *hdr; 229 230 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) 231 return false; 232 233 hdr = (struct icmphdr *)(skb->data + hdroff); 234 switch (hdr->type) { 235 case ICMP_ECHO: 236 case ICMP_ECHOREPLY: 237 case ICMP_TIMESTAMP: 238 case ICMP_TIMESTAMPREPLY: 239 case ICMP_INFO_REQUEST: 240 case ICMP_INFO_REPLY: 241 case ICMP_ADDRESS: 242 case ICMP_ADDRESSREPLY: 243 break; 244 default: 245 return true; 246 } 247 inet_proto_csum_replace2(&hdr->checksum, skb, 248 hdr->un.echo.id, tuple->src.u.icmp.id, false); 249 hdr->un.echo.id = tuple->src.u.icmp.id; 250 return true; 251} 252 253static bool 254icmpv6_manip_pkt(struct sk_buff *skb, 255 unsigned int iphdroff, unsigned int hdroff, 256 const struct nf_conntrack_tuple *tuple, 257 enum nf_nat_manip_type maniptype) 258{ 259 struct icmp6hdr *hdr; 260 261 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) 262 return false; 263 264 hdr = (struct icmp6hdr *)(skb->data + hdroff); 265 nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype); 266 if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST || 267 hdr->icmp6_type == ICMPV6_ECHO_REPLY) { 268 inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, 269 hdr->icmp6_identifier, 270 tuple->src.u.icmp.id, false); 271 hdr->icmp6_identifier = tuple->src.u.icmp.id; 272 } 273 return true; 274} 275 276/* manipulate a GRE packet according to maniptype */ 277static bool 278gre_manip_pkt(struct sk_buff *skb, 279 unsigned int iphdroff, unsigned int hdroff, 280 const struct nf_conntrack_tuple *tuple, 281 enum nf_nat_manip_type maniptype) 282{ 283#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) 284 const struct gre_base_hdr *greh; 285 struct pptp_gre_header *pgreh; 286 287 /* pgreh includes two optional 32bit fields which are not required 288 * to be there. That's where the magic '8' comes from */ 289 if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8)) 290 return false; 291 292 greh = (void *)skb->data + hdroff; 293 pgreh = (struct pptp_gre_header *)greh; 294 295 /* we only have destination manip of a packet, since 'source key' 296 * is not present in the packet itself */ 297 if (maniptype != NF_NAT_MANIP_DST) 298 return true; 299 300 switch (greh->flags & GRE_VERSION) { 301 case GRE_VERSION_0: 302 /* We do not currently NAT any GREv0 packets. 303 * Try to behave like "nf_nat_proto_unknown" */ 304 break; 305 case GRE_VERSION_1: 306 pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); 307 pgreh->call_id = tuple->dst.u.gre.key; 308 break; 309 default: 310 pr_debug("can't nat unknown GRE version\n"); 311 return false; 312 } 313#endif 314 return true; 315} 316 317static bool l4proto_manip_pkt(struct sk_buff *skb, 318 unsigned int iphdroff, unsigned int hdroff, 319 const struct nf_conntrack_tuple *tuple, 320 enum nf_nat_manip_type maniptype) 321{ 322 switch (tuple->dst.protonum) { 323 case IPPROTO_TCP: 324 return tcp_manip_pkt(skb, iphdroff, hdroff, 325 tuple, maniptype); 326 case IPPROTO_UDP: 327 return udp_manip_pkt(skb, iphdroff, hdroff, 328 tuple, maniptype); 329 case IPPROTO_UDPLITE: 330 return udplite_manip_pkt(skb, iphdroff, hdroff, 331 tuple, maniptype); 332 case IPPROTO_SCTP: 333 return sctp_manip_pkt(skb, iphdroff, hdroff, 334 tuple, maniptype); 335 case IPPROTO_ICMP: 336 return icmp_manip_pkt(skb, iphdroff, hdroff, 337 tuple, maniptype); 338 case IPPROTO_ICMPV6: 339 return icmpv6_manip_pkt(skb, iphdroff, hdroff, 340 tuple, maniptype); 341 case IPPROTO_DCCP: 342 return dccp_manip_pkt(skb, iphdroff, hdroff, 343 tuple, maniptype); 344 case IPPROTO_GRE: 345 return gre_manip_pkt(skb, iphdroff, hdroff, 346 tuple, maniptype); 347 } 348 349 /* If we don't know protocol -- no error, pass it unmodified. */ 350 return true; 351} 352 353static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, 354 unsigned int iphdroff, 355 const struct nf_conntrack_tuple *target, 356 enum nf_nat_manip_type maniptype) 357{ 358 struct iphdr *iph; 359 unsigned int hdroff; 360 361 if (skb_ensure_writable(skb, iphdroff + sizeof(*iph))) 362 return false; 363 364 iph = (void *)skb->data + iphdroff; 365 hdroff = iphdroff + iph->ihl * 4; 366 367 if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype)) 368 return false; 369 iph = (void *)skb->data + iphdroff; 370 371 if (maniptype == NF_NAT_MANIP_SRC) { 372 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); 373 iph->saddr = target->src.u3.ip; 374 } else { 375 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); 376 iph->daddr = target->dst.u3.ip; 377 } 378 return true; 379} 380 381static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, 382 unsigned int iphdroff, 383 const struct nf_conntrack_tuple *target, 384 enum nf_nat_manip_type maniptype) 385{ 386#if IS_ENABLED(CONFIG_IPV6) 387 struct ipv6hdr *ipv6h; 388 __be16 frag_off; 389 int hdroff; 390 u8 nexthdr; 391 392 if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h))) 393 return false; 394 395 ipv6h = (void *)skb->data + iphdroff; 396 nexthdr = ipv6h->nexthdr; 397 hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), 398 &nexthdr, &frag_off); 399 if (hdroff < 0) 400 goto manip_addr; 401 402 if ((frag_off & htons(~0x7)) == 0 && 403 !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype)) 404 return false; 405 406 /* must reload, offset might have changed */ 407 ipv6h = (void *)skb->data + iphdroff; 408 409manip_addr: 410 if (maniptype == NF_NAT_MANIP_SRC) 411 ipv6h->saddr = target->src.u3.in6; 412 else 413 ipv6h->daddr = target->dst.u3.in6; 414 415#endif 416 return true; 417} 418 419unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, 420 enum nf_nat_manip_type mtype, 421 enum ip_conntrack_dir dir) 422{ 423 struct nf_conntrack_tuple target; 424 425 /* We are aiming to look like inverse of other direction. */ 426 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); 427 428 switch (target.src.l3num) { 429 case NFPROTO_IPV6: 430 if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype)) 431 return NF_ACCEPT; 432 break; 433 case NFPROTO_IPV4: 434 if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype)) 435 return NF_ACCEPT; 436 break; 437 default: 438 WARN_ON_ONCE(1); 439 break; 440 } 441 442 return NF_DROP; 443} 444 445static void nf_nat_ipv4_csum_update(struct sk_buff *skb, 446 unsigned int iphdroff, __sum16 *check, 447 const struct nf_conntrack_tuple *t, 448 enum nf_nat_manip_type maniptype) 449{ 450 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 451 __be32 oldip, newip; 452 453 if (maniptype == NF_NAT_MANIP_SRC) { 454 oldip = iph->saddr; 455 newip = t->src.u3.ip; 456 } else { 457 oldip = iph->daddr; 458 newip = t->dst.u3.ip; 459 } 460 inet_proto_csum_replace4(check, skb, oldip, newip, true); 461} 462 463static void nf_nat_ipv6_csum_update(struct sk_buff *skb, 464 unsigned int iphdroff, __sum16 *check, 465 const struct nf_conntrack_tuple *t, 466 enum nf_nat_manip_type maniptype) 467{ 468#if IS_ENABLED(CONFIG_IPV6) 469 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); 470 const struct in6_addr *oldip, *newip; 471 472 if (maniptype == NF_NAT_MANIP_SRC) { 473 oldip = &ipv6h->saddr; 474 newip = &t->src.u3.in6; 475 } else { 476 oldip = &ipv6h->daddr; 477 newip = &t->dst.u3.in6; 478 } 479 inet_proto_csum_replace16(check, skb, oldip->s6_addr32, 480 newip->s6_addr32, true); 481#endif 482} 483 484static void nf_csum_update(struct sk_buff *skb, 485 unsigned int iphdroff, __sum16 *check, 486 const struct nf_conntrack_tuple *t, 487 enum nf_nat_manip_type maniptype) 488{ 489 switch (t->src.l3num) { 490 case NFPROTO_IPV4: 491 nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype); 492 return; 493 case NFPROTO_IPV6: 494 nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype); 495 return; 496 } 497} 498 499static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, 500 u8 proto, void *data, __sum16 *check, 501 int datalen, int oldlen) 502{ 503 if (skb->ip_summed != CHECKSUM_PARTIAL) { 504 const struct iphdr *iph = ip_hdr(skb); 505 506 skb->ip_summed = CHECKSUM_PARTIAL; 507 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + 508 ip_hdrlen(skb); 509 skb->csum_offset = (void *)check - data; 510 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, 511 proto, 0); 512 } else { 513 inet_proto_csum_replace2(check, skb, 514 htons(oldlen), htons(datalen), true); 515 } 516} 517 518#if IS_ENABLED(CONFIG_IPV6) 519static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, 520 u8 proto, void *data, __sum16 *check, 521 int datalen, int oldlen) 522{ 523 if (skb->ip_summed != CHECKSUM_PARTIAL) { 524 const struct ipv6hdr *ipv6h = ipv6_hdr(skb); 525 526 skb->ip_summed = CHECKSUM_PARTIAL; 527 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + 528 (data - (void *)skb->data); 529 skb->csum_offset = (void *)check - data; 530 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 531 datalen, proto, 0); 532 } else { 533 inet_proto_csum_replace2(check, skb, 534 htons(oldlen), htons(datalen), true); 535 } 536} 537#endif 538 539void nf_nat_csum_recalc(struct sk_buff *skb, 540 u8 nfproto, u8 proto, void *data, __sum16 *check, 541 int datalen, int oldlen) 542{ 543 switch (nfproto) { 544 case NFPROTO_IPV4: 545 nf_nat_ipv4_csum_recalc(skb, proto, data, check, 546 datalen, oldlen); 547 return; 548#if IS_ENABLED(CONFIG_IPV6) 549 case NFPROTO_IPV6: 550 nf_nat_ipv6_csum_recalc(skb, proto, data, check, 551 datalen, oldlen); 552 return; 553#endif 554 } 555 556 WARN_ON_ONCE(1); 557} 558 559int nf_nat_icmp_reply_translation(struct sk_buff *skb, 560 struct nf_conn *ct, 561 enum ip_conntrack_info ctinfo, 562 unsigned int hooknum) 563{ 564 struct { 565 struct icmphdr icmp; 566 struct iphdr ip; 567 } *inside; 568 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 569 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); 570 unsigned int hdrlen = ip_hdrlen(skb); 571 struct nf_conntrack_tuple target; 572 unsigned long statusbit; 573 574 WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); 575 576 if (skb_ensure_writable(skb, hdrlen + sizeof(*inside))) 577 return 0; 578 if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP)) 579 return 0; 580 581 inside = (void *)skb->data + hdrlen; 582 if (inside->icmp.type == ICMP_REDIRECT) { 583 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) 584 return 0; 585 if (ct->status & IPS_NAT_MASK) 586 return 0; 587 } 588 589 if (manip == NF_NAT_MANIP_SRC) 590 statusbit = IPS_SRC_NAT; 591 else 592 statusbit = IPS_DST_NAT; 593 594 /* Invert if this is reply direction */ 595 if (dir == IP_CT_DIR_REPLY) 596 statusbit ^= IPS_NAT_MASK; 597 598 if (!(ct->status & statusbit)) 599 return 1; 600 601 if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), 602 &ct->tuplehash[!dir].tuple, !manip)) 603 return 0; 604 605 if (skb->ip_summed != CHECKSUM_PARTIAL) { 606 /* Reloading "inside" here since manip_pkt may reallocate */ 607 inside = (void *)skb->data + hdrlen; 608 inside->icmp.checksum = 0; 609 inside->icmp.checksum = 610 csum_fold(skb_checksum(skb, hdrlen, 611 skb->len - hdrlen, 0)); 612 } 613 614 /* Change outer to look like the reply to an incoming packet */ 615 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); 616 target.dst.protonum = IPPROTO_ICMP; 617 if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) 618 return 0; 619 620 return 1; 621} 622EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 623 624static unsigned int 625nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, 626 const struct nf_hook_state *state) 627{ 628 struct nf_conn *ct; 629 enum ip_conntrack_info ctinfo; 630 631 ct = nf_ct_get(skb, &ctinfo); 632 if (!ct) 633 return NF_ACCEPT; 634 635 if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { 636 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 637 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 638 state->hook)) 639 return NF_DROP; 640 else 641 return NF_ACCEPT; 642 } 643 } 644 645 return nf_nat_inet_fn(priv, skb, state); 646} 647 648static unsigned int 649nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb, 650 const struct nf_hook_state *state) 651{ 652 unsigned int ret; 653 __be32 daddr = ip_hdr(skb)->daddr; 654 655 ret = nf_nat_ipv4_fn(priv, skb, state); 656 if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr) 657 skb_dst_drop(skb); 658 659 return ret; 660} 661 662#ifdef CONFIG_XFRM 663static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) 664{ 665 struct sock *sk = skb->sk; 666 struct dst_entry *dst; 667 unsigned int hh_len; 668 struct flowi fl; 669 int err; 670 671 err = xfrm_decode_session(net, skb, &fl, family); 672 if (err < 0) 673 return err; 674 675 dst = skb_dst(skb); 676 if (dst->xfrm) 677 dst = ((struct xfrm_dst *)dst)->route; 678 if (!dst_hold_safe(dst)) 679 return -EHOSTUNREACH; 680 681 if (sk && !net_eq(net, sock_net(sk))) 682 sk = NULL; 683 684 dst = xfrm_lookup(net, dst, &fl, sk, 0); 685 if (IS_ERR(dst)) 686 return PTR_ERR(dst); 687 688 skb_dst_drop(skb); 689 skb_dst_set(skb, dst); 690 691 /* Change in oif may mean change in hh_len. */ 692 hh_len = skb_dst(skb)->dev->hard_header_len; 693 if (skb_headroom(skb) < hh_len && 694 pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) 695 return -ENOMEM; 696 return 0; 697} 698#endif 699 700static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport) 701{ 702 enum ip_conntrack_info ctinfo; 703 enum ip_conntrack_dir dir; 704 const struct nf_conn *ct; 705 706 ct = nf_ct_get(skb, &ctinfo); 707 if (!ct) 708 return false; 709 710 switch (nf_ct_protonum(ct)) { 711 case IPPROTO_TCP: 712 case IPPROTO_UDP: 713 break; 714 default: 715 return false; 716 } 717 718 dir = CTINFO2DIR(ctinfo); 719 if (dir != IP_CT_DIR_ORIGINAL) 720 return false; 721 722 return ct->tuplehash[!dir].tuple.dst.u.all != sport; 723} 724 725static unsigned int 726nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, 727 const struct nf_hook_state *state) 728{ 729 __be32 saddr = ip_hdr(skb)->saddr; 730 struct sock *sk = skb->sk; 731 unsigned int ret; 732 733 ret = nf_nat_ipv4_fn(priv, skb, state); 734 735 if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) 736 return ret; 737 738 /* skb has a socket assigned via tcp edemux. We need to check 739 * if nf_nat_ipv4_fn() has mangled the packet in a way that 740 * edemux would not have found this socket. 741 * 742 * This includes both changes to the source address and changes 743 * to the source port, which are both handled by the 744 * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux 745 * might have found a socket for the old (pre-snat) address. 746 */ 747 if (saddr != ip_hdr(skb)->saddr || 748 nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) 749 skb_orphan(skb); /* TCP edemux obtained wrong socket */ 750 751 return ret; 752} 753 754static unsigned int 755nf_nat_ipv4_out(void *priv, struct sk_buff *skb, 756 const struct nf_hook_state *state) 757{ 758#ifdef CONFIG_XFRM 759 const struct nf_conn *ct; 760 enum ip_conntrack_info ctinfo; 761 int err; 762#endif 763 unsigned int ret; 764 765 ret = nf_nat_ipv4_fn(priv, skb, state); 766#ifdef CONFIG_XFRM 767 if (ret != NF_ACCEPT) 768 return ret; 769 770 if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) 771 return ret; 772 773 ct = nf_ct_get(skb, &ctinfo); 774 if (ct) { 775 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 776 777 if (ct->tuplehash[dir].tuple.src.u3.ip != 778 ct->tuplehash[!dir].tuple.dst.u3.ip || 779 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 780 ct->tuplehash[dir].tuple.src.u.all != 781 ct->tuplehash[!dir].tuple.dst.u.all)) { 782 err = nf_xfrm_me_harder(state->net, skb, AF_INET); 783 if (err < 0) 784 ret = NF_DROP_ERR(err); 785 } 786 } 787#endif 788 return ret; 789} 790 791static unsigned int 792nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, 793 const struct nf_hook_state *state) 794{ 795 const struct nf_conn *ct; 796 enum ip_conntrack_info ctinfo; 797 unsigned int ret; 798 int err; 799 800 ret = nf_nat_ipv4_fn(priv, skb, state); 801 if (ret != NF_ACCEPT) 802 return ret; 803 804 ct = nf_ct_get(skb, &ctinfo); 805 if (ct) { 806 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 807 808 if (ct->tuplehash[dir].tuple.dst.u3.ip != 809 ct->tuplehash[!dir].tuple.src.u3.ip) { 810 err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); 811 if (err < 0) 812 ret = NF_DROP_ERR(err); 813 } 814#ifdef CONFIG_XFRM 815 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 816 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 817 ct->tuplehash[dir].tuple.dst.u.all != 818 ct->tuplehash[!dir].tuple.src.u.all) { 819 err = nf_xfrm_me_harder(state->net, skb, AF_INET); 820 if (err < 0) 821 ret = NF_DROP_ERR(err); 822 } 823#endif 824 } 825 return ret; 826} 827 828static const struct nf_hook_ops nf_nat_ipv4_ops[] = { 829 /* Before packet filtering, change destination */ 830 { 831 .hook = nf_nat_ipv4_pre_routing, 832 .pf = NFPROTO_IPV4, 833 .hooknum = NF_INET_PRE_ROUTING, 834 .priority = NF_IP_PRI_NAT_DST, 835 }, 836 /* After packet filtering, change source */ 837 { 838 .hook = nf_nat_ipv4_out, 839 .pf = NFPROTO_IPV4, 840 .hooknum = NF_INET_POST_ROUTING, 841 .priority = NF_IP_PRI_NAT_SRC, 842 }, 843 /* Before packet filtering, change destination */ 844 { 845 .hook = nf_nat_ipv4_local_fn, 846 .pf = NFPROTO_IPV4, 847 .hooknum = NF_INET_LOCAL_OUT, 848 .priority = NF_IP_PRI_NAT_DST, 849 }, 850 /* After packet filtering, change source */ 851 { 852 .hook = nf_nat_ipv4_local_in, 853 .pf = NFPROTO_IPV4, 854 .hooknum = NF_INET_LOCAL_IN, 855 .priority = NF_IP_PRI_NAT_SRC, 856 }, 857}; 858 859int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) 860{ 861 return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops, 862 ARRAY_SIZE(nf_nat_ipv4_ops)); 863} 864EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn); 865 866void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) 867{ 868 nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 869} 870EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn); 871 872#if IS_ENABLED(CONFIG_IPV6) 873int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, 874 struct nf_conn *ct, 875 enum ip_conntrack_info ctinfo, 876 unsigned int hooknum, 877 unsigned int hdrlen) 878{ 879 struct { 880 struct icmp6hdr icmp6; 881 struct ipv6hdr ip6; 882 } *inside; 883 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 884 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); 885 struct nf_conntrack_tuple target; 886 unsigned long statusbit; 887 888 WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); 889 890 if (skb_ensure_writable(skb, hdrlen + sizeof(*inside))) 891 return 0; 892 if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) 893 return 0; 894 895 inside = (void *)skb->data + hdrlen; 896 if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { 897 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) 898 return 0; 899 if (ct->status & IPS_NAT_MASK) 900 return 0; 901 } 902 903 if (manip == NF_NAT_MANIP_SRC) 904 statusbit = IPS_SRC_NAT; 905 else 906 statusbit = IPS_DST_NAT; 907 908 /* Invert if this is reply direction */ 909 if (dir == IP_CT_DIR_REPLY) 910 statusbit ^= IPS_NAT_MASK; 911 912 if (!(ct->status & statusbit)) 913 return 1; 914 915 if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), 916 &ct->tuplehash[!dir].tuple, !manip)) 917 return 0; 918 919 if (skb->ip_summed != CHECKSUM_PARTIAL) { 920 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 921 922 inside = (void *)skb->data + hdrlen; 923 inside->icmp6.icmp6_cksum = 0; 924 inside->icmp6.icmp6_cksum = 925 csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 926 skb->len - hdrlen, IPPROTO_ICMPV6, 927 skb_checksum(skb, hdrlen, 928 skb->len - hdrlen, 0)); 929 } 930 931 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); 932 target.dst.protonum = IPPROTO_ICMPV6; 933 if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) 934 return 0; 935 936 return 1; 937} 938EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); 939 940static unsigned int 941nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, 942 const struct nf_hook_state *state) 943{ 944 struct nf_conn *ct; 945 enum ip_conntrack_info ctinfo; 946 __be16 frag_off; 947 int hdrlen; 948 u8 nexthdr; 949 950 ct = nf_ct_get(skb, &ctinfo); 951 /* Can't track? It's not due to stress, or conntrack would 952 * have dropped it. Hence it's the user's responsibilty to 953 * packet filter it out, or implement conntrack/NAT for that 954 * protocol. 8) --RR 955 */ 956 if (!ct) 957 return NF_ACCEPT; 958 959 if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { 960 nexthdr = ipv6_hdr(skb)->nexthdr; 961 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), 962 &nexthdr, &frag_off); 963 964 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 965 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, 966 state->hook, 967 hdrlen)) 968 return NF_DROP; 969 else 970 return NF_ACCEPT; 971 } 972 } 973 974 return nf_nat_inet_fn(priv, skb, state); 975} 976 977static unsigned int 978nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb, 979 const struct nf_hook_state *state) 980{ 981 struct in6_addr saddr = ipv6_hdr(skb)->saddr; 982 struct sock *sk = skb->sk; 983 unsigned int ret; 984 985 ret = nf_nat_ipv6_fn(priv, skb, state); 986 987 if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) 988 return ret; 989 990 /* see nf_nat_ipv4_local_in */ 991 if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) || 992 nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) 993 skb_orphan(skb); 994 995 return ret; 996} 997 998static unsigned int 999nf_nat_ipv6_in(void *priv, struct sk_buff *skb, 1000 const struct nf_hook_state *state) 1001{ 1002 unsigned int ret, verdict; 1003 struct in6_addr daddr = ipv6_hdr(skb)->daddr; 1004 1005 ret = nf_nat_ipv6_fn(priv, skb, state); 1006 verdict = ret & NF_VERDICT_MASK; 1007 if (verdict != NF_DROP && verdict != NF_STOLEN && 1008 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) 1009 skb_dst_drop(skb); 1010 1011 return ret; 1012} 1013 1014static unsigned int 1015nf_nat_ipv6_out(void *priv, struct sk_buff *skb, 1016 const struct nf_hook_state *state) 1017{ 1018#ifdef CONFIG_XFRM 1019 const struct nf_conn *ct; 1020 enum ip_conntrack_info ctinfo; 1021 int err; 1022#endif 1023 unsigned int ret; 1024 1025 ret = nf_nat_ipv6_fn(priv, skb, state); 1026#ifdef CONFIG_XFRM 1027 if (ret != NF_ACCEPT) 1028 return ret; 1029 1030 if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) 1031 return ret; 1032 ct = nf_ct_get(skb, &ctinfo); 1033 if (ct) { 1034 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 1035 1036 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, 1037 &ct->tuplehash[!dir].tuple.dst.u3) || 1038 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 1039 ct->tuplehash[dir].tuple.src.u.all != 1040 ct->tuplehash[!dir].tuple.dst.u.all)) { 1041 err = nf_xfrm_me_harder(state->net, skb, AF_INET6); 1042 if (err < 0) 1043 ret = NF_DROP_ERR(err); 1044 } 1045 } 1046#endif 1047 1048 return ret; 1049} 1050 1051static unsigned int 1052nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, 1053 const struct nf_hook_state *state) 1054{ 1055 const struct nf_conn *ct; 1056 enum ip_conntrack_info ctinfo; 1057 unsigned int ret; 1058 int err; 1059 1060 ret = nf_nat_ipv6_fn(priv, skb, state); 1061 if (ret != NF_ACCEPT) 1062 return ret; 1063 1064 ct = nf_ct_get(skb, &ctinfo); 1065 if (ct) { 1066 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 1067 1068 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, 1069 &ct->tuplehash[!dir].tuple.src.u3)) { 1070 err = nf_ip6_route_me_harder(state->net, state->sk, skb); 1071 if (err < 0) 1072 ret = NF_DROP_ERR(err); 1073 } 1074#ifdef CONFIG_XFRM 1075 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 1076 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 1077 ct->tuplehash[dir].tuple.dst.u.all != 1078 ct->tuplehash[!dir].tuple.src.u.all) { 1079 err = nf_xfrm_me_harder(state->net, skb, AF_INET6); 1080 if (err < 0) 1081 ret = NF_DROP_ERR(err); 1082 } 1083#endif 1084 } 1085 1086 return ret; 1087} 1088 1089static const struct nf_hook_ops nf_nat_ipv6_ops[] = { 1090 /* Before packet filtering, change destination */ 1091 { 1092 .hook = nf_nat_ipv6_in, 1093 .pf = NFPROTO_IPV6, 1094 .hooknum = NF_INET_PRE_ROUTING, 1095 .priority = NF_IP6_PRI_NAT_DST, 1096 }, 1097 /* After packet filtering, change source */ 1098 { 1099 .hook = nf_nat_ipv6_out, 1100 .pf = NFPROTO_IPV6, 1101 .hooknum = NF_INET_POST_ROUTING, 1102 .priority = NF_IP6_PRI_NAT_SRC, 1103 }, 1104 /* Before packet filtering, change destination */ 1105 { 1106 .hook = nf_nat_ipv6_local_fn, 1107 .pf = NFPROTO_IPV6, 1108 .hooknum = NF_INET_LOCAL_OUT, 1109 .priority = NF_IP6_PRI_NAT_DST, 1110 }, 1111 /* After packet filtering, change source */ 1112 { 1113 .hook = nf_nat_ipv6_local_in, 1114 .pf = NFPROTO_IPV6, 1115 .hooknum = NF_INET_LOCAL_IN, 1116 .priority = NF_IP6_PRI_NAT_SRC, 1117 }, 1118}; 1119 1120int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) 1121{ 1122 return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops, 1123 ARRAY_SIZE(nf_nat_ipv6_ops)); 1124} 1125EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn); 1126 1127void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) 1128{ 1129 nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 1130} 1131EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn); 1132#endif /* CONFIG_IPV6 */ 1133 1134#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT) 1135int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops) 1136{ 1137 int ret; 1138 1139 if (WARN_ON_ONCE(ops->pf != NFPROTO_INET)) 1140 return -EINVAL; 1141 1142 ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops, 1143 ARRAY_SIZE(nf_nat_ipv6_ops)); 1144 if (ret) 1145 return ret; 1146 1147 ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops, 1148 ARRAY_SIZE(nf_nat_ipv4_ops)); 1149 if (ret) 1150 nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, 1151 ARRAY_SIZE(nf_nat_ipv6_ops)); 1152 return ret; 1153} 1154EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn); 1155 1156void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops) 1157{ 1158 nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 1159 nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 1160} 1161EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn); 1162#endif /* NFT INET NAT */ 1163