1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015-2019 Yandex LLC 5 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/11/sys/netpfil/ipfw/nat64/nat64_translate.c 364162 2020-08-12 12:08:50Z ae $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/counter.h> 35#include <sys/errno.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/mbuf.h> 39#include <sys/module.h> 40#include <sys/rmlock.h> 41#include <sys/rwlock.h> 42#include <sys/socket.h> 43#include <sys/queue.h> 44 45#include <net/if.h> 46#include <net/if_var.h> 47#include <net/if_pflog.h> 48#include <net/pfil.h> 49#include <net/netisr.h> 50#include <net/route.h> 51 52#include <netinet/in.h> 53#include <netinet/in_fib.h> 54#include <netinet/ip.h> 55#include <netinet/ip_var.h> 56#include <netinet/ip_fw.h> 57#include <netinet/ip6.h> 58#include <netinet/icmp6.h> 59#include <netinet/ip_icmp.h> 60#include <netinet/tcp.h> 61#include <netinet/udp.h> 62#include <netinet6/in6_var.h> 63#include <netinet6/in6_fib.h> 64#include <netinet6/ip6_var.h> 65#include <netinet6/ip_fw_nat64.h> 66 67#include <netpfil/pf/pf.h> 68#include <netpfil/ipfw/ip_fw_private.h> 69#include <machine/in_cksum.h> 70 71#include "ip_fw_nat64.h" 72#include "nat64_translate.h" 73 74 75typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *, 76 struct sockaddr *, struct nat64_counters *, void *); 77typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *, 78 void *); 79 80static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *, 81 struct mbuf *); 82static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *, 83 struct mbuf *); 84static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *); 85static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *, 86 struct nat64_counters *, void *); 87static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *, 88 void *); 89static int nat64_direct_output(struct ifnet *, struct mbuf *, 90 struct sockaddr *, struct nat64_counters *, void *); 91 92struct nat64_methods { 93 nat64_output_t output; 94 nat64_output_one_t output_one; 95}; 96static const struct nat64_methods nat64_netisr = { 97 .output = nat64_output, 98 .output_one = nat64_output_one 99}; 100static const struct nat64_methods nat64_direct = { 101 .output = nat64_direct_output, 102 .output_one = nat64_direct_output_one 103}; 104static VNET_DEFINE(const struct nat64_methods *, nat64out) = &nat64_netisr; 105#define V_nat64out VNET(nat64out) 106 107void 108nat64_set_output_method(int direct) 109{ 110 111 V_nat64out = direct != 0 ? &nat64_direct: &nat64_netisr; 112} 113 114int 115nat64_get_output_method(void) 116{ 117 118 return (V_nat64out == &nat64_direct ? 1: 0); 119} 120 121static void 122nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) 123{ 124 125 logdata->dir = PF_OUT; 126 logdata->af = family; 127 ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); 128} 129 130static int 131nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 132 struct nat64_counters *stats, void *logdata) 133{ 134 int error; 135 136 if (logdata != NULL) 137 nat64_log(logdata, m, dst->sa_family); 138 error = (*ifp->if_output)(ifp, m, dst, NULL); 139 if (error != 0) 140 NAT64STAT_INC(stats, oerrors); 141 return (error); 142} 143 144static int 145nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats, 146 void *logdata) 147{ 148 struct nhop6_basic nh6; 149 struct nhop4_basic nh4; 150 struct sockaddr_in6 dst6; 151 struct sockaddr_in dst4; 152 struct sockaddr *dst; 153 struct ip6_hdr *ip6; 154 struct ip *ip4; 155 struct ifnet *ifp; 156 int error; 157 158 ip4 = mtod(m, struct ip *); 159 switch (ip4->ip_v) { 160 case IPVERSION: 161 dst4.sin_addr = ip4->ip_dst; 162 error = nat64_find_route4(&nh4, &dst4, m); 163 if (error != 0) 164 NAT64STAT_INC(stats, noroute4); 165 else { 166 ifp = nh4.nh_ifp; 167 dst = (struct sockaddr *)&dst4; 168 } 169 break; 170 case (IPV6_VERSION >> 4): 171 ip6 = mtod(m, struct ip6_hdr *); 172 dst6.sin6_addr = ip6->ip6_dst; 173 error = nat64_find_route6(&nh6, &dst6, m); 174 if (error != 0) 175 NAT64STAT_INC(stats, noroute6); 176 else { 177 ifp = nh6.nh_ifp; 178 dst = (struct sockaddr *)&dst6; 179 } 180 break; 181 default: 182 m_freem(m); 183 NAT64STAT_INC(stats, dropped); 184 DPRINTF(DP_DROPS, "dropped due to unknown IP version"); 185 return (EAFNOSUPPORT); 186 } 187 if (error != 0) { 188 m_freem(m); 189 return (EHOSTUNREACH); 190 } 191 if (logdata != NULL) 192 nat64_log(logdata, m, dst->sa_family); 193 error = (*ifp->if_output)(ifp, m, dst, NULL); 194 if (error != 0) 195 NAT64STAT_INC(stats, oerrors); 196 return (error); 197} 198 199static int 200nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 201 struct nat64_counters *stats, void *logdata) 202{ 203 struct ip *ip4; 204 int ret, af; 205 206 ip4 = mtod(m, struct ip *); 207 switch (ip4->ip_v) { 208 case IPVERSION: 209 af = AF_INET; 210 ret = NETISR_IP; 211 break; 212 case (IPV6_VERSION >> 4): 213 af = AF_INET6; 214 ret = NETISR_IPV6; 215 break; 216 default: 217 m_freem(m); 218 NAT64STAT_INC(stats, dropped); 219 DPRINTF(DP_DROPS, "unknown IP version"); 220 return (EAFNOSUPPORT); 221 } 222 if (logdata != NULL) 223 nat64_log(logdata, m, af); 224 if (m->m_pkthdr.rcvif == NULL) 225 m->m_pkthdr.rcvif = V_loif; 226 ret = netisr_queue(ret, m); 227 if (ret != 0) 228 NAT64STAT_INC(stats, oerrors); 229 return (ret); 230} 231 232static int 233nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata) 234{ 235 236 return (nat64_output(NULL, m, NULL, stats, logdata)); 237} 238 239/* 240 * Check the given IPv6 prefix and length according to RFC6052: 241 * The prefixes can only have one of the following lengths: 242 * 32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long). 243 * Returns zero on success, otherwise EINVAL. 244 */ 245int 246nat64_check_prefixlen(int length) 247{ 248 249 switch (length) { 250 case 32: 251 case 40: 252 case 48: 253 case 56: 254 case 64: 255 case 96: 256 return (0); 257 } 258 return (EINVAL); 259} 260 261int 262nat64_check_prefix6(const struct in6_addr *prefix, int length) 263{ 264 265 if (nat64_check_prefixlen(length) != 0) 266 return (EINVAL); 267 268 /* Well-known prefix has 96 prefix length */ 269 if (IN6_IS_ADDR_WKPFX(prefix) && length != 96) 270 return (EINVAL); 271 272 /* Bits 64 to 71 must be set to zero */ 273 if (prefix->__u6_addr.__u6_addr8[8] != 0) 274 return (EINVAL); 275 276 /* Some extra checks */ 277 if (IN6_IS_ADDR_MULTICAST(prefix) || 278 IN6_IS_ADDR_UNSPECIFIED(prefix) || 279 IN6_IS_ADDR_LOOPBACK(prefix)) 280 return (EINVAL); 281 return (0); 282} 283 284int 285nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia) 286{ 287 288 if (cfg->flags & NAT64_ALLOW_PRIVATE) 289 return (0); 290 291 /* WKPFX must not be used to represent non-global IPv4 addresses */ 292 if (cfg->flags & NAT64_WKPFX) { 293 /* IN_PRIVATE */ 294 if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || 295 (ia & htonl(0xfff00000)) == htonl(0xac100000) || 296 (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) 297 return (1); 298 /* 299 * RFC 5735: 300 * 192.0.0.0/24 - reserved for IETF protocol assignments 301 * 192.88.99.0/24 - for use as 6to4 relay anycast addresses 302 * 198.18.0.0/15 - for use in benchmark tests 303 * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use 304 * in documentation and example code 305 */ 306 if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || 307 (ia & htonl(0xffffff00)) == htonl(0xc0586300) || 308 (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || 309 (ia & htonl(0xffffff00)) == htonl(0xc0000200) || 310 (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || 311 (ia & htonl(0xffffff00)) == htonl(0xcb007100)) 312 return (1); 313 } 314 return (0); 315} 316 317/* 318 * Embed @ia IPv4 address into @ip6 IPv6 address. 319 * Place to embedding determined from prefix length @plen. 320 */ 321void 322nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia) 323{ 324 325 switch (plen) { 326 case 32: 327 case 96: 328 ip6->s6_addr32[plen / 32] = ia; 329 break; 330 case 40: 331 case 48: 332 case 56: 333 /* 334 * Preserve prefix bits. 335 * Since suffix bits should be zero and reserved for future 336 * use, we just overwrite the whole word, where they are. 337 */ 338 ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32); 339#if BYTE_ORDER == BIG_ENDIAN 340 ip6->s6_addr32[1] |= ia >> (plen % 32); 341 ip6->s6_addr32[2] = ia << (24 - plen % 32); 342#elif BYTE_ORDER == LITTLE_ENDIAN 343 ip6->s6_addr32[1] |= ia << (plen % 32); 344 ip6->s6_addr32[2] = ia >> (24 - plen % 32); 345#endif 346 break; 347 case 64: 348#if BYTE_ORDER == BIG_ENDIAN 349 ip6->s6_addr32[2] = ia >> 8; 350 ip6->s6_addr32[3] = ia << 24; 351#elif BYTE_ORDER == LITTLE_ENDIAN 352 ip6->s6_addr32[2] = ia << 8; 353 ip6->s6_addr32[3] = ia >> 24; 354#endif 355 break; 356 default: 357 panic("Wrong plen: %d", plen); 358 }; 359 /* 360 * Bits 64 to 71 of the address are reserved for compatibility 361 * with the host identifier format defined in the IPv6 addressing 362 * architecture [RFC4291]. These bits MUST be set to zero. 363 */ 364 ip6->s6_addr8[8] = 0; 365} 366 367in_addr_t 368nat64_extract_ip4(const struct in6_addr *ip6, int plen) 369{ 370 in_addr_t ia; 371 372 /* 373 * According to RFC 6052 p2.2: 374 * IPv4-embedded IPv6 addresses are composed of a variable-length 375 * prefix, the embedded IPv4 address, and a variable length suffix. 376 * The suffix bits are reserved for future extensions and SHOULD 377 * be set to zero. 378 */ 379 switch (plen) { 380 case 32: 381 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) 382 goto badip6; 383 break; 384 case 40: 385 if (ip6->s6_addr32[3] != 0 || 386 (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) 387 goto badip6; 388 break; 389 case 48: 390 if (ip6->s6_addr32[3] != 0 || 391 (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) 392 goto badip6; 393 break; 394 case 56: 395 if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) 396 goto badip6; 397 break; 398 case 64: 399 if (ip6->s6_addr8[8] != 0 || 400 (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) 401 goto badip6; 402 }; 403 switch (plen) { 404 case 32: 405 case 96: 406 ia = ip6->s6_addr32[plen / 32]; 407 break; 408 case 40: 409 case 48: 410 case 56: 411#if BYTE_ORDER == BIG_ENDIAN 412 ia = (ip6->s6_addr32[1] << (plen % 32)) | 413 (ip6->s6_addr32[2] >> (24 - plen % 32)); 414#elif BYTE_ORDER == LITTLE_ENDIAN 415 ia = (ip6->s6_addr32[1] >> (plen % 32)) | 416 (ip6->s6_addr32[2] << (24 - plen % 32)); 417#endif 418 break; 419 case 64: 420#if BYTE_ORDER == BIG_ENDIAN 421 ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); 422#elif BYTE_ORDER == LITTLE_ENDIAN 423 ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); 424#endif 425 break; 426 default: 427 return (0); 428 }; 429 if (nat64_check_ip4(ia) == 0) 430 return (ia); 431 432 DPRINTF(DP_GENERIC | DP_DROPS, 433 "invalid destination address: %08x", ia); 434 return (0); 435badip6: 436 DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address"); 437 return (0); 438} 439 440/* 441 * According to RFC 1624 the equation for incremental checksum update is: 442 * HC' = ~(~HC + ~m + m') -- [Eqn. 3] 443 * HC' = HC - ~m - m' -- [Eqn. 4] 444 * So, when we are replacing IPv4 addresses to IPv6, we 445 * can assume, that new bytes previously were zeros, and vise versa - 446 * when we replacing IPv6 addresses to IPv4, now unused bytes become 447 * zeros. The payload length in pseudo header has bigger size, but one 448 * half of it should be zero. Using the equation 4 we get: 449 * HC' = HC - (~m0 + m0') -- m0 is first changed word 450 * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word 451 * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = 452 * = HC - sum(~m[i] + m'[i]) 453 * 454 * The function result should be used as follows: 455 * IPv6 to IPv4: HC' = cksum_add(HC, result) 456 * IPv4 to IPv6: HC' = cksum_add(HC, ~result) 457 */ 458static uint16_t 459nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) 460{ 461 uint32_t sum; 462 uint16_t *p; 463 464 sum = ~ip->ip_src.s_addr >> 16; 465 sum += ~ip->ip_src.s_addr & 0xffff; 466 sum += ~ip->ip_dst.s_addr >> 16; 467 sum += ~ip->ip_dst.s_addr & 0xffff; 468 469 for (p = (uint16_t *)&ip6->ip6_src; 470 p < (uint16_t *)(&ip6->ip6_src + 2); p++) 471 sum += *p; 472 473 while (sum >> 16) 474 sum = (sum & 0xffff) + (sum >> 16); 475 return (sum); 476} 477 478static void 479nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, 480 uint16_t plen, uint8_t proto, struct ip *ip) 481{ 482 483 /* assume addresses are already initialized */ 484 ip->ip_v = IPVERSION; 485 ip->ip_hl = sizeof(*ip) >> 2; 486 ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 487 ip->ip_len = htons(sizeof(*ip) + plen); 488 ip->ip_ttl = ip6->ip6_hlim; 489 /* Forwarding code will decrement TTL for netisr based output. */ 490 if (V_nat64out == &nat64_direct) 491 ip->ip_ttl -= IPV6_HLIMDEC; 492 ip->ip_sum = 0; 493 ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto; 494 ip_fillid(ip); 495 if (frag != NULL) { 496 ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); 497 if (frag->ip6f_offlg & IP6F_MORE_FRAG) 498 ip->ip_off |= htons(IP_MF); 499 } else { 500 ip->ip_off = htons(IP_DF); 501 } 502 ip->ip_sum = in_cksum_hdr(ip); 503} 504 505#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) 506static NAT64NOINLINE int 507nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6, 508 struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id, 509 uint16_t ip_off) 510{ 511 struct ip6_frag ip6f; 512 struct mbuf *n; 513 uint16_t hlen, len, offset; 514 int plen; 515 516 plen = ntohs(ip6->ip6_plen); 517 hlen = sizeof(struct ip6_hdr); 518 519 /* Fragmentation isn't needed */ 520 if (ip_off == 0 && plen <= mtu - hlen) { 521 M_PREPEND(m, hlen, M_NOWAIT); 522 if (m == NULL) { 523 NAT64STAT_INC(stats, nomem); 524 return (ENOMEM); 525 } 526 bcopy(ip6, mtod(m, void *), hlen); 527 if (mbufq_enqueue(mq, m) != 0) { 528 m_freem(m); 529 NAT64STAT_INC(stats, dropped); 530 DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); 531 return (ENOBUFS); 532 } 533 return (0); 534 } 535 536 hlen += sizeof(struct ip6_frag); 537 ip6f.ip6f_reserved = 0; 538 ip6f.ip6f_nxt = ip6->ip6_nxt; 539 ip6->ip6_nxt = IPPROTO_FRAGMENT; 540 if (ip_off != 0) { 541 /* 542 * We have got an IPv4 fragment. 543 * Use offset value and ip_id from original fragment. 544 */ 545 ip6f.ip6f_ident = htonl(ntohs(ip_id)); 546 offset = (ntohs(ip_off) & IP_OFFMASK) << 3; 547 NAT64STAT_INC(stats, ifrags); 548 } else { 549 /* The packet size exceeds interface MTU */ 550 ip6f.ip6f_ident = htonl(ip6_randomid()); 551 offset = 0; /* First fragment*/ 552 } 553 while (plen > 0 && m != NULL) { 554 n = NULL; 555 len = FRAGSZ(mtu) & ~7; 556 if (len > plen) 557 len = plen; 558 ip6->ip6_plen = htons(len + sizeof(ip6f)); 559 ip6f.ip6f_offlg = ntohs(offset); 560 if (len < plen || (ip_off & htons(IP_MF)) != 0) 561 ip6f.ip6f_offlg |= IP6F_MORE_FRAG; 562 offset += len; 563 plen -= len; 564 if (plen > 0) { 565 n = m_split(m, len, M_NOWAIT); 566 if (n == NULL) 567 goto fail; 568 } 569 M_PREPEND(m, hlen, M_NOWAIT); 570 if (m == NULL) 571 goto fail; 572 bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); 573 bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), 574 sizeof(struct ip6_frag)); 575 if (mbufq_enqueue(mq, m) != 0) 576 goto fail; 577 m = n; 578 } 579 NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); 580 return (0); 581fail: 582 if (m != NULL) 583 m_freem(m); 584 if (n != NULL) 585 m_freem(n); 586 mbufq_drain(mq); 587 NAT64STAT_INC(stats, nomem); 588 return (ENOMEM); 589} 590 591static NAT64NOINLINE int 592nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst, 593 struct mbuf *m) 594{ 595 596 if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0, 597 pnh) != 0) 598 return (EHOSTUNREACH); 599 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) 600 return (EHOSTUNREACH); 601 /* 602 * XXX: we need to use destination address with embedded scope 603 * zone id, because LLTABLE uses such form of addresses for lookup. 604 */ 605 dst->sin6_family = AF_INET6; 606 dst->sin6_len = sizeof(*dst); 607 dst->sin6_addr = pnh->nh_addr; 608 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 609 dst->sin6_addr.s6_addr16[1] = 610 htons(pnh->nh_ifp->if_index & 0xffff); 611 dst->sin6_port = 0; 612 dst->sin6_scope_id = 0; 613 dst->sin6_flowinfo = 0; 614 615 return (0); 616} 617 618#define NAT64_ICMP6_PLEN 64 619static NAT64NOINLINE void 620nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, 621 struct nat64_counters *stats, void *logdata) 622{ 623 struct icmp6_hdr *icmp6; 624 struct ip6_hdr *ip6, *oip6; 625 struct mbuf *n; 626 int len, plen; 627 628 len = 0; 629 plen = nat64_getlasthdr(m, &len); 630 if (plen < 0) { 631 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 632 goto freeit; 633 } 634 /* 635 * Do not send ICMPv6 in reply to ICMPv6 errors. 636 */ 637 if (plen == IPPROTO_ICMPV6) { 638 if (m->m_len < len + sizeof(*icmp6)) { 639 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 640 goto freeit; 641 } 642 icmp6 = mtodo(m, len); 643 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || 644 icmp6->icmp6_type == ND_REDIRECT) { 645 DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " 646 "ICMPv6 errors"); 647 goto freeit; 648 } 649 } 650 /* 651 if (icmp6_ratelimit(&ip6->ip6_src, type, code)) 652 goto freeit; 653 */ 654 ip6 = mtod(m, struct ip6_hdr *); 655 switch (type) { 656 case ICMP6_DST_UNREACH: 657 case ICMP6_PACKET_TOO_BIG: 658 case ICMP6_TIME_EXCEEDED: 659 case ICMP6_PARAM_PROB: 660 break; 661 default: 662 goto freeit; 663 } 664 /* Calculate length of ICMPv6 payload */ 665 len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: 666 m->m_pkthdr.len; 667 668 /* Create new ICMPv6 datagram */ 669 plen = len + sizeof(struct icmp6_hdr); 670 n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, 671 MT_HEADER, M_PKTHDR); 672 if (n == NULL) { 673 NAT64STAT_INC(stats, nomem); 674 m_freem(m); 675 return; 676 } 677 /* 678 * Move pkthdr from original mbuf. We should have initialized some 679 * fields, because we can reinject this mbuf to netisr and it will 680 * go trough input path (it requires at least rcvif should be set). 681 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf 682 * in the chain, when we will do M_PREPEND() or make some type of 683 * tunneling. 684 */ 685 m_move_pkthdr(n, m); 686 M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); 687 688 n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; 689 oip6 = mtod(n, struct ip6_hdr *); 690 oip6->ip6_src = ip6->ip6_dst; 691 oip6->ip6_dst = ip6->ip6_src; 692 oip6->ip6_nxt = IPPROTO_ICMPV6; 693 oip6->ip6_flow = 0; 694 oip6->ip6_vfc |= IPV6_VERSION; 695 oip6->ip6_hlim = V_ip6_defhlim; 696 oip6->ip6_plen = htons(plen); 697 698 icmp6 = mtodo(n, sizeof(struct ip6_hdr)); 699 icmp6->icmp6_cksum = 0; 700 icmp6->icmp6_type = type; 701 icmp6->icmp6_code = code; 702 icmp6->icmp6_mtu = htonl(mtu); 703 704 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + 705 sizeof(struct icmp6_hdr))); 706 icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, 707 sizeof(struct ip6_hdr), plen); 708 m_freem(m); 709 V_nat64out->output_one(n, stats, logdata); 710 return; 711freeit: 712 NAT64STAT_INC(stats, dropped); 713 m_freem(m); 714} 715 716static NAT64NOINLINE int 717nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst, 718 struct mbuf *m) 719{ 720 721 if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0) 722 return (EHOSTUNREACH); 723 if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) 724 return (EHOSTUNREACH); 725 726 dst->sin_family = AF_INET; 727 dst->sin_len = sizeof(*dst); 728 dst->sin_addr = pnh->nh_addr; 729 dst->sin_port = 0; 730 return (0); 731} 732 733#define NAT64_ICMP_PLEN 64 734static NAT64NOINLINE void 735nat64_icmp_reflect(struct mbuf *m, uint8_t type, 736 uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata) 737{ 738 struct icmp *icmp; 739 struct ip *ip, *oip; 740 struct mbuf *n; 741 int len, plen; 742 743 ip = mtod(m, struct ip *); 744 /* Do not send ICMP error if packet is not the first fragment */ 745 if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { 746 DPRINTF(DP_DROPS, "not first fragment"); 747 goto freeit; 748 } 749 /* Do not send ICMP in reply to ICMP errors */ 750 if (ip->ip_p == IPPROTO_ICMP) { 751 if (m->m_len < (ip->ip_hl << 2)) { 752 DPRINTF(DP_DROPS, "mbuf isn't contigious"); 753 goto freeit; 754 } 755 icmp = mtodo(m, ip->ip_hl << 2); 756 if (!ICMP_INFOTYPE(icmp->icmp_type)) { 757 DPRINTF(DP_DROPS, "do not send ICMP in reply to " 758 "ICMP errors"); 759 goto freeit; 760 } 761 } 762 switch (type) { 763 case ICMP_UNREACH: 764 case ICMP_TIMXCEED: 765 case ICMP_PARAMPROB: 766 break; 767 default: 768 goto freeit; 769 } 770 /* Calculate length of ICMP payload */ 771 len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: 772 m->m_pkthdr.len; 773 774 /* Create new ICMPv4 datagram */ 775 plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); 776 n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, 777 MT_HEADER, M_PKTHDR); 778 if (n == NULL) { 779 NAT64STAT_INC(stats, nomem); 780 m_freem(m); 781 return; 782 } 783 m_move_pkthdr(n, m); 784 M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); 785 786 n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; 787 oip = mtod(n, struct ip *); 788 oip->ip_v = IPVERSION; 789 oip->ip_hl = sizeof(struct ip) >> 2; 790 oip->ip_tos = 0; 791 oip->ip_len = htons(n->m_pkthdr.len); 792 oip->ip_ttl = V_ip_defttl; 793 oip->ip_p = IPPROTO_ICMP; 794 ip_fillid(oip); 795 oip->ip_off = htons(IP_DF); 796 oip->ip_src = ip->ip_dst; 797 oip->ip_dst = ip->ip_src; 798 oip->ip_sum = 0; 799 oip->ip_sum = in_cksum_hdr(oip); 800 801 icmp = mtodo(n, sizeof(struct ip)); 802 icmp->icmp_type = type; 803 icmp->icmp_code = code; 804 icmp->icmp_cksum = 0; 805 icmp->icmp_pmvoid = 0; 806 icmp->icmp_nextmtu = htons(mtu); 807 m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + 808 sizeof(struct icmphdr) + sizeof(uint32_t))); 809 icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, 810 sizeof(struct ip)); 811 m_freem(m); 812 V_nat64out->output_one(n, stats, logdata); 813 return; 814freeit: 815 NAT64STAT_INC(stats, dropped); 816 m_freem(m); 817} 818 819/* Translate ICMP echo request/reply into ICMPv6 */ 820static void 821nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, 822 uint16_t id, uint8_t type) 823{ 824 uint16_t old; 825 826 old = *(uint16_t *)icmp6; /* save type+code in one word */ 827 icmp6->icmp6_type = type; 828 /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ 829 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 830 old, *(uint16_t *)icmp6); 831 if (id != 0) { 832 old = icmp6->icmp6_id; 833 icmp6->icmp6_id = id; 834 /* Reflect ICMP id translation in the cksum */ 835 icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, 836 old, id); 837 } 838 /* Reflect IPv6 pseudo header in the cksum */ 839 icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 840 IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); 841} 842 843static NAT64NOINLINE struct mbuf * 844nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, 845 int offset, struct nat64_config *cfg) 846{ 847 struct ip ip; 848 struct icmp *icmp; 849 struct tcphdr *tcp; 850 struct udphdr *udp; 851 struct ip6_hdr *eip6; 852 struct mbuf *n; 853 uint32_t mtu; 854 int len, hlen, plen; 855 uint8_t type, code; 856 857 if (m->m_len < offset + ICMP_MINLEN) 858 m = m_pullup(m, offset + ICMP_MINLEN); 859 if (m == NULL) { 860 NAT64STAT_INC(&cfg->stats, nomem); 861 return (m); 862 } 863 mtu = 0; 864 icmp = mtodo(m, offset); 865 /* RFC 7915 p4.2 */ 866 switch (icmp->icmp_type) { 867 case ICMP_ECHOREPLY: 868 type = ICMP6_ECHO_REPLY; 869 code = 0; 870 break; 871 case ICMP_UNREACH: 872 type = ICMP6_DST_UNREACH; 873 switch (icmp->icmp_code) { 874 case ICMP_UNREACH_NET: 875 case ICMP_UNREACH_HOST: 876 case ICMP_UNREACH_SRCFAIL: 877 case ICMP_UNREACH_NET_UNKNOWN: 878 case ICMP_UNREACH_HOST_UNKNOWN: 879 case ICMP_UNREACH_TOSNET: 880 case ICMP_UNREACH_TOSHOST: 881 code = ICMP6_DST_UNREACH_NOROUTE; 882 break; 883 case ICMP_UNREACH_PROTOCOL: 884 type = ICMP6_PARAM_PROB; 885 code = ICMP6_PARAMPROB_NEXTHEADER; 886 break; 887 case ICMP_UNREACH_PORT: 888 code = ICMP6_DST_UNREACH_NOPORT; 889 break; 890 case ICMP_UNREACH_NEEDFRAG: 891 type = ICMP6_PACKET_TOO_BIG; 892 code = 0; 893 /* XXX: needs an additional look */ 894 mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); 895 break; 896 case ICMP_UNREACH_NET_PROHIB: 897 case ICMP_UNREACH_HOST_PROHIB: 898 case ICMP_UNREACH_FILTER_PROHIB: 899 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 900 code = ICMP6_DST_UNREACH_ADMIN; 901 break; 902 default: 903 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 904 icmp->icmp_type, icmp->icmp_code); 905 goto freeit; 906 } 907 break; 908 case ICMP_TIMXCEED: 909 type = ICMP6_TIME_EXCEEDED; 910 code = icmp->icmp_code; 911 break; 912 case ICMP_ECHO: 913 type = ICMP6_ECHO_REQUEST; 914 code = 0; 915 break; 916 case ICMP_PARAMPROB: 917 type = ICMP6_PARAM_PROB; 918 switch (icmp->icmp_code) { 919 case ICMP_PARAMPROB_ERRATPTR: 920 case ICMP_PARAMPROB_LENGTH: 921 code = ICMP6_PARAMPROB_HEADER; 922 switch (icmp->icmp_pptr) { 923 case 0: /* Version/IHL */ 924 case 1: /* Type Of Service */ 925 mtu = icmp->icmp_pptr; 926 break; 927 case 2: /* Total Length */ 928 case 3: mtu = 4; /* Payload Length */ 929 break; 930 case 8: /* Time to Live */ 931 mtu = 7; /* Hop Limit */ 932 break; 933 case 9: /* Protocol */ 934 mtu = 6; /* Next Header */ 935 break; 936 case 12: /* Source address */ 937 case 13: 938 case 14: 939 case 15: 940 mtu = 8; 941 break; 942 case 16: /* Destination address */ 943 case 17: 944 case 18: 945 case 19: 946 mtu = 24; 947 break; 948 default: /* Silently drop */ 949 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 950 " code %d, pptr %d", icmp->icmp_type, 951 icmp->icmp_code, icmp->icmp_pptr); 952 goto freeit; 953 } 954 break; 955 default: 956 DPRINTF(DP_DROPS, "Unsupported ICMP type %d," 957 " code %d, pptr %d", icmp->icmp_type, 958 icmp->icmp_code, icmp->icmp_pptr); 959 goto freeit; 960 } 961 break; 962 default: 963 DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", 964 icmp->icmp_type, icmp->icmp_code); 965 goto freeit; 966 } 967 /* 968 * For echo request/reply we can use original payload, 969 * but we need adjust icmp_cksum, because ICMPv6 cksum covers 970 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 971 */ 972 if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { 973 nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); 974 return (m); 975 } 976 /* 977 * For other types of ICMP messages we need to translate inner 978 * IPv4 header to IPv6 header. 979 * Assume ICMP src is the same as payload dst 980 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header 981 * and ( NATIP1, Hostdst1 ) in ICMP copy header. 982 * In that case, we already have map for NATIP1 and GWsrc1. 983 * The only thing we need is to copy IPv6 map prefix to 984 * Hostdst1. 985 */ 986 hlen = offset + ICMP_MINLEN; 987 if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { 988 DPRINTF(DP_DROPS, "Message is too short %d", 989 m->m_pkthdr.len); 990 goto freeit; 991 } 992 m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); 993 if (ip.ip_v != IPVERSION) { 994 DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); 995 goto freeit; 996 } 997 hlen += ip.ip_hl << 2; /* Skip inner IP header */ 998 if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || 999 nat64_check_ip4(ip.ip_dst.s_addr) != 0 || 1000 nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 || 1001 nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) { 1002 DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", 1003 ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); 1004 goto freeit; 1005 } 1006 if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { 1007 DPRINTF(DP_DROPS, "Message is too short %d", 1008 m->m_pkthdr.len); 1009 goto freeit; 1010 } 1011#if 0 1012 /* 1013 * Check that inner source matches the outer destination. 1014 * XXX: We need some method to convert IPv4 into IPv6 address here, 1015 * and compare IPv6 addresses. 1016 */ 1017 if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { 1018 DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", 1019 "%04x vs %04x", ip.ip_src.s_addr, 1020 nat64_get_ip4(&ip6->ip6_dst)); 1021 goto freeit; 1022 } 1023#endif 1024 /* 1025 * Create new mbuf for ICMPv6 datagram. 1026 * NOTE: len is data length just after inner IP header. 1027 */ 1028 len = m->m_pkthdr.len - hlen; 1029 if (sizeof(struct ip6_hdr) + 1030 sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) 1031 len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - 1032 sizeof(struct ip6_hdr); 1033 plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; 1034 n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); 1035 if (n == NULL) { 1036 NAT64STAT_INC(&cfg->stats, nomem); 1037 m_freem(m); 1038 return (NULL); 1039 } 1040 m_move_pkthdr(n, m); 1041 M_ALIGN(n, offset + plen + max_hdr); 1042 n->m_len = n->m_pkthdr.len = offset + plen; 1043 /* Adjust ip6_plen in outer header */ 1044 ip6->ip6_plen = htons(plen); 1045 /* Construct new inner IPv6 header */ 1046 eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); 1047 eip6->ip6_src = ip6->ip6_dst; 1048 1049 /* Use the same prefix that we have in outer header */ 1050 eip6->ip6_dst = ip6->ip6_src; 1051 MPASS(cfg->flags & NAT64_PLATPFX); 1052 nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr); 1053 1054 eip6->ip6_flow = htonl(ip.ip_tos << 20); 1055 eip6->ip6_vfc |= IPV6_VERSION; 1056 eip6->ip6_hlim = ip.ip_ttl; 1057 eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); 1058 eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; 1059 m_copydata(m, hlen, len, (char *)(eip6 + 1)); 1060 /* 1061 * We need to translate source port in the inner ULP header, 1062 * and adjust ULP checksum. 1063 */ 1064 switch (ip.ip_p) { 1065 case IPPROTO_TCP: 1066 if (len < offsetof(struct tcphdr, th_sum)) 1067 break; 1068 tcp = TCP(eip6 + 1); 1069 if (icmpid != 0) { 1070 tcp->th_sum = cksum_adjust(tcp->th_sum, 1071 tcp->th_sport, icmpid); 1072 tcp->th_sport = icmpid; 1073 } 1074 tcp->th_sum = cksum_add(tcp->th_sum, 1075 ~nat64_cksum_convert(eip6, &ip)); 1076 break; 1077 case IPPROTO_UDP: 1078 if (len < offsetof(struct udphdr, uh_sum)) 1079 break; 1080 udp = UDP(eip6 + 1); 1081 if (icmpid != 0) { 1082 udp->uh_sum = cksum_adjust(udp->uh_sum, 1083 udp->uh_sport, icmpid); 1084 udp->uh_sport = icmpid; 1085 } 1086 udp->uh_sum = cksum_add(udp->uh_sum, 1087 ~nat64_cksum_convert(eip6, &ip)); 1088 break; 1089 case IPPROTO_ICMP: 1090 /* 1091 * Check if this is an ICMP error message for echo request 1092 * that we sent. I.e. ULP in the data containing invoking 1093 * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 1094 */ 1095 icmp = (struct icmp *)(eip6 + 1); 1096 if (icmp->icmp_type != ICMP_ECHO) { 1097 m_freem(n); 1098 goto freeit; 1099 } 1100 /* 1101 * For our client this original datagram should looks 1102 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. 1103 * Thus we need adjust icmp_cksum and convert type from 1104 * ICMP_ECHO to ICMP6_ECHO_REQUEST. 1105 */ 1106 nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, 1107 ICMP6_ECHO_REQUEST); 1108 } 1109 m_freem(m); 1110 /* Convert ICMPv4 into ICMPv6 header */ 1111 icmp = mtodo(n, offset); 1112 ICMP6(icmp)->icmp6_type = type; 1113 ICMP6(icmp)->icmp6_code = code; 1114 ICMP6(icmp)->icmp6_mtu = htonl(mtu); 1115 ICMP6(icmp)->icmp6_cksum = 0; 1116 ICMP6(icmp)->icmp6_cksum = cksum_add( 1117 ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), 1118 in_cksum_skip(n, n->m_pkthdr.len, offset)); 1119 return (n); 1120freeit: 1121 m_freem(m); 1122 NAT64STAT_INC(&cfg->stats, dropped); 1123 return (NULL); 1124} 1125 1126int 1127nat64_getlasthdr(struct mbuf *m, int *offset) 1128{ 1129 struct ip6_hdr *ip6; 1130 struct ip6_hbh *hbh; 1131 int proto, hlen; 1132 1133 if (offset != NULL) 1134 hlen = *offset; 1135 else 1136 hlen = 0; 1137 1138 if (m->m_len < hlen + sizeof(*ip6)) 1139 return (-1); 1140 1141 ip6 = mtodo(m, hlen); 1142 hlen += sizeof(*ip6); 1143 proto = ip6->ip6_nxt; 1144 /* Skip extension headers */ 1145 while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || 1146 proto == IPPROTO_DSTOPTS) { 1147 hbh = mtodo(m, hlen); 1148 /* 1149 * We expect mbuf has contigious data up to 1150 * upper level header. 1151 */ 1152 if (m->m_len < hlen) 1153 return (-1); 1154 /* 1155 * We doesn't support Jumbo payload option, 1156 * so return error. 1157 */ 1158 if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) 1159 return (-1); 1160 proto = hbh->ip6h_nxt; 1161 hlen += (hbh->ip6h_len + 1) << 3; 1162 } 1163 if (offset != NULL) 1164 *offset = hlen; 1165 return (proto); 1166} 1167 1168int 1169nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, 1170 struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg, 1171 void *logdata) 1172{ 1173 struct nhop6_basic nh; 1174 struct ip6_hdr ip6; 1175 struct sockaddr_in6 dst; 1176 struct ip *ip; 1177 struct mbufq mq; 1178 uint16_t ip_id, ip_off; 1179 uint16_t *csum; 1180 int plen, hlen; 1181 uint8_t proto; 1182 1183 ip = mtod(m, struct ip*); 1184 1185 if (ip->ip_ttl <= IPTTLDEC) { 1186 nat64_icmp_reflect(m, ICMP_TIMXCEED, 1187 ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata); 1188 return (NAT64RETURN); 1189 } 1190 1191 ip6.ip6_dst = *daddr; 1192 ip6.ip6_src = *saddr; 1193 1194 hlen = ip->ip_hl << 2; 1195 plen = ntohs(ip->ip_len) - hlen; 1196 proto = ip->ip_p; 1197 1198 /* Save ip_id and ip_off, both are in network byte order */ 1199 ip_id = ip->ip_id; 1200 ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); 1201 1202 /* Fragment length must be multiple of 8 octets */ 1203 if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { 1204 nat64_icmp_reflect(m, ICMP_PARAMPROB, 1205 ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata); 1206 return (NAT64RETURN); 1207 } 1208 /* Fragmented ICMP is unsupported */ 1209 if (proto == IPPROTO_ICMP && ip_off != 0) { 1210 DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); 1211 NAT64STAT_INC(&cfg->stats, dropped); 1212 return (NAT64MFREE); 1213 } 1214 1215 dst.sin6_addr = ip6.ip6_dst; 1216 if (nat64_find_route6(&nh, &dst, m) != 0) { 1217 NAT64STAT_INC(&cfg->stats, noroute6); 1218 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 1219 &cfg->stats, logdata); 1220 return (NAT64RETURN); 1221 } 1222 if (nh.nh_mtu < plen + sizeof(ip6) && 1223 (ip->ip_off & htons(IP_DF)) != 0) { 1224 nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 1225 FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata); 1226 return (NAT64RETURN); 1227 } 1228 1229 ip6.ip6_flow = htonl(ip->ip_tos << 20); 1230 ip6.ip6_vfc |= IPV6_VERSION; 1231 ip6.ip6_hlim = ip->ip_ttl; 1232 /* Forwarding code will decrement TTL for netisr based output. */ 1233 if (V_nat64out == &nat64_direct) 1234 ip6.ip6_hlim -= IPTTLDEC; 1235 ip6.ip6_plen = htons(plen); 1236 ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; 1237 1238 /* Handle delayed checksums if needed. */ 1239 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1240 in_delayed_cksum(m); 1241 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1242 } 1243 /* Convert checksums. */ 1244 switch (proto) { 1245 case IPPROTO_TCP: 1246 csum = &TCP(mtodo(m, hlen))->th_sum; 1247 if (lport != 0) { 1248 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1249 *csum = cksum_adjust(*csum, tcp->th_dport, lport); 1250 tcp->th_dport = lport; 1251 } 1252 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1253 break; 1254 case IPPROTO_UDP: 1255 csum = &UDP(mtodo(m, hlen))->uh_sum; 1256 if (lport != 0) { 1257 struct udphdr *udp = UDP(mtodo(m, hlen)); 1258 *csum = cksum_adjust(*csum, udp->uh_dport, lport); 1259 udp->uh_dport = lport; 1260 } 1261 *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); 1262 break; 1263 case IPPROTO_ICMP: 1264 m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg); 1265 if (m == NULL) /* stats already accounted */ 1266 return (NAT64RETURN); 1267 } 1268 1269 m_adj(m, hlen); 1270 mbufq_init(&mq, 255); 1271 nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off); 1272 while ((m = mbufq_dequeue(&mq)) != NULL) { 1273 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1274 &cfg->stats, logdata) != 0) 1275 break; 1276 NAT64STAT_INC(&cfg->stats, opcnt46); 1277 } 1278 mbufq_drain(&mq); 1279 return (NAT64RETURN); 1280} 1281 1282int 1283nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, 1284 struct nat64_config *cfg, void *logdata) 1285{ 1286 struct ip ip; 1287 struct icmp6_hdr *icmp6; 1288 struct ip6_frag *ip6f; 1289 struct ip6_hdr *ip6, *ip6i; 1290 uint32_t mtu; 1291 int plen, proto; 1292 uint8_t type, code; 1293 1294 if (hlen == 0) { 1295 ip6 = mtod(m, struct ip6_hdr *); 1296 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1297 nat64_check_ip6(&ip6->ip6_dst) != 0) 1298 return (NAT64SKIP); 1299 1300 proto = nat64_getlasthdr(m, &hlen); 1301 if (proto != IPPROTO_ICMPV6) { 1302 DPRINTF(DP_DROPS, 1303 "dropped due to mbuf isn't contigious"); 1304 NAT64STAT_INC(&cfg->stats, dropped); 1305 return (NAT64MFREE); 1306 } 1307 } 1308 1309 /* 1310 * Translate ICMPv6 type and code to ICMPv4 (RFC7915). 1311 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). 1312 */ 1313 icmp6 = mtodo(m, hlen); 1314 mtu = 0; 1315 switch (icmp6->icmp6_type) { 1316 case ICMP6_DST_UNREACH: 1317 type = ICMP_UNREACH; 1318 switch (icmp6->icmp6_code) { 1319 case ICMP6_DST_UNREACH_NOROUTE: 1320 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1321 case ICMP6_DST_UNREACH_ADDR: 1322 code = ICMP_UNREACH_HOST; 1323 break; 1324 case ICMP6_DST_UNREACH_ADMIN: 1325 code = ICMP_UNREACH_HOST_PROHIB; 1326 break; 1327 case ICMP6_DST_UNREACH_NOPORT: 1328 code = ICMP_UNREACH_PORT; 1329 break; 1330 default: 1331 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1332 " code %d", icmp6->icmp6_type, 1333 icmp6->icmp6_code); 1334 NAT64STAT_INC(&cfg->stats, dropped); 1335 return (NAT64MFREE); 1336 } 1337 break; 1338 case ICMP6_PACKET_TOO_BIG: 1339 type = ICMP_UNREACH; 1340 code = ICMP_UNREACH_NEEDFRAG; 1341 mtu = ntohl(icmp6->icmp6_mtu); 1342 if (mtu < IPV6_MMTU) { 1343 DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," 1344 " code %d", mtu, icmp6->icmp6_type, 1345 icmp6->icmp6_code); 1346 NAT64STAT_INC(&cfg->stats, dropped); 1347 return (NAT64MFREE); 1348 } 1349 /* 1350 * Adjust MTU to reflect difference between 1351 * IPv6 an IPv4 headers. 1352 */ 1353 mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); 1354 break; 1355 case ICMP6_TIME_EXCEEDED: 1356 type = ICMP_TIMXCEED; 1357 code = icmp6->icmp6_code; 1358 break; 1359 case ICMP6_PARAM_PROB: 1360 switch (icmp6->icmp6_code) { 1361 case ICMP6_PARAMPROB_HEADER: 1362 type = ICMP_PARAMPROB; 1363 code = ICMP_PARAMPROB_ERRATPTR; 1364 mtu = ntohl(icmp6->icmp6_pptr); 1365 switch (mtu) { 1366 case 0: /* Version/Traffic Class */ 1367 case 1: /* Traffic Class/Flow Label */ 1368 break; 1369 case 4: /* Payload Length */ 1370 case 5: 1371 mtu = 2; 1372 break; 1373 case 6: /* Next Header */ 1374 mtu = 9; 1375 break; 1376 case 7: /* Hop Limit */ 1377 mtu = 8; 1378 break; 1379 default: 1380 if (mtu >= 8 && mtu <= 23) { 1381 mtu = 12; /* Source address */ 1382 break; 1383 } 1384 if (mtu >= 24 && mtu <= 39) { 1385 mtu = 16; /* Destination address */ 1386 break; 1387 } 1388 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1389 " code %d, pptr %d", icmp6->icmp6_type, 1390 icmp6->icmp6_code, mtu); 1391 NAT64STAT_INC(&cfg->stats, dropped); 1392 return (NAT64MFREE); 1393 } 1394 case ICMP6_PARAMPROB_NEXTHEADER: 1395 type = ICMP_UNREACH; 1396 code = ICMP_UNREACH_PROTOCOL; 1397 break; 1398 default: 1399 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," 1400 " code %d, pptr %d", icmp6->icmp6_type, 1401 icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); 1402 NAT64STAT_INC(&cfg->stats, dropped); 1403 return (NAT64MFREE); 1404 } 1405 break; 1406 default: 1407 DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", 1408 icmp6->icmp6_type, icmp6->icmp6_code); 1409 NAT64STAT_INC(&cfg->stats, dropped); 1410 return (NAT64MFREE); 1411 } 1412 1413 hlen += sizeof(struct icmp6_hdr); 1414 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { 1415 NAT64STAT_INC(&cfg->stats, dropped); 1416 DPRINTF(DP_DROPS, "Message is too short %d", 1417 m->m_pkthdr.len); 1418 return (NAT64MFREE); 1419 } 1420 /* 1421 * We need at least ICMP_MINLEN bytes of original datagram payload 1422 * to generate ICMP message. It is nice that ICMP_MINLEN is equal 1423 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment 1424 * header we will not have to do m_pullup() again. 1425 * 1426 * What we have here: 1427 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) 1428 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] 1429 * We need to translate it to: 1430 * 1431 * Outer header: (alias_host, v4exthost) 1432 * Inner header: (v4exthost, alias_host) [sport, alias_port] 1433 * 1434 * Assume caller function has checked if v4mapPRefix+v4host 1435 * matches configured prefix. 1436 * The only two things we should be provided with are mapping between 1437 * IPv6iHost <> alias_host and between dport and alias_port. 1438 */ 1439 if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) 1440 m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); 1441 if (m == NULL) { 1442 NAT64STAT_INC(&cfg->stats, nomem); 1443 return (NAT64RETURN); 1444 } 1445 ip6 = mtod(m, struct ip6_hdr *); 1446 ip6i = mtodo(m, hlen); 1447 ip6f = NULL; 1448 proto = ip6i->ip6_nxt; 1449 plen = ntohs(ip6i->ip6_plen); 1450 hlen += sizeof(struct ip6_hdr); 1451 if (proto == IPPROTO_FRAGMENT) { 1452 if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + 1453 ICMP_MINLEN) 1454 goto fail; 1455 ip6f = mtodo(m, hlen); 1456 proto = ip6f->ip6f_nxt; 1457 plen -= sizeof(struct ip6_frag); 1458 hlen += sizeof(struct ip6_frag); 1459 /* Ajust MTU to reflect frag header size */ 1460 if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) 1461 mtu -= sizeof(struct ip6_frag); 1462 } 1463 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { 1464 DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", 1465 proto); 1466 goto fail; 1467 } 1468 if (nat64_check_ip6(&ip6i->ip6_src) != 0 || 1469 nat64_check_ip6(&ip6i->ip6_dst) != 0) { 1470 DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); 1471 goto fail; 1472 } 1473 /* Check if outer dst is the same as inner src */ 1474 if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { 1475 DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); 1476 goto fail; 1477 } 1478 1479 /* Now we need to make a fake IPv4 packet to generate ICMP message */ 1480 ip.ip_dst.s_addr = aaddr; 1481 ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen); 1482 if (ip.ip_src.s_addr == 0) 1483 goto fail; 1484 /* XXX: Make fake ulp header */ 1485 if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */ 1486 ip6i->ip6_hlim += IPV6_HLIMDEC; 1487 nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); 1488 m_adj(m, hlen - sizeof(struct ip)); 1489 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1490 nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats, 1491 logdata); 1492 return (NAT64RETURN); 1493fail: 1494 /* 1495 * We must call m_freem() because mbuf pointer could be 1496 * changed with m_pullup(). 1497 */ 1498 m_freem(m); 1499 NAT64STAT_INC(&cfg->stats, dropped); 1500 return (NAT64RETURN); 1501} 1502 1503int 1504nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, 1505 struct nat64_config *cfg, void *logdata) 1506{ 1507 struct ip ip; 1508 struct nhop4_basic nh; 1509 struct sockaddr_in dst; 1510 struct ip6_frag *frag; 1511 struct ip6_hdr *ip6; 1512 struct icmp6_hdr *icmp6; 1513 uint16_t *csum; 1514 int plen, hlen, proto; 1515 1516 /* 1517 * XXX: we expect ipfw_chk() did m_pullup() up to upper level 1518 * protocol's headers. Also we skip some checks, that ip6_input(), 1519 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 1520 */ 1521 ip6 = mtod(m, struct ip6_hdr *); 1522 if (nat64_check_ip6(&ip6->ip6_src) != 0 || 1523 nat64_check_ip6(&ip6->ip6_dst) != 0) { 1524 return (NAT64SKIP); 1525 } 1526 1527 /* Starting from this point we must not return zero */ 1528 ip.ip_src.s_addr = aaddr; 1529 if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { 1530 DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x", 1531 ip.ip_src.s_addr); 1532 NAT64STAT_INC(&cfg->stats, dropped); 1533 return (NAT64MFREE); 1534 } 1535 1536 ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen); 1537 if (ip.ip_dst.s_addr == 0) { 1538 NAT64STAT_INC(&cfg->stats, dropped); 1539 return (NAT64MFREE); 1540 } 1541 1542 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 1543 nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, 1544 ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata); 1545 return (NAT64RETURN); 1546 } 1547 1548 hlen = 0; 1549 plen = ntohs(ip6->ip6_plen); 1550 proto = nat64_getlasthdr(m, &hlen); 1551 if (proto < 0) { 1552 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1553 NAT64STAT_INC(&cfg->stats, dropped); 1554 return (NAT64MFREE); 1555 } 1556 frag = NULL; 1557 if (proto == IPPROTO_FRAGMENT) { 1558 /* ipfw_chk should m_pullup up to frag header */ 1559 if (m->m_len < hlen + sizeof(*frag)) { 1560 DPRINTF(DP_DROPS, 1561 "dropped due to mbuf isn't contigious"); 1562 NAT64STAT_INC(&cfg->stats, dropped); 1563 return (NAT64MFREE); 1564 } 1565 frag = mtodo(m, hlen); 1566 proto = frag->ip6f_nxt; 1567 hlen += sizeof(*frag); 1568 /* Fragmented ICMPv6 is unsupported */ 1569 if (proto == IPPROTO_ICMPV6) { 1570 DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); 1571 NAT64STAT_INC(&cfg->stats, dropped); 1572 return (NAT64MFREE); 1573 } 1574 /* Fragment length must be multiple of 8 octets */ 1575 if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && 1576 ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { 1577 nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, 1578 ICMP6_PARAMPROB_HEADER, 1579 offsetof(struct ip6_hdr, ip6_plen), &cfg->stats, 1580 logdata); 1581 return (NAT64RETURN); 1582 } 1583 } 1584 plen -= hlen - sizeof(struct ip6_hdr); 1585 if (plen < 0 || m->m_pkthdr.len < plen + hlen) { 1586 DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", 1587 plen, m->m_pkthdr.len, hlen); 1588 NAT64STAT_INC(&cfg->stats, dropped); 1589 return (NAT64MFREE); 1590 } 1591 1592 icmp6 = NULL; /* Make gcc happy */ 1593 if (proto == IPPROTO_ICMPV6) { 1594 icmp6 = mtodo(m, hlen); 1595 if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && 1596 icmp6->icmp6_type != ICMP6_ECHO_REPLY) 1597 return (nat64_handle_icmp6(m, hlen, aaddr, aport, 1598 cfg, logdata)); 1599 } 1600 dst.sin_addr.s_addr = ip.ip_dst.s_addr; 1601 if (nat64_find_route4(&nh, &dst, m) != 0) { 1602 NAT64STAT_INC(&cfg->stats, noroute4); 1603 nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, 1604 ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata); 1605 return (NAT64RETURN); 1606 } 1607 if (nh.nh_mtu < plen + sizeof(ip)) { 1608 nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu, 1609 &cfg->stats, logdata); 1610 return (NAT64RETURN); 1611 } 1612 nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); 1613 1614 /* Handle delayed checksums if needed. */ 1615 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { 1616 in6_delayed_cksum(m, plen, hlen); 1617 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; 1618 } 1619 /* Convert checksums. */ 1620 switch (proto) { 1621 case IPPROTO_TCP: 1622 csum = &TCP(mtodo(m, hlen))->th_sum; 1623 if (aport != 0) { 1624 struct tcphdr *tcp = TCP(mtodo(m, hlen)); 1625 *csum = cksum_adjust(*csum, tcp->th_sport, aport); 1626 tcp->th_sport = aport; 1627 } 1628 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1629 break; 1630 case IPPROTO_UDP: 1631 csum = &UDP(mtodo(m, hlen))->uh_sum; 1632 if (aport != 0) { 1633 struct udphdr *udp = UDP(mtodo(m, hlen)); 1634 *csum = cksum_adjust(*csum, udp->uh_sport, aport); 1635 udp->uh_sport = aport; 1636 } 1637 *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); 1638 break; 1639 case IPPROTO_ICMPV6: 1640 /* Checksum in ICMPv6 covers pseudo header */ 1641 csum = &icmp6->icmp6_cksum; 1642 *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, 1643 IPPROTO_ICMPV6, 0)); 1644 /* Convert ICMPv6 types to ICMP */ 1645 proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ 1646 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) 1647 icmp6->icmp6_type = ICMP_ECHO; 1648 else /* ICMP6_ECHO_REPLY */ 1649 icmp6->icmp6_type = ICMP_ECHOREPLY; 1650 *csum = cksum_adjust(*csum, (uint16_t)proto, 1651 *(uint16_t *)icmp6); 1652 if (aport != 0) { 1653 uint16_t old_id = icmp6->icmp6_id; 1654 icmp6->icmp6_id = aport; 1655 *csum = cksum_adjust(*csum, old_id, aport); 1656 } 1657 break; 1658 }; 1659 1660 m_adj(m, hlen - sizeof(ip)); 1661 bcopy(&ip, mtod(m, void *), sizeof(ip)); 1662 if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst, 1663 &cfg->stats, logdata) == 0) 1664 NAT64STAT_INC(&cfg->stats, opcnt64); 1665 return (NAT64RETURN); 1666} 1667 1668