1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/netpfil/ipfw/nat64/nat64_translate.c 364162 2020-08-12 12:08:50Z ae $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/counter.h>
35#include <sys/errno.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/mbuf.h>
39#include <sys/module.h>
40#include <sys/rmlock.h>
41#include <sys/rwlock.h>
42#include <sys/socket.h>
43#include <sys/queue.h>
44
45#include <net/if.h>
46#include <net/if_var.h>
47#include <net/if_pflog.h>
48#include <net/pfil.h>
49#include <net/netisr.h>
50#include <net/route.h>
51
52#include <netinet/in.h>
53#include <netinet/in_fib.h>
54#include <netinet/ip.h>
55#include <netinet/ip_var.h>
56#include <netinet/ip_fw.h>
57#include <netinet/ip6.h>
58#include <netinet/icmp6.h>
59#include <netinet/ip_icmp.h>
60#include <netinet/tcp.h>
61#include <netinet/udp.h>
62#include <netinet6/in6_var.h>
63#include <netinet6/in6_fib.h>
64#include <netinet6/ip6_var.h>
65#include <netinet6/ip_fw_nat64.h>
66
67#include <netpfil/pf/pf.h>
68#include <netpfil/ipfw/ip_fw_private.h>
69#include <machine/in_cksum.h>
70
71#include "ip_fw_nat64.h"
72#include "nat64_translate.h"
73
74
75typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *,
76    struct sockaddr *, struct nat64_counters *, void *);
77typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *,
78    void *);
79
80static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *,
81    struct mbuf *);
82static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *,
83    struct mbuf *);
84static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *);
85static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *,
86    struct nat64_counters *, void *);
87static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *,
88    void *);
89static int nat64_direct_output(struct ifnet *, struct mbuf *,
90    struct sockaddr *, struct nat64_counters *, void *);
91
92struct nat64_methods {
93	nat64_output_t		output;
94	nat64_output_one_t	output_one;
95};
96static const struct nat64_methods nat64_netisr = {
97	.output = nat64_output,
98	.output_one = nat64_output_one
99};
100static const struct nat64_methods nat64_direct = {
101	.output = nat64_direct_output,
102	.output_one = nat64_direct_output_one
103};
104static VNET_DEFINE(const struct nat64_methods *, nat64out) = &nat64_netisr;
105#define	V_nat64out	VNET(nat64out)
106
107void
108nat64_set_output_method(int direct)
109{
110
111	V_nat64out = direct != 0 ? &nat64_direct: &nat64_netisr;
112}
113
114int
115nat64_get_output_method(void)
116{
117
118	return (V_nat64out == &nat64_direct ? 1: 0);
119}
120
121static void
122nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
123{
124
125	logdata->dir = PF_OUT;
126	logdata->af = family;
127	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
128}
129
130static int
131nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
132    struct nat64_counters *stats, void *logdata)
133{
134	int error;
135
136	if (logdata != NULL)
137		nat64_log(logdata, m, dst->sa_family);
138	error = (*ifp->if_output)(ifp, m, dst, NULL);
139	if (error != 0)
140		NAT64STAT_INC(stats, oerrors);
141	return (error);
142}
143
144static int
145nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats,
146    void *logdata)
147{
148	struct nhop6_basic nh6;
149	struct nhop4_basic nh4;
150	struct sockaddr_in6 dst6;
151	struct sockaddr_in dst4;
152	struct sockaddr *dst;
153	struct ip6_hdr *ip6;
154	struct ip *ip4;
155	struct ifnet *ifp;
156	int error;
157
158	ip4 = mtod(m, struct ip *);
159	switch (ip4->ip_v) {
160	case IPVERSION:
161		dst4.sin_addr = ip4->ip_dst;
162		error = nat64_find_route4(&nh4, &dst4, m);
163		if (error != 0)
164			NAT64STAT_INC(stats, noroute4);
165		else {
166			ifp = nh4.nh_ifp;
167			dst = (struct sockaddr *)&dst4;
168		}
169		break;
170	case (IPV6_VERSION >> 4):
171		ip6 = mtod(m, struct ip6_hdr *);
172		dst6.sin6_addr = ip6->ip6_dst;
173		error = nat64_find_route6(&nh6, &dst6, m);
174		if (error != 0)
175			NAT64STAT_INC(stats, noroute6);
176		else {
177			ifp = nh6.nh_ifp;
178			dst = (struct sockaddr *)&dst6;
179		}
180		break;
181	default:
182		m_freem(m);
183		NAT64STAT_INC(stats, dropped);
184		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
185		return (EAFNOSUPPORT);
186	}
187	if (error != 0) {
188		m_freem(m);
189		return (EHOSTUNREACH);
190	}
191	if (logdata != NULL)
192		nat64_log(logdata, m, dst->sa_family);
193	error = (*ifp->if_output)(ifp, m, dst, NULL);
194	if (error != 0)
195		NAT64STAT_INC(stats, oerrors);
196	return (error);
197}
198
199static int
200nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
201    struct nat64_counters *stats, void *logdata)
202{
203	struct ip *ip4;
204	int ret, af;
205
206	ip4 = mtod(m, struct ip *);
207	switch (ip4->ip_v) {
208	case IPVERSION:
209		af = AF_INET;
210		ret = NETISR_IP;
211		break;
212	case (IPV6_VERSION >> 4):
213		af = AF_INET6;
214		ret = NETISR_IPV6;
215		break;
216	default:
217		m_freem(m);
218		NAT64STAT_INC(stats, dropped);
219		DPRINTF(DP_DROPS, "unknown IP version");
220		return (EAFNOSUPPORT);
221	}
222	if (logdata != NULL)
223		nat64_log(logdata, m, af);
224	if (m->m_pkthdr.rcvif == NULL)
225		m->m_pkthdr.rcvif = V_loif;
226	ret = netisr_queue(ret, m);
227	if (ret != 0)
228		NAT64STAT_INC(stats, oerrors);
229	return (ret);
230}
231
232static int
233nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
234{
235
236	return (nat64_output(NULL, m, NULL, stats, logdata));
237}
238
/*
 * Validate an IPv6 prefix length against RFC 6052, which permits only
 * 32, 40, 48, 56, 64 and 96 (the Well-Known Prefix is 96 bits long).
 * Returns zero for a permitted length, EINVAL for anything else.
 */
int
nat64_check_prefixlen(int length)
{

	if (length == 96)
		return (0);
	/* The remaining valid lengths are the multiples of 8 in 32..64. */
	if (length >= 32 && length <= 64 && length % 8 == 0)
		return (0);
	return (EINVAL);
}
260
261int
262nat64_check_prefix6(const struct in6_addr *prefix, int length)
263{
264
265	if (nat64_check_prefixlen(length) != 0)
266		return (EINVAL);
267
268	/* Well-known prefix has 96 prefix length */
269	if (IN6_IS_ADDR_WKPFX(prefix) && length != 96)
270		return (EINVAL);
271
272	/* Bits 64 to 71 must be set to zero */
273	if (prefix->__u6_addr.__u6_addr8[8] != 0)
274		return (EINVAL);
275
276	/* Some extra checks */
277	if (IN6_IS_ADDR_MULTICAST(prefix) ||
278	    IN6_IS_ADDR_UNSPECIFIED(prefix) ||
279	    IN6_IS_ADDR_LOOPBACK(prefix))
280		return (EINVAL);
281	return (0);
282}
283
284int
285nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)
286{
287
288	if (cfg->flags & NAT64_ALLOW_PRIVATE)
289		return (0);
290
291	/* WKPFX must not be used to represent non-global IPv4 addresses */
292	if (cfg->flags & NAT64_WKPFX) {
293		/* IN_PRIVATE */
294		if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
295		    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
296		    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
297			return (1);
298		/*
299		 * RFC 5735:
300		 *  192.0.0.0/24 - reserved for IETF protocol assignments
301		 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
302		 *  198.18.0.0/15 - for use in benchmark tests
303		 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
304		 *   in documentation and example code
305		 */
306		if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
307		    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
308		    (ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||
309		    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
310		    (ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||
311		    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
312			return (1);
313	}
314	return (0);
315}
316
317/*
318 * Embed @ia IPv4 address into @ip6 IPv6 address.
319 * Place to embedding determined from prefix length @plen.
320 */
321void
322nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia)
323{
324
325	switch (plen) {
326	case 32:
327	case 96:
328		ip6->s6_addr32[plen / 32] = ia;
329		break;
330	case 40:
331	case 48:
332	case 56:
333		/*
334		 * Preserve prefix bits.
335		 * Since suffix bits should be zero and reserved for future
336		 * use, we just overwrite the whole word, where they are.
337		 */
338		ip6->s6_addr32[1] &= 0xffffffff << (32 - plen % 32);
339#if BYTE_ORDER == BIG_ENDIAN
340		ip6->s6_addr32[1] |= ia >> (plen % 32);
341		ip6->s6_addr32[2] = ia << (24 - plen % 32);
342#elif BYTE_ORDER == LITTLE_ENDIAN
343		ip6->s6_addr32[1] |= ia << (plen % 32);
344		ip6->s6_addr32[2] = ia >> (24 - plen % 32);
345#endif
346		break;
347	case 64:
348#if BYTE_ORDER == BIG_ENDIAN
349		ip6->s6_addr32[2] = ia >> 8;
350		ip6->s6_addr32[3] = ia << 24;
351#elif BYTE_ORDER == LITTLE_ENDIAN
352		ip6->s6_addr32[2] = ia << 8;
353		ip6->s6_addr32[3] = ia >> 24;
354#endif
355		break;
356	default:
357		panic("Wrong plen: %d", plen);
358	};
359	/*
360	 * Bits 64 to 71 of the address are reserved for compatibility
361	 * with the host identifier format defined in the IPv6 addressing
362	 * architecture [RFC4291]. These bits MUST be set to zero.
363	 */
364	ip6->s6_addr8[8] = 0;
365}
366
367in_addr_t
368nat64_extract_ip4(const struct in6_addr *ip6, int plen)
369{
370	in_addr_t ia;
371
372	/*
373	 * According to RFC 6052 p2.2:
374	 * IPv4-embedded IPv6 addresses are composed of a variable-length
375	 * prefix, the embedded IPv4 address, and a variable length suffix.
376	 * The suffix bits are reserved for future extensions and SHOULD
377	 * be set to zero.
378	 */
379	switch (plen) {
380	case 32:
381		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
382			goto badip6;
383		break;
384	case 40:
385		if (ip6->s6_addr32[3] != 0 ||
386		    (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
387			goto badip6;
388		break;
389	case 48:
390		if (ip6->s6_addr32[3] != 0 ||
391		    (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
392			goto badip6;
393		break;
394	case 56:
395		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
396			goto badip6;
397		break;
398	case 64:
399		if (ip6->s6_addr8[8] != 0 ||
400		    (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
401			goto badip6;
402	};
403	switch (plen) {
404	case 32:
405	case 96:
406		ia = ip6->s6_addr32[plen / 32];
407		break;
408	case 40:
409	case 48:
410	case 56:
411#if BYTE_ORDER == BIG_ENDIAN
412		ia = (ip6->s6_addr32[1] << (plen % 32)) |
413		    (ip6->s6_addr32[2] >> (24 - plen % 32));
414#elif BYTE_ORDER == LITTLE_ENDIAN
415		ia = (ip6->s6_addr32[1] >> (plen % 32)) |
416		    (ip6->s6_addr32[2] << (24 - plen % 32));
417#endif
418		break;
419	case 64:
420#if BYTE_ORDER == BIG_ENDIAN
421		ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
422#elif BYTE_ORDER == LITTLE_ENDIAN
423		ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
424#endif
425		break;
426	default:
427		return (0);
428	};
429	if (nat64_check_ip4(ia) == 0)
430		return (ia);
431
432	DPRINTF(DP_GENERIC | DP_DROPS,
433	    "invalid destination address: %08x", ia);
434	return (0);
435badip6:
436	DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");
437	return (0);
438}
439
440/*
441 * According to RFC 1624 the equation for incremental checksum update is:
442 *	HC' = ~(~HC + ~m + m')	--	[Eqn. 3]
443 *	HC' = HC - ~m - m'	--	[Eqn. 4]
444 * So, when we are replacing IPv4 addresses to IPv6, we
445 * can assume, that new bytes previously were zeros, and vise versa -
446 * when we replacing IPv6 addresses to IPv4, now unused bytes become
447 * zeros. The payload length in pseudo header has bigger size, but one
448 * half of it should be zero. Using the equation 4 we get:
449 *	HC' = HC - (~m0 + m0')	-- m0 is first changed word
450 *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	-- m1 is second changed word
451 *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
452 *	  = HC - sum(~m[i] + m'[i])
453 *
454 * The function result should be used as follows:
455 *	IPv6 to IPv4:	HC' = cksum_add(HC, result)
456 *	IPv4 to IPv6:	HC' = cksum_add(HC, ~result)
457 */
458static uint16_t
459nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
460{
461	uint32_t sum;
462	uint16_t *p;
463
464	sum = ~ip->ip_src.s_addr >> 16;
465	sum += ~ip->ip_src.s_addr & 0xffff;
466	sum += ~ip->ip_dst.s_addr >> 16;
467	sum += ~ip->ip_dst.s_addr & 0xffff;
468
469	for (p = (uint16_t *)&ip6->ip6_src;
470	    p < (uint16_t *)(&ip6->ip6_src + 2); p++)
471		sum += *p;
472
473	while (sum >> 16)
474		sum = (sum & 0xffff) + (sum >> 16);
475	return (sum);
476}
477
478static void
479nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
480    uint16_t plen, uint8_t proto, struct ip *ip)
481{
482
483	/* assume addresses are already initialized */
484	ip->ip_v = IPVERSION;
485	ip->ip_hl = sizeof(*ip) >> 2;
486	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
487	ip->ip_len = htons(sizeof(*ip) + plen);
488	ip->ip_ttl = ip6->ip6_hlim;
489	/* Forwarding code will decrement TTL for netisr based output. */
490	if (V_nat64out == &nat64_direct)
491		ip->ip_ttl -= IPV6_HLIMDEC;
492	ip->ip_sum = 0;
493	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
494	ip_fillid(ip);
495	if (frag != NULL) {
496		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
497		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
498			ip->ip_off |= htons(IP_MF);
499	} else {
500		ip->ip_off = htons(IP_DF);
501	}
502	ip->ip_sum = in_cksum_hdr(ip);
503}
504
505#define	FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
506static NAT64NOINLINE int
507nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,
508    struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,
509    uint16_t ip_off)
510{
511	struct ip6_frag ip6f;
512	struct mbuf *n;
513	uint16_t hlen, len, offset;
514	int plen;
515
516	plen = ntohs(ip6->ip6_plen);
517	hlen = sizeof(struct ip6_hdr);
518
519	/* Fragmentation isn't needed */
520	if (ip_off == 0 && plen <= mtu - hlen) {
521		M_PREPEND(m, hlen, M_NOWAIT);
522		if (m == NULL) {
523			NAT64STAT_INC(stats, nomem);
524			return (ENOMEM);
525		}
526		bcopy(ip6, mtod(m, void *), hlen);
527		if (mbufq_enqueue(mq, m) != 0) {
528			m_freem(m);
529			NAT64STAT_INC(stats, dropped);
530			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
531			return (ENOBUFS);
532		}
533		return (0);
534	}
535
536	hlen += sizeof(struct ip6_frag);
537	ip6f.ip6f_reserved = 0;
538	ip6f.ip6f_nxt = ip6->ip6_nxt;
539	ip6->ip6_nxt = IPPROTO_FRAGMENT;
540	if (ip_off != 0) {
541		/*
542		 * We have got an IPv4 fragment.
543		 * Use offset value and ip_id from original fragment.
544		 */
545		ip6f.ip6f_ident = htonl(ntohs(ip_id));
546		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
547		NAT64STAT_INC(stats, ifrags);
548	} else {
549		/* The packet size exceeds interface MTU */
550		ip6f.ip6f_ident = htonl(ip6_randomid());
551		offset = 0; /* First fragment*/
552	}
553	while (plen > 0 && m != NULL) {
554		n = NULL;
555		len = FRAGSZ(mtu) & ~7;
556		if (len > plen)
557			len = plen;
558		ip6->ip6_plen = htons(len + sizeof(ip6f));
559		ip6f.ip6f_offlg = ntohs(offset);
560		if (len < plen || (ip_off & htons(IP_MF)) != 0)
561			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
562		offset += len;
563		plen -= len;
564		if (plen > 0) {
565			n = m_split(m, len, M_NOWAIT);
566			if (n == NULL)
567				goto fail;
568		}
569		M_PREPEND(m, hlen, M_NOWAIT);
570		if (m == NULL)
571			goto fail;
572		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
573		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
574		    sizeof(struct ip6_frag));
575		if (mbufq_enqueue(mq, m) != 0)
576			goto fail;
577		m = n;
578	}
579	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
580	return (0);
581fail:
582	if (m != NULL)
583		m_freem(m);
584	if (n != NULL)
585		m_freem(n);
586	mbufq_drain(mq);
587	NAT64STAT_INC(stats, nomem);
588	return (ENOMEM);
589}
590
591static NAT64NOINLINE int
592nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst,
593    struct mbuf *m)
594{
595
596	if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0,
597	    pnh) != 0)
598		return (EHOSTUNREACH);
599	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
600		return (EHOSTUNREACH);
601	/*
602	 * XXX: we need to use destination address with embedded scope
603	 * zone id, because LLTABLE uses such form of addresses for lookup.
604	 */
605	dst->sin6_family = AF_INET6;
606	dst->sin6_len = sizeof(*dst);
607	dst->sin6_addr = pnh->nh_addr;
608	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
609		dst->sin6_addr.s6_addr16[1] =
610		    htons(pnh->nh_ifp->if_index & 0xffff);
611	dst->sin6_port = 0;
612	dst->sin6_scope_id = 0;
613	dst->sin6_flowinfo = 0;
614
615	return (0);
616}
617
618#define	NAT64_ICMP6_PLEN	64
619static NAT64NOINLINE void
620nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
621    struct nat64_counters *stats, void *logdata)
622{
623	struct icmp6_hdr *icmp6;
624	struct ip6_hdr *ip6, *oip6;
625	struct mbuf *n;
626	int len, plen;
627
628	len = 0;
629	plen = nat64_getlasthdr(m, &len);
630	if (plen < 0) {
631		DPRINTF(DP_DROPS, "mbuf isn't contigious");
632		goto freeit;
633	}
634	/*
635	 * Do not send ICMPv6 in reply to ICMPv6 errors.
636	 */
637	if (plen == IPPROTO_ICMPV6) {
638		if (m->m_len < len + sizeof(*icmp6)) {
639			DPRINTF(DP_DROPS, "mbuf isn't contigious");
640			goto freeit;
641		}
642		icmp6 = mtodo(m, len);
643		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
644		    icmp6->icmp6_type == ND_REDIRECT) {
645			DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
646			    "ICMPv6 errors");
647			goto freeit;
648		}
649	}
650	/*
651	if (icmp6_ratelimit(&ip6->ip6_src, type, code))
652		goto freeit;
653		*/
654	ip6 = mtod(m, struct ip6_hdr *);
655	switch (type) {
656	case ICMP6_DST_UNREACH:
657	case ICMP6_PACKET_TOO_BIG:
658	case ICMP6_TIME_EXCEEDED:
659	case ICMP6_PARAM_PROB:
660		break;
661	default:
662		goto freeit;
663	}
664	/* Calculate length of ICMPv6 payload */
665	len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
666	    m->m_pkthdr.len;
667
668	/* Create new ICMPv6 datagram */
669	plen = len + sizeof(struct icmp6_hdr);
670	n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
671	    MT_HEADER, M_PKTHDR);
672	if (n == NULL) {
673		NAT64STAT_INC(stats, nomem);
674		m_freem(m);
675		return;
676	}
677	/*
678	 * Move pkthdr from original mbuf. We should have initialized some
679	 * fields, because we can reinject this mbuf to netisr and it will
680	 * go trough input path (it requires at least rcvif should be set).
681	 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
682	 * in the chain, when we will do M_PREPEND() or make some type of
683	 * tunneling.
684	 */
685	m_move_pkthdr(n, m);
686	M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
687
688	n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
689	oip6 = mtod(n, struct ip6_hdr *);
690	oip6->ip6_src = ip6->ip6_dst;
691	oip6->ip6_dst = ip6->ip6_src;
692	oip6->ip6_nxt = IPPROTO_ICMPV6;
693	oip6->ip6_flow = 0;
694	oip6->ip6_vfc |= IPV6_VERSION;
695	oip6->ip6_hlim = V_ip6_defhlim;
696	oip6->ip6_plen = htons(plen);
697
698	icmp6 = mtodo(n, sizeof(struct ip6_hdr));
699	icmp6->icmp6_cksum = 0;
700	icmp6->icmp6_type = type;
701	icmp6->icmp6_code = code;
702	icmp6->icmp6_mtu = htonl(mtu);
703
704	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
705	    sizeof(struct icmp6_hdr)));
706	icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
707	    sizeof(struct ip6_hdr), plen);
708	m_freem(m);
709	V_nat64out->output_one(n, stats, logdata);
710	return;
711freeit:
712	NAT64STAT_INC(stats, dropped);
713	m_freem(m);
714}
715
716static NAT64NOINLINE int
717nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst,
718    struct mbuf *m)
719{
720
721	if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0)
722		return (EHOSTUNREACH);
723	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
724		return (EHOSTUNREACH);
725
726	dst->sin_family = AF_INET;
727	dst->sin_len = sizeof(*dst);
728	dst->sin_addr = pnh->nh_addr;
729	dst->sin_port = 0;
730	return (0);
731}
732
733#define	NAT64_ICMP_PLEN	64
734static NAT64NOINLINE void
735nat64_icmp_reflect(struct mbuf *m, uint8_t type,
736    uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)
737{
738	struct icmp *icmp;
739	struct ip *ip, *oip;
740	struct mbuf *n;
741	int len, plen;
742
743	ip = mtod(m, struct ip *);
744	/* Do not send ICMP error if packet is not the first fragment */
745	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
746		DPRINTF(DP_DROPS, "not first fragment");
747		goto freeit;
748	}
749	/* Do not send ICMP in reply to ICMP errors */
750	if (ip->ip_p == IPPROTO_ICMP) {
751		if (m->m_len < (ip->ip_hl << 2)) {
752			DPRINTF(DP_DROPS, "mbuf isn't contigious");
753			goto freeit;
754		}
755		icmp = mtodo(m, ip->ip_hl << 2);
756		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
757			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
758			    "ICMP errors");
759			goto freeit;
760		}
761	}
762	switch (type) {
763	case ICMP_UNREACH:
764	case ICMP_TIMXCEED:
765	case ICMP_PARAMPROB:
766		break;
767	default:
768		goto freeit;
769	}
770	/* Calculate length of ICMP payload */
771	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
772	    m->m_pkthdr.len;
773
774	/* Create new ICMPv4 datagram */
775	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
776	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
777	    MT_HEADER, M_PKTHDR);
778	if (n == NULL) {
779		NAT64STAT_INC(stats, nomem);
780		m_freem(m);
781		return;
782	}
783	m_move_pkthdr(n, m);
784	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
785
786	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
787	oip = mtod(n, struct ip *);
788	oip->ip_v = IPVERSION;
789	oip->ip_hl = sizeof(struct ip) >> 2;
790	oip->ip_tos = 0;
791	oip->ip_len = htons(n->m_pkthdr.len);
792	oip->ip_ttl = V_ip_defttl;
793	oip->ip_p = IPPROTO_ICMP;
794	ip_fillid(oip);
795	oip->ip_off = htons(IP_DF);
796	oip->ip_src = ip->ip_dst;
797	oip->ip_dst = ip->ip_src;
798	oip->ip_sum = 0;
799	oip->ip_sum = in_cksum_hdr(oip);
800
801	icmp = mtodo(n, sizeof(struct ip));
802	icmp->icmp_type = type;
803	icmp->icmp_code = code;
804	icmp->icmp_cksum = 0;
805	icmp->icmp_pmvoid = 0;
806	icmp->icmp_nextmtu = htons(mtu);
807	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
808	    sizeof(struct icmphdr) + sizeof(uint32_t)));
809	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
810	    sizeof(struct ip));
811	m_freem(m);
812	V_nat64out->output_one(n, stats, logdata);
813	return;
814freeit:
815	NAT64STAT_INC(stats, dropped);
816	m_freem(m);
817}
818
819/* Translate ICMP echo request/reply into ICMPv6 */
820static void
821nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
822    uint16_t id, uint8_t type)
823{
824	uint16_t old;
825
826	old = *(uint16_t *)icmp6;	/* save type+code in one word */
827	icmp6->icmp6_type = type;
828	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
829	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
830	    old, *(uint16_t *)icmp6);
831	if (id != 0) {
832		old = icmp6->icmp6_id;
833		icmp6->icmp6_id = id;
834		/* Reflect ICMP id translation in the cksum */
835		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
836		    old, id);
837	}
838	/* Reflect IPv6 pseudo header in the cksum */
839	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
840	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
841}
842
843static NAT64NOINLINE struct mbuf *
844nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
845    int offset, struct nat64_config *cfg)
846{
847	struct ip ip;
848	struct icmp *icmp;
849	struct tcphdr *tcp;
850	struct udphdr *udp;
851	struct ip6_hdr *eip6;
852	struct mbuf *n;
853	uint32_t mtu;
854	int len, hlen, plen;
855	uint8_t type, code;
856
857	if (m->m_len < offset + ICMP_MINLEN)
858		m = m_pullup(m, offset + ICMP_MINLEN);
859	if (m == NULL) {
860		NAT64STAT_INC(&cfg->stats, nomem);
861		return (m);
862	}
863	mtu = 0;
864	icmp = mtodo(m, offset);
865	/* RFC 7915 p4.2 */
866	switch (icmp->icmp_type) {
867	case ICMP_ECHOREPLY:
868		type = ICMP6_ECHO_REPLY;
869		code = 0;
870		break;
871	case ICMP_UNREACH:
872		type = ICMP6_DST_UNREACH;
873		switch (icmp->icmp_code) {
874		case ICMP_UNREACH_NET:
875		case ICMP_UNREACH_HOST:
876		case ICMP_UNREACH_SRCFAIL:
877		case ICMP_UNREACH_NET_UNKNOWN:
878		case ICMP_UNREACH_HOST_UNKNOWN:
879		case ICMP_UNREACH_TOSNET:
880		case ICMP_UNREACH_TOSHOST:
881			code = ICMP6_DST_UNREACH_NOROUTE;
882			break;
883		case ICMP_UNREACH_PROTOCOL:
884			type = ICMP6_PARAM_PROB;
885			code = ICMP6_PARAMPROB_NEXTHEADER;
886			break;
887		case ICMP_UNREACH_PORT:
888			code = ICMP6_DST_UNREACH_NOPORT;
889			break;
890		case ICMP_UNREACH_NEEDFRAG:
891			type = ICMP6_PACKET_TOO_BIG;
892			code = 0;
893			/* XXX: needs an additional look */
894			mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
895			break;
896		case ICMP_UNREACH_NET_PROHIB:
897		case ICMP_UNREACH_HOST_PROHIB:
898		case ICMP_UNREACH_FILTER_PROHIB:
899		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
900			code = ICMP6_DST_UNREACH_ADMIN;
901			break;
902		default:
903			DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
904			    icmp->icmp_type, icmp->icmp_code);
905			goto freeit;
906		}
907		break;
908	case ICMP_TIMXCEED:
909		type = ICMP6_TIME_EXCEEDED;
910		code = icmp->icmp_code;
911		break;
912	case ICMP_ECHO:
913		type = ICMP6_ECHO_REQUEST;
914		code = 0;
915		break;
916	case ICMP_PARAMPROB:
917		type = ICMP6_PARAM_PROB;
918		switch (icmp->icmp_code) {
919		case ICMP_PARAMPROB_ERRATPTR:
920		case ICMP_PARAMPROB_LENGTH:
921			code = ICMP6_PARAMPROB_HEADER;
922			switch (icmp->icmp_pptr) {
923			case 0: /* Version/IHL */
924			case 1: /* Type Of Service */
925				mtu = icmp->icmp_pptr;
926				break;
927			case 2: /* Total Length */
928			case 3: mtu = 4; /* Payload Length */
929				break;
930			case 8: /* Time to Live */
931				mtu = 7; /* Hop Limit */
932				break;
933			case 9: /* Protocol */
934				mtu = 6; /* Next Header */
935				break;
936			case 12: /* Source address */
937			case 13:
938			case 14:
939			case 15:
940				mtu = 8;
941				break;
942			case 16: /* Destination address */
943			case 17:
944			case 18:
945			case 19:
946				mtu = 24;
947				break;
948			default: /* Silently drop */
949				DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
950				    " code %d, pptr %d", icmp->icmp_type,
951				    icmp->icmp_code, icmp->icmp_pptr);
952				goto freeit;
953			}
954			break;
955		default:
956			DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
957			    " code %d, pptr %d", icmp->icmp_type,
958			    icmp->icmp_code, icmp->icmp_pptr);
959			goto freeit;
960		}
961		break;
962	default:
963		DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
964		    icmp->icmp_type, icmp->icmp_code);
965		goto freeit;
966	}
967	/*
968	 * For echo request/reply we can use original payload,
969	 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
970	 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
971	 */
972	if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
973		nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
974		return (m);
975	}
976	/*
977	 * For other types of ICMP messages we need to translate inner
978	 * IPv4 header to IPv6 header.
979	 * Assume ICMP src is the same as payload dst
980	 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
981	 * and          ( NATIP1, Hostdst1 ) in ICMP copy header.
982	 * In that case, we already have map for NATIP1 and GWsrc1.
983	 * The only thing we need is to copy IPv6 map prefix to
984	 * Hostdst1.
985	 */
986	hlen = offset + ICMP_MINLEN;
987	if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
988		DPRINTF(DP_DROPS, "Message is too short %d",
989		    m->m_pkthdr.len);
990		goto freeit;
991	}
992	m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
993	if (ip.ip_v != IPVERSION) {
994		DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
995		goto freeit;
996	}
997	hlen += ip.ip_hl << 2; /* Skip inner IP header */
998	if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
999	    nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
1000	    nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||
1001	    nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {
1002		DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
1003		    ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
1004		goto freeit;
1005	}
1006	if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
1007		DPRINTF(DP_DROPS, "Message is too short %d",
1008		    m->m_pkthdr.len);
1009		goto freeit;
1010	}
1011#if 0
1012	/*
1013	 * Check that inner source matches the outer destination.
1014	 * XXX: We need some method to convert IPv4 into IPv6 address here,
1015	 *	and compare IPv6 addresses.
1016	 */
1017	if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
1018		DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
1019		    "%04x vs %04x", ip.ip_src.s_addr,
1020		    nat64_get_ip4(&ip6->ip6_dst));
1021		goto freeit;
1022	}
1023#endif
1024	/*
1025	 * Create new mbuf for ICMPv6 datagram.
1026	 * NOTE: len is data length just after inner IP header.
1027	 */
1028	len = m->m_pkthdr.len - hlen;
1029	if (sizeof(struct ip6_hdr) +
1030	    sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
1031		len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
1032		    sizeof(struct ip6_hdr);
1033	plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
1034	n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
1035	if (n == NULL) {
1036		NAT64STAT_INC(&cfg->stats, nomem);
1037		m_freem(m);
1038		return (NULL);
1039	}
1040	m_move_pkthdr(n, m);
1041	M_ALIGN(n, offset + plen + max_hdr);
1042	n->m_len = n->m_pkthdr.len = offset + plen;
1043	/* Adjust ip6_plen in outer header */
1044	ip6->ip6_plen = htons(plen);
1045	/* Construct new inner IPv6 header */
1046	eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
1047	eip6->ip6_src = ip6->ip6_dst;
1048
1049	/* Use the same prefix that we have in outer header */
1050	eip6->ip6_dst = ip6->ip6_src;
1051	MPASS(cfg->flags & NAT64_PLATPFX);
1052	nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr);
1053
1054	eip6->ip6_flow = htonl(ip.ip_tos << 20);
1055	eip6->ip6_vfc |= IPV6_VERSION;
1056	eip6->ip6_hlim = ip.ip_ttl;
1057	eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
1058	eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
1059	m_copydata(m, hlen, len, (char *)(eip6 + 1));
1060	/*
1061	 * We need to translate source port in the inner ULP header,
1062	 * and adjust ULP checksum.
1063	 */
1064	switch (ip.ip_p) {
1065	case IPPROTO_TCP:
1066		if (len < offsetof(struct tcphdr, th_sum))
1067			break;
1068		tcp = TCP(eip6 + 1);
1069		if (icmpid != 0) {
1070			tcp->th_sum = cksum_adjust(tcp->th_sum,
1071			    tcp->th_sport, icmpid);
1072			tcp->th_sport = icmpid;
1073		}
1074		tcp->th_sum = cksum_add(tcp->th_sum,
1075		    ~nat64_cksum_convert(eip6, &ip));
1076		break;
1077	case IPPROTO_UDP:
1078		if (len < offsetof(struct udphdr, uh_sum))
1079			break;
1080		udp = UDP(eip6 + 1);
1081		if (icmpid != 0) {
1082			udp->uh_sum = cksum_adjust(udp->uh_sum,
1083			    udp->uh_sport, icmpid);
1084			udp->uh_sport = icmpid;
1085		}
1086		udp->uh_sum = cksum_add(udp->uh_sum,
1087		    ~nat64_cksum_convert(eip6, &ip));
1088		break;
1089	case IPPROTO_ICMP:
1090		/*
1091		 * Check if this is an ICMP error message for echo request
1092		 * that we sent. I.e. ULP in the data containing invoking
1093		 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
1094		 */
1095		icmp = (struct icmp *)(eip6 + 1);
1096		if (icmp->icmp_type != ICMP_ECHO) {
1097			m_freem(n);
1098			goto freeit;
1099		}
1100		/*
1101		 * For our client this original datagram should looks
1102		 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
1103		 * Thus we need adjust icmp_cksum and convert type from
1104		 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
1105		 */
1106		nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
1107		    ICMP6_ECHO_REQUEST);
1108	}
1109	m_freem(m);
1110	/* Convert ICMPv4 into ICMPv6 header */
1111	icmp = mtodo(n, offset);
1112	ICMP6(icmp)->icmp6_type = type;
1113	ICMP6(icmp)->icmp6_code = code;
1114	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
1115	ICMP6(icmp)->icmp6_cksum = 0;
1116	ICMP6(icmp)->icmp6_cksum = cksum_add(
1117	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
1118	    in_cksum_skip(n, n->m_pkthdr.len, offset));
1119	return (n);
1120freeit:
1121	m_freem(m);
1122	NAT64STAT_INC(&cfg->stats, dropped);
1123	return (NULL);
1124}
1125
1126int
1127nat64_getlasthdr(struct mbuf *m, int *offset)
1128{
1129	struct ip6_hdr *ip6;
1130	struct ip6_hbh *hbh;
1131	int proto, hlen;
1132
1133	if (offset != NULL)
1134		hlen = *offset;
1135	else
1136		hlen = 0;
1137
1138	if (m->m_len < hlen + sizeof(*ip6))
1139		return (-1);
1140
1141	ip6 = mtodo(m, hlen);
1142	hlen += sizeof(*ip6);
1143	proto = ip6->ip6_nxt;
1144	/* Skip extension headers */
1145	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
1146	    proto == IPPROTO_DSTOPTS) {
1147		hbh = mtodo(m, hlen);
1148		/*
1149		 * We expect mbuf has contigious data up to
1150		 * upper level header.
1151		 */
1152		if (m->m_len < hlen)
1153			return (-1);
1154		/*
1155		 * We doesn't support Jumbo payload option,
1156		 * so return error.
1157		 */
1158		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
1159			return (-1);
1160		proto = hbh->ip6h_nxt;
1161		hlen += (hbh->ip6h_len + 1) << 3;
1162	}
1163	if (offset != NULL)
1164		*offset = hlen;
1165	return (proto);
1166}
1167
1168int
1169nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
1170    struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,
1171    void *logdata)
1172{
1173	struct nhop6_basic nh;
1174	struct ip6_hdr ip6;
1175	struct sockaddr_in6 dst;
1176	struct ip *ip;
1177	struct mbufq mq;
1178	uint16_t ip_id, ip_off;
1179	uint16_t *csum;
1180	int plen, hlen;
1181	uint8_t proto;
1182
1183	ip = mtod(m, struct ip*);
1184
1185	if (ip->ip_ttl <= IPTTLDEC) {
1186		nat64_icmp_reflect(m, ICMP_TIMXCEED,
1187		    ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);
1188		return (NAT64RETURN);
1189	}
1190
1191	ip6.ip6_dst = *daddr;
1192	ip6.ip6_src = *saddr;
1193
1194	hlen = ip->ip_hl << 2;
1195	plen = ntohs(ip->ip_len) - hlen;
1196	proto = ip->ip_p;
1197
1198	/* Save ip_id and ip_off, both are in network byte order */
1199	ip_id = ip->ip_id;
1200	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
1201
1202	/* Fragment length must be multiple of 8 octets */
1203	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
1204		nat64_icmp_reflect(m, ICMP_PARAMPROB,
1205		    ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);
1206		return (NAT64RETURN);
1207	}
1208	/* Fragmented ICMP is unsupported */
1209	if (proto == IPPROTO_ICMP && ip_off != 0) {
1210		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
1211		NAT64STAT_INC(&cfg->stats, dropped);
1212		return (NAT64MFREE);
1213	}
1214
1215	dst.sin6_addr = ip6.ip6_dst;
1216	if (nat64_find_route6(&nh, &dst, m) != 0) {
1217		NAT64STAT_INC(&cfg->stats, noroute6);
1218		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
1219		    &cfg->stats, logdata);
1220		return (NAT64RETURN);
1221	}
1222	if (nh.nh_mtu < plen + sizeof(ip6) &&
1223	    (ip->ip_off & htons(IP_DF)) != 0) {
1224		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
1225		    FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
1226		return (NAT64RETURN);
1227	}
1228
1229	ip6.ip6_flow = htonl(ip->ip_tos << 20);
1230	ip6.ip6_vfc |= IPV6_VERSION;
1231	ip6.ip6_hlim = ip->ip_ttl;
1232	/* Forwarding code will decrement TTL for netisr based output. */
1233	if (V_nat64out == &nat64_direct)
1234		ip6.ip6_hlim -= IPTTLDEC;
1235	ip6.ip6_plen = htons(plen);
1236	ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
1237
1238	/* Handle delayed checksums if needed. */
1239	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1240		in_delayed_cksum(m);
1241		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1242	}
1243	/* Convert checksums. */
1244	switch (proto) {
1245	case IPPROTO_TCP:
1246		csum = &TCP(mtodo(m, hlen))->th_sum;
1247		if (lport != 0) {
1248			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1249			*csum = cksum_adjust(*csum, tcp->th_dport, lport);
1250			tcp->th_dport = lport;
1251		}
1252		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1253		break;
1254	case IPPROTO_UDP:
1255		csum = &UDP(mtodo(m, hlen))->uh_sum;
1256		if (lport != 0) {
1257			struct udphdr *udp = UDP(mtodo(m, hlen));
1258			*csum = cksum_adjust(*csum, udp->uh_dport, lport);
1259			udp->uh_dport = lport;
1260		}
1261		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
1262		break;
1263	case IPPROTO_ICMP:
1264		m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);
1265		if (m == NULL)	/* stats already accounted */
1266			return (NAT64RETURN);
1267	}
1268
1269	m_adj(m, hlen);
1270	mbufq_init(&mq, 255);
1271	nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off);
1272	while ((m = mbufq_dequeue(&mq)) != NULL) {
1273		if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1274		    &cfg->stats, logdata) != 0)
1275			break;
1276		NAT64STAT_INC(&cfg->stats, opcnt46);
1277	}
1278	mbufq_drain(&mq);
1279	return (NAT64RETURN);
1280}
1281
1282int
1283nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
1284    struct nat64_config *cfg, void *logdata)
1285{
1286	struct ip ip;
1287	struct icmp6_hdr *icmp6;
1288	struct ip6_frag *ip6f;
1289	struct ip6_hdr *ip6, *ip6i;
1290	uint32_t mtu;
1291	int plen, proto;
1292	uint8_t type, code;
1293
1294	if (hlen == 0) {
1295		ip6 = mtod(m, struct ip6_hdr *);
1296		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1297		    nat64_check_ip6(&ip6->ip6_dst) != 0)
1298			return (NAT64SKIP);
1299
1300		proto = nat64_getlasthdr(m, &hlen);
1301		if (proto != IPPROTO_ICMPV6) {
1302			DPRINTF(DP_DROPS,
1303			    "dropped due to mbuf isn't contigious");
1304			NAT64STAT_INC(&cfg->stats, dropped);
1305			return (NAT64MFREE);
1306		}
1307	}
1308
1309	/*
1310	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
1311	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
1312	 */
1313	icmp6 = mtodo(m, hlen);
1314	mtu = 0;
1315	switch (icmp6->icmp6_type) {
1316	case ICMP6_DST_UNREACH:
1317		type = ICMP_UNREACH;
1318		switch (icmp6->icmp6_code) {
1319		case ICMP6_DST_UNREACH_NOROUTE:
1320		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1321		case ICMP6_DST_UNREACH_ADDR:
1322			code = ICMP_UNREACH_HOST;
1323			break;
1324		case ICMP6_DST_UNREACH_ADMIN:
1325			code = ICMP_UNREACH_HOST_PROHIB;
1326			break;
1327		case ICMP6_DST_UNREACH_NOPORT:
1328			code = ICMP_UNREACH_PORT;
1329			break;
1330		default:
1331			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1332			    " code %d", icmp6->icmp6_type,
1333			    icmp6->icmp6_code);
1334			NAT64STAT_INC(&cfg->stats, dropped);
1335			return (NAT64MFREE);
1336		}
1337		break;
1338	case ICMP6_PACKET_TOO_BIG:
1339		type = ICMP_UNREACH;
1340		code = ICMP_UNREACH_NEEDFRAG;
1341		mtu = ntohl(icmp6->icmp6_mtu);
1342		if (mtu < IPV6_MMTU) {
1343			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
1344			    " code %d", mtu, icmp6->icmp6_type,
1345			    icmp6->icmp6_code);
1346			NAT64STAT_INC(&cfg->stats, dropped);
1347			return (NAT64MFREE);
1348		}
1349		/*
1350		 * Adjust MTU to reflect difference between
1351		 * IPv6 an IPv4 headers.
1352		 */
1353		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
1354		break;
1355	case ICMP6_TIME_EXCEEDED:
1356		type = ICMP_TIMXCEED;
1357		code = icmp6->icmp6_code;
1358		break;
1359	case ICMP6_PARAM_PROB:
1360		switch (icmp6->icmp6_code) {
1361		case ICMP6_PARAMPROB_HEADER:
1362			type = ICMP_PARAMPROB;
1363			code = ICMP_PARAMPROB_ERRATPTR;
1364			mtu = ntohl(icmp6->icmp6_pptr);
1365			switch (mtu) {
1366			case 0: /* Version/Traffic Class */
1367			case 1: /* Traffic Class/Flow Label */
1368				break;
1369			case 4: /* Payload Length */
1370			case 5:
1371				mtu = 2;
1372				break;
1373			case 6: /* Next Header */
1374				mtu = 9;
1375				break;
1376			case 7: /* Hop Limit */
1377				mtu = 8;
1378				break;
1379			default:
1380				if (mtu >= 8 && mtu <= 23) {
1381					mtu = 12; /* Source address */
1382					break;
1383				}
1384				if (mtu >= 24 && mtu <= 39) {
1385					mtu = 16; /* Destination address */
1386					break;
1387				}
1388				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1389				    " code %d, pptr %d", icmp6->icmp6_type,
1390				    icmp6->icmp6_code, mtu);
1391				NAT64STAT_INC(&cfg->stats, dropped);
1392				return (NAT64MFREE);
1393			}
1394		case ICMP6_PARAMPROB_NEXTHEADER:
1395			type = ICMP_UNREACH;
1396			code = ICMP_UNREACH_PROTOCOL;
1397			break;
1398		default:
1399			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
1400			    " code %d, pptr %d", icmp6->icmp6_type,
1401			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
1402			NAT64STAT_INC(&cfg->stats, dropped);
1403			return (NAT64MFREE);
1404		}
1405		break;
1406	default:
1407		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
1408		    icmp6->icmp6_type, icmp6->icmp6_code);
1409		NAT64STAT_INC(&cfg->stats, dropped);
1410		return (NAT64MFREE);
1411	}
1412
1413	hlen += sizeof(struct icmp6_hdr);
1414	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
1415		NAT64STAT_INC(&cfg->stats, dropped);
1416		DPRINTF(DP_DROPS, "Message is too short %d",
1417		    m->m_pkthdr.len);
1418		return (NAT64MFREE);
1419	}
1420	/*
1421	 * We need at least ICMP_MINLEN bytes of original datagram payload
1422	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
1423	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
1424	 * header we will not have to do m_pullup() again.
1425	 *
1426	 * What we have here:
1427	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
1428	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
1429	 * We need to translate it to:
1430	 *
1431	 * Outer header: (alias_host, v4exthost)
1432	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
1433	 *
1434	 * Assume caller function has checked if v4mapPRefix+v4host
1435	 * matches configured prefix.
1436	 * The only two things we should be provided with are mapping between
1437	 * IPv6iHost <> alias_host and between dport and alias_port.
1438	 */
1439	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
1440		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
1441	if (m == NULL) {
1442		NAT64STAT_INC(&cfg->stats, nomem);
1443		return (NAT64RETURN);
1444	}
1445	ip6 = mtod(m, struct ip6_hdr *);
1446	ip6i = mtodo(m, hlen);
1447	ip6f = NULL;
1448	proto = ip6i->ip6_nxt;
1449	plen = ntohs(ip6i->ip6_plen);
1450	hlen += sizeof(struct ip6_hdr);
1451	if (proto == IPPROTO_FRAGMENT) {
1452		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
1453		    ICMP_MINLEN)
1454			goto fail;
1455		ip6f = mtodo(m, hlen);
1456		proto = ip6f->ip6f_nxt;
1457		plen -= sizeof(struct ip6_frag);
1458		hlen += sizeof(struct ip6_frag);
1459		/* Ajust MTU to reflect frag header size */
1460		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
1461			mtu -= sizeof(struct ip6_frag);
1462	}
1463	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1464		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
1465		    proto);
1466		goto fail;
1467	}
1468	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
1469	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
1470		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
1471		goto fail;
1472	}
1473	/* Check if outer dst is the same as inner src */
1474	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
1475		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
1476		goto fail;
1477	}
1478
1479	/* Now we need to make a fake IPv4 packet to generate ICMP message */
1480	ip.ip_dst.s_addr = aaddr;
1481	ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen);
1482	if (ip.ip_src.s_addr == 0)
1483		goto fail;
1484	/* XXX: Make fake ulp header */
1485	if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */
1486		ip6i->ip6_hlim += IPV6_HLIMDEC;
1487	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
1488	m_adj(m, hlen - sizeof(struct ip));
1489	bcopy(&ip, mtod(m, void *), sizeof(ip));
1490	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,
1491	    logdata);
1492	return (NAT64RETURN);
1493fail:
1494	/*
1495	 * We must call m_freem() because mbuf pointer could be
1496	 * changed with m_pullup().
1497	 */
1498	m_freem(m);
1499	NAT64STAT_INC(&cfg->stats, dropped);
1500	return (NAT64RETURN);
1501}
1502
1503int
1504nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
1505    struct nat64_config *cfg, void *logdata)
1506{
1507	struct ip ip;
1508	struct nhop4_basic nh;
1509	struct sockaddr_in dst;
1510	struct ip6_frag *frag;
1511	struct ip6_hdr *ip6;
1512	struct icmp6_hdr *icmp6;
1513	uint16_t *csum;
1514	int plen, hlen, proto;
1515
1516	/*
1517	 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
1518	 * protocol's headers. Also we skip some checks, that ip6_input(),
1519	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
1520	 */
1521	ip6 = mtod(m, struct ip6_hdr *);
1522	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
1523	    nat64_check_ip6(&ip6->ip6_dst) != 0) {
1524		return (NAT64SKIP);
1525	}
1526
1527	/* Starting from this point we must not return zero */
1528	ip.ip_src.s_addr = aaddr;
1529	if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
1530		DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",
1531		    ip.ip_src.s_addr);
1532		NAT64STAT_INC(&cfg->stats, dropped);
1533		return (NAT64MFREE);
1534	}
1535
1536	ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen);
1537	if (ip.ip_dst.s_addr == 0) {
1538		NAT64STAT_INC(&cfg->stats, dropped);
1539		return (NAT64MFREE);
1540	}
1541
1542	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
1543		nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
1544		    ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);
1545		return (NAT64RETURN);
1546	}
1547
1548	hlen = 0;
1549	plen = ntohs(ip6->ip6_plen);
1550	proto = nat64_getlasthdr(m, &hlen);
1551	if (proto < 0) {
1552		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1553		NAT64STAT_INC(&cfg->stats, dropped);
1554		return (NAT64MFREE);
1555	}
1556	frag = NULL;
1557	if (proto == IPPROTO_FRAGMENT) {
1558		/* ipfw_chk should m_pullup up to frag header */
1559		if (m->m_len < hlen + sizeof(*frag)) {
1560			DPRINTF(DP_DROPS,
1561			    "dropped due to mbuf isn't contigious");
1562			NAT64STAT_INC(&cfg->stats, dropped);
1563			return (NAT64MFREE);
1564		}
1565		frag = mtodo(m, hlen);
1566		proto = frag->ip6f_nxt;
1567		hlen += sizeof(*frag);
1568		/* Fragmented ICMPv6 is unsupported */
1569		if (proto == IPPROTO_ICMPV6) {
1570			DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
1571			NAT64STAT_INC(&cfg->stats, dropped);
1572			return (NAT64MFREE);
1573		}
1574		/* Fragment length must be multiple of 8 octets */
1575		if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
1576		    ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
1577			nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
1578			    ICMP6_PARAMPROB_HEADER,
1579			    offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,
1580			    logdata);
1581			return (NAT64RETURN);
1582		}
1583	}
1584	plen -= hlen - sizeof(struct ip6_hdr);
1585	if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
1586		DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
1587		    plen, m->m_pkthdr.len, hlen);
1588		NAT64STAT_INC(&cfg->stats, dropped);
1589		return (NAT64MFREE);
1590	}
1591
1592	icmp6 = NULL;	/* Make gcc happy */
1593	if (proto == IPPROTO_ICMPV6) {
1594		icmp6 = mtodo(m, hlen);
1595		if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
1596		    icmp6->icmp6_type != ICMP6_ECHO_REPLY)
1597			return (nat64_handle_icmp6(m, hlen, aaddr, aport,
1598			    cfg, logdata));
1599	}
1600	dst.sin_addr.s_addr = ip.ip_dst.s_addr;
1601	if (nat64_find_route4(&nh, &dst, m) != 0) {
1602		NAT64STAT_INC(&cfg->stats, noroute4);
1603		nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
1604		    ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);
1605		return (NAT64RETURN);
1606	}
1607	if (nh.nh_mtu < plen + sizeof(ip)) {
1608		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu,
1609		    &cfg->stats, logdata);
1610		return (NAT64RETURN);
1611	}
1612	nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
1613
1614	/* Handle delayed checksums if needed. */
1615	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1616		in6_delayed_cksum(m, plen, hlen);
1617		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
1618	}
1619	/* Convert checksums. */
1620	switch (proto) {
1621	case IPPROTO_TCP:
1622		csum = &TCP(mtodo(m, hlen))->th_sum;
1623		if (aport != 0) {
1624			struct tcphdr *tcp = TCP(mtodo(m, hlen));
1625			*csum = cksum_adjust(*csum, tcp->th_sport, aport);
1626			tcp->th_sport = aport;
1627		}
1628		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1629		break;
1630	case IPPROTO_UDP:
1631		csum = &UDP(mtodo(m, hlen))->uh_sum;
1632		if (aport != 0) {
1633			struct udphdr *udp = UDP(mtodo(m, hlen));
1634			*csum = cksum_adjust(*csum, udp->uh_sport, aport);
1635			udp->uh_sport = aport;
1636		}
1637		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
1638		break;
1639	case IPPROTO_ICMPV6:
1640		/* Checksum in ICMPv6 covers pseudo header */
1641		csum = &icmp6->icmp6_cksum;
1642		*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
1643		    IPPROTO_ICMPV6, 0));
1644		/* Convert ICMPv6 types to ICMP */
1645		proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
1646		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
1647			icmp6->icmp6_type = ICMP_ECHO;
1648		else /* ICMP6_ECHO_REPLY */
1649			icmp6->icmp6_type = ICMP_ECHOREPLY;
1650		*csum = cksum_adjust(*csum, (uint16_t)proto,
1651		    *(uint16_t *)icmp6);
1652		if (aport != 0) {
1653			uint16_t old_id = icmp6->icmp6_id;
1654			icmp6->icmp6_id = aport;
1655			*csum = cksum_adjust(*csum, old_id, aport);
1656		}
1657		break;
1658	};
1659
1660	m_adj(m, hlen - sizeof(ip));
1661	bcopy(&ip, mtod(m, void *), sizeof(ip));
1662	if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst,
1663	    &cfg->stats, logdata) == 0)
1664		NAT64STAT_INC(&cfg->stats, opcnt64);
1665	return (NAT64RETURN);
1666}
1667
1668