/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015-2019 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/pf/pf.h>

#include "nat64lsn.h"

MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

#define	NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
#define	NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
#define	NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
#define	NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))

static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
#define	PERIODIC_DELAY		4
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,
	JTYPE_NEWPORTGROUP,
	JTYPE_DESTROY,
};

struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item)	entries;
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			struct mbuf		*m;
			struct nat64lsn_host	*host;
			struct nat64lsn_state	*state;
			uint32_t		src6_hval;
			uint32_t		state_hval;
			struct ipfw_flow_id	f_id;
			in_addr_t		faddr;
			uint16_t		port;
			uint8_t			proto;
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			struct nat64lsn_hosts_slist	hosts;
			struct nat64lsn_pg_slist	portgroups;
			struct nat64lsn_pgchunk		*pgchunk;
			struct epoch_context		epoch_ctx;
		};
	};
};

static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);

#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)

static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t result;

	result = flags & (TH_FIN|TH_SYN);
	result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
	result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */

	return (result);
}
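
/*
 * Example: a SYN|ACK segment (TH_SYN|TH_ACK == 0x12) is converted by
 * convert_tcp_flags() to NAT64_FLAG_SYN|NAT64_FLAG_ESTAB (0x06):
 * TH_SYN stays in bit 1, and TH_ACK (0x10) shifted right by two lands
 * in bit 2 (ESTAB).  TH_RST (0x04) shifted right by two lands in
 * bit 0, i.e. a reset is recorded as FIN and the state then ages out
 * with st_close_ttl.
 */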

static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
    struct nat64lsn_state *state)
{

	memset(plog, 0, sizeof(*plog));
	plog->length = PFLOG_REAL_HDRLEN;
	plog->af = family;
	plog->action = PF_NAT;
	plog->dir = PF_IN;
	plog->rulenr = htonl(state->ip_src);
	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
	    (state->proto << 8) | (state->ip_dst & 0xff));
	plog->ruleset[0] = '\0';
	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}

#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
    struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{

	/*
	 * Different algorithms for selecting an alias address could
	 * be implemented here.
	 * XXX: for now use the first available one.
	 */
	return (CK_SLIST_FIRST(&host->aliases));
}

#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
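
/*
 * The state hash key consists of two 32-bit words: the foreign IPv4
 * address (in host byte order) and the ports pair.  Both
 * nat64lsn_translate6() and the job handler build it the same way:
 *
 *	data[0] = addr;
 *	data[1] = (f_id->dst_port << 16) | port;
 *	hval = STATE_HVAL(cfg, data);
 */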
224
225#ifdef __LP64__
226#define	FREEMASK_FFSLL(pg, faddr)		\
227    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
228#define	FREEMASK_BTR(pg, faddr, bit)	\
229    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
230#define	FREEMASK_BTS(pg, faddr, bit)	\
231    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
232#define	FREEMASK_ISSET(pg, faddr, bit)	\
233    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
234#define	FREEMASK_COPY(pg, n, out)	\
235    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
236#else
237static inline int
238freemask_ffsll(uint32_t *freemask)
239{
240	int i;
241
242	if ((i = ffsl(freemask[0])) != 0)
243		return (i);
244	if ((i = ffsl(freemask[1])) != 0)
245		return (i + 32);
246	return (0);
247}
248#define	FREEMASK_FFSLL(pg, faddr)		\
249    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
250#define	FREEMASK_BTR(pg, faddr, bit)	\
251    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
252#define	FREEMASK_BTS(pg, faddr, bit)	\
253    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
254#define	FREEMASK_ISSET(pg, faddr, bit)	\
255    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
256#define	FREEMASK_COPY(pg, n, out)	\
257    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
258	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
259#endif /* !__LP64__ */
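
/*
 * Each PG keeps a 64-bit freemask per states chunk: bit i set means
 * that state i (serving port base_port + i) is free.  FREEMASK_FFSLL()
 * returns the 1-based index of the first free state, or 0 when the
 * chunk is full; FREEMASK_BTR() atomically claims a state and
 * FREEMASK_BTS() releases it.  E.g. with a freemask of
 * 0xfffffffffffffffe the state for base_port is taken and
 * FREEMASK_FFSLL() returns 2, pointing to state 1 as the first free
 * one.
 */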

#define	NAT64LSN_TRY_PGCNT	32
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
    uint32_t *pgidx, in_addr_t faddr)
{
	struct nat64lsn_pg *pg, *oldpg;
	uint32_t idx, oldidx;
	int cnt;

	cnt = 0;
	/* First try last used PG */
	oldpg = pg = ck_pr_load_ptr(pgptr);
	idx = oldidx = ck_pr_load_32(pgidx);
	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	do {
		ck_pr_fence_load();
		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
			/*
			 * If the last used PG had no free states and we
			 * advanced to another one, try to update the
			 * cached pointer.
			 * NOTE: it can already be updated by the jobs
			 *	 handler, thus we use a CAS operation.
			 */
			if (cnt > 0)
				ck_pr_cas_ptr(pgptr, oldpg, pg);
			return (pg);
		}
		/* Stop if idx is out of range */
		if (!ISSET32(*chunkmask, idx / 32))
			break;

		if (ISSET32(pgmask[idx / 32], idx % 32))
			pg = ck_pr_load_ptr(
			    &chunks[idx / 32]->pgptr[idx % 32]);
		else
			pg = NULL;

		idx++;
	} while (++cnt < NAT64LSN_TRY_PGCNT);

	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	ck_pr_cas_32(pgidx, oldidx, idx);
	return (NULL);
}

static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(
		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
		    link->alias->tcp, &link->alias->tcp_pg,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(
		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
		    link->alias->udp, &link->alias->udp_pg,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(
		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
		    link->alias->icmp, &link->alias->icmp_pg,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Let's try to grab the state atomically */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			/*
			 * XXX: If the host was about to be expired,
			 * reset the NAT64LSN_DEADHOST flag.
			 */
			host->flags &= ~NAT64LSN_DEADHOST;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}

/*
 * Inspects ICMP packets to see if the message contains an inner packet
 * header, in which case we need to alter @addr and @port.
 */
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
    uint16_t *port)
{
	struct icmp *icmp;
	struct ip *ip;
	int off;
	uint8_t inner_proto;

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	off = (ip->ip_hl << 2) + ICMP_MINLEN;
	if ((*mp)->m_len < off)
		*mp = m_pullup(*mp, off);
	if (*mp == NULL)
		return (ENOMEM);

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	icmp = L3HDR(ip, struct icmp *);
	switch (icmp->icmp_type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		/* Use icmp ID as distinguisher */
		*port = ntohs(icmp->icmp_id);
		return (0);
	case ICMP_UNREACH:
	case ICMP_TIMXCEED:
		break;
	default:
		return (EOPNOTSUPP);
	}
	/*
	 * ICMP_UNREACH and ICMP_TIMXCEED contain an IP header plus
	 * 64 bits of the ULP header.
	 */
	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
		return (EINVAL);
	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	ip = mtodo(*mp, off); /* Inner IP header */
	inner_proto = ip->ip_p;
	off += ip->ip_hl << 2; /* Skip inner IP header */
	*addr = ntohl(ip->ip_src.s_addr);
	if ((*mp)->m_len < off + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	switch (inner_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* Copy source port from the header */
		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
		*proto = inner_proto;
		return (0);
	case IPPROTO_ICMP:
		/*
		 * We will translate only ICMP errors for our ICMP
		 * echo requests.
		 */
		icmp = mtodo(*mp, off);
		if (icmp->icmp_type != ICMP_ECHO)
			return (EOPNOTSUPP);
		*port = ntohs(icmp->icmp_id);
		return (0);
	}
	return (EOPNOTSUPP);
}

static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int chunk_idx, pg_idx, state_idx;

	NAT64LSN_EPOCH_ASSERT();

	if (port < NAT64_MIN_PORT)
		return (NULL);
	/*
	 * Alias keeps 32 pgchunks for each protocol.
	 * Each pgchunk has 32 pointers to portgroup.
	 * Each portgroup has 64 states for ports.
	 */
	port -= NAT64_MIN_PORT;
	chunk_idx = port / 2048;

	port -= chunk_idx * 2048;
	pg_idx = port / 64;
	state_idx = port % 64;
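	/*
	 * Worked example, assuming NAT64_MIN_PORT is 1024: TCP port
	 * 12345 gives 12345 - 1024 = 11321, so chunk_idx = 5,
	 * pg_idx = (11321 - 5 * 2048) / 64 = 16 and state_idx = 57.
	 * The PG in that slot was created with base_port =
	 * 1024 + 64 * (5 * 32 + 16) = 12288, and 12288 + 57 = 12345.
	 */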

	/*
	 * First check in proto_chunkmask that the PG chunk is allocated.
	 * Then check in proto_pgmask that the PG pointer is valid.
	 */
	pg = NULL;
	switch (proto) {
	case IPPROTO_TCP:
		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_UDP:
		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_ICMP:
		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	if (FREEMASK_ISSET(pg, faddr, state_idx))
		return (NULL);

	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
	ck_pr_fence_load();
	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
		return (state);
	return (NULL);
}

/*
 * Reassemble IPv4 fragments, do m_pullup() if needed, and get some ULP
 * fields that might be unknown until reassembly is completed.
 */
static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
    uint16_t *port)
{
	struct ip *ip;
	int len;

	m = ip_reass(m);
	if (m == NULL)
		return (NULL);
	/* IP header must be contiguous after ip_reass() */
	ip = mtod(m, struct ip *);
	len = ip->ip_hl << 2;
	switch (ip->ip_p) {
	case IPPROTO_ICMP:
		len += ICMP_MINLEN; /* Enough to get icmp_id */
		break;
	case IPPROTO_TCP:
		len += sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		len += sizeof(struct udphdr);
		break;
	default:
		m_freem(m);
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (NULL);
	}
	if (m->m_len < len) {
		m = m_pullup(m, len);
		if (m == NULL) {
			NAT64STAT_INC(&cfg->base.stats, nomem);
			return (NULL);
		}
		ip = mtod(m, struct ip *);
	}
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
		break;
	case IPPROTO_UDP:
		*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
		break;
	}
	return (m);
}

static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_state *state;
	struct nat64lsn_alias *alias;
	uint32_t addr, flags;
	uint16_t port, ts;
	int ret;
	uint8_t proto;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	proto = f_id->proto;
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Reassemble fragments if needed */
	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
		if (*mp == NULL)
			return (IP_FW_DENY);
	}

	/* Check if protocol is supported */
	switch (proto) {
	case IPPROTO_ICMP:
		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM) {
				NAT64STAT_INC(&cfg->base.stats, nomem);
				return (IP_FW_DENY);
			}
			NAT64STAT_INC(&cfg->base.stats, noproto);
			return (cfg->nomatch_verdict);
		}
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->base.stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	alias = &ALIAS_BYHASH(cfg, addr);
	MPASS(addr == alias->addr);

	/* Check that we have state for this port */
	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
	    port, proto);
	if (state == NULL) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */

	/* Update some state fields if needed */
	SET_AGE(ts);
	if (f_id->proto == IPPROTO_TCP)
		flags = convert_tcp_flags(f_id->_flags);
	else
		flags = 0;
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	port = htons(state->sport);
	src6 = state->ip6_dst;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET, state);
	} else
		logdata = NULL;

	/*
	 * We already have src6 with an embedded address, but it is
	 * possible that src_ip differs from state->ip_dst; this is why
	 * we do the embedding again.
	 */
	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
	    &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

/*
 * Check if a particular state is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static int
nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
	int age, ttl;

	/* State was marked as stale in a previous pass. */
	if (ISSET32(state->flags, NAT64_BIT_STALE))
		return (1);

	/* State is not yet initialized; it is about to become READY */
	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
		return (0);

	age = GET_AGE(state->timestamp);
	switch (state->proto) {
	case IPPROTO_TCP:
		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
			ttl = cfg->st_close_ttl;
		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
			ttl = cfg->st_estab_ttl;
		else	/* SYN seen, or no TCP flags recorded yet */
			ttl = cfg->st_syn_ttl;
		if (age > ttl)
			return (1);
		break;
	case IPPROTO_UDP:
		if (age > cfg->st_udp_ttl)
			return (1);
		break;
	case IPPROTO_ICMP:
		if (age > cfg->st_icmp_ttl)
			return (1);
		break;
	}
	return (0);
}

static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in a previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state; wait
			 * until it becomes safe for translate4, then mark
			 * the state as free.
			 */
		}
	}

	/*
	 * If we saw some live states, update the PG timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}

static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
    struct nat64lsn_pg_slist *portgroups)
{
	struct nat64lsn_alias *alias;
	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
	uint32_t *pgmask, *pgidx;
	int i, idx;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
			if (nat64lsn_maintain_pg(cfg, pg) == 0)
				continue;
			/* Always keep first PG */
			if (pg->base_port == NAT64_MIN_PORT)
				continue;
			/*
			 * PG is expired, unlink it and schedule for
			 * deferred destroying.
			 */
			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
			switch (pg->proto) {
			case IPPROTO_TCP:
				pgmask = alias->tcp_pgmask;
				pgptr = &alias->tcp_pg;
				pgidx = &alias->tcp_pgidx;
				firstpg = alias->tcp[0]->pgptr[0];
				break;
			case IPPROTO_UDP:
				pgmask = alias->udp_pgmask;
				pgptr = &alias->udp_pg;
				pgidx = &alias->udp_pgidx;
				firstpg = alias->udp[0]->pgptr[0];
				break;
			case IPPROTO_ICMP:
				pgmask = alias->icmp_pgmask;
				pgptr = &alias->icmp_pg;
				pgidx = &alias->icmp_pgidx;
				firstpg = alias->icmp[0]->pgptr[0];
				break;
			}
			/* Reset the corresponding bit in pgmask array. */
			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
			ck_pr_fence_store();
			/* If last used PG points to this PG, reset it. */
			ck_pr_cas_ptr(pgptr, pg, firstpg);
			ck_pr_cas_32(pgidx, idx, 0);
			/* Unlink PG from alias's chain */
			ALIAS_LOCK(alias);
			CK_SLIST_REMOVE(&alias->portgroups, pg,
			    nat64lsn_pg, entries);
			alias->portgroups_count--;
			ALIAS_UNLOCK(alias);
			/* And link to job's chain for deferred destroying */
			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
		}
	}
}

static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Was the host marked on a previous pass? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0) {
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			if (host->states_count > 0)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}

static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}

#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			if (h->states_count / 2 < h->states_hashsize ||
			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				continue;
			hsize = h->states_hashsize * 2;
			hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			    M_NOWAIT);
			if (hash == NULL)
				continue;
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);

			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif

/*
 * This procedure is used to perform various maintenance
 * of the dynamic hash lists. Currently it is called every 4 seconds.
 */
static void
nat64lsn_periodic(void *data)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_cfg *cfg;

	cfg = (struct nat64lsn_cfg *) data;
	CURVNET_SET(cfg->vp);
	if (cfg->hosts_count > 0) {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji != NULL) {
			ji->jtype = JTYPE_DESTROY;
			CK_SLIST_INIT(&ji->hosts);
			CK_SLIST_INIT(&ji->portgroups);
			nat64lsn_expire_hosts(cfg, &ji->hosts);
			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
			    nat64lsn_job_destroy);
		} else
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
	CURVNET_RESTORE();
}

#define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage) : 0)
#define	HOST_ERROR(stage)		ALLOC_ERROR(stage, 1)
#define	PG_ERROR(stage)			ALLOC_ERROR(stage, 2)
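
/*
 * The error code encodes both the object type and the failed stage,
 * e.g. HOST_ERROR(2) == 12 (host allocation failed at stage 2) and
 * PG_ERROR(3) == 23, while stage 0 always maps to 0, i.e. success.
 */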
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_host *host;
	struct nat64lsn_state *state;
	uint32_t hval, data[2];
	int i;

	/* Check that the host was not added yet. */
	NAT64LSN_EPOCH_ASSERT();
	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
			/* The host was allocated in a previous call. */
			ji->host = host;
			goto get_state;
		}
	}

	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
	if (ji->host == NULL)
		return (HOST_ERROR(1));

	host->states_hashsize = NAT64LSN_HSIZE;
	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
	if (host->states_hash == NULL) {
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(2));
	}

	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
	if (link == NULL) {
		free(host->states_hash, M_NAT64LSN);
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(3));
	}

	/* Initialize */
	HOST_LOCK_INIT(host);
	SET_AGE(host->timestamp);
	host->addr = ji->f_id.src_ip6;
	host->hval = ji->src6_hval;
	host->flags = 0;
	host->states_count = 0;
	host->states_hashsize = NAT64LSN_HSIZE;
	CK_SLIST_INIT(&host->aliases);
	for (i = 0; i < host->states_hashsize; i++)
		CK_SLIST_INIT(&host->states_hash[i]);

	/* Determine alias from flow hash. */
	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
	link->alias = &ALIAS_BYHASH(cfg, hval);
	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);

	ALIAS_LOCK(link->alias);
	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
	link->alias->hosts_count++;
	ALIAS_UNLOCK(link->alias);

	CFG_LOCK(cfg);
	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
	cfg->hosts_count++;
	CFG_UNLOCK(cfg);

get_state:
	data[0] = ji->faddr;
	data[1] = (ji->f_id.dst_port << 16) | ji->port;
	ji->state_hval = hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
	    ji->faddr, ji->port, ji->proto);
	/*
	 * We failed to obtain a new state; the used alias needs a new PG.
	 * XXX: or another alias should be used.
	 */
	if (state == NULL) {
		/* Try to allocate new PG */
		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
			return (HOST_ERROR(4));
		/* We assume that nat64lsn_alloc_pg() got the state */
	} else
		ji->state = state;

	ji->done = 1;
	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
	return (HOST_ERROR(0));
}

static int
nat64lsn_find_pg_place(uint32_t *data)
{
	int i;

	for (i = 0; i < 32; i++) {
		if (~data[i] == 0)
			continue;
		return (i * 32 + ffs(~data[i]) - 1);
	}
	return (-1);
}
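
/*
 * E.g. with data[0] == 0xffffffff (first word fully used) and
 * data[1] == 0x0000000f, the first clear bit is bit 4 of word 1,
 * so the function returns 1 * 32 + 5 - 1 == 36.
 */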

static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask,
    uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
    struct nat64lsn_pg **pgptr, uint8_t proto)
{
	struct nat64lsn_pg *pg;
	int i, pg_idx, chunk_idx;

	/* Find place in pgchunk where PG can be added */
	pg_idx = nat64lsn_find_pg_place(pgmask);
	if (pg_idx < 0)	/* no more PGs */
		return (PG_ERROR(1));
	/* Check that we have allocated pgchunk for given PG index */
	chunk_idx = pg_idx / 32;
	if (!ISSET32(*chunkmask, chunk_idx)) {
		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
		    M_NOWAIT);
		if (chunks[chunk_idx] == NULL)
			return (PG_ERROR(2));
		ck_pr_bts_32(chunkmask, chunk_idx);
		ck_pr_fence_store();
	}
	/* Allocate PG and states chunks */
	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (PG_ERROR(3));
	pg->chunks_count = cfg->states_chunks;
	if (pg->chunks_count > 1) {
		pg->freemask_chunk = malloc(pg->chunks_count *
		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
		if (pg->freemask_chunk == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(4));
		}
		pg->states_chunk = malloc(pg->chunks_count *
		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
		    M_NOWAIT | M_ZERO);
		if (pg->states_chunk == NULL) {
			free(pg->freemask_chunk, M_NAT64LSN);
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(5));
		}
		for (i = 0; i < pg->chunks_count; i++) {
			pg->states_chunk[i] = uma_zalloc(
			    nat64lsn_state_zone, M_NOWAIT);
			if (pg->states_chunk[i] == NULL)
				goto states_failed;
		}
		memset(pg->freemask_chunk, 0xff,
		    sizeof(uint64_t) * pg->chunks_count);
	} else {
		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
		if (pg->states == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(6));
		}
		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
	}

	/* Initialize PG and hook it to pgchunk */
	SET_AGE(pg->timestamp);
	pg->proto = proto;
	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
	ck_pr_fence_store();
	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
	ck_pr_store_ptr(pgptr, pg);

	ALIAS_LOCK(alias);
	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
	SET_AGE(alias->timestamp);
	alias->portgroups_count++;
	ALIAS_UNLOCK(alias);
	NAT64STAT_INC(&cfg->base.stats, spgcreated);
	return (PG_ERROR(0));

states_failed:
	for (i = 0; i < pg->chunks_count; i++)
		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
	free(pg->freemask_chunk, M_NAT64LSN);
	free(pg->states_chunk, M_NAT64LSN);
	uma_zfree(nat64lsn_pg_zone, pg);
	return (PG_ERROR(7));
}

static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_alias *alias;
	int ret;

	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
	if (link == NULL)
		return (PG_ERROR(1));

	/*
	 * TODO: check that we did not already allocate a PG in
	 *	 a previous call.
	 */

	ret = 0;
	alias = link->alias;
	/* Find place in pgchunk where PG can be added */
	switch (ji->proto) {
	case IPPROTO_TCP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->tcp_chunkmask, alias->tcp_pgmask,
		    alias->tcp, &alias->tcp_pg, ji->proto);
		break;
	case IPPROTO_UDP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->udp_chunkmask, alias->udp_pgmask,
		    alias->udp, &alias->udp_pg, ji->proto);
		break;
	case IPPROTO_ICMP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->icmp_chunkmask, alias->icmp_pgmask,
		    alias->icmp, &alias->icmp_pg, ji->proto);
		break;
	default:
		panic("%s: wrong proto %d", __func__, ji->proto);
	}
	if (ret == PG_ERROR(1)) {
		/*
		 * PG_ERROR(1) means that the alias lacks free PGs.
		 * XXX: try the next alias.
		 */
		printf("NAT64LSN: %s: failed to obtain PG\n",
		    __func__);
		return (ret);
	}
	if (ret == PG_ERROR(0)) {
		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
		    ji->state_hval, ji->faddr, ji->port, ji->proto);
		if (ji->state == NULL)
			ret = PG_ERROR(8);
		else
			ji->done = 1;
	}
	return (ret);
}

static void
nat64lsn_do_request(void *data)
{
	struct epoch_tracker et;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji, *ji2;
	struct nat64lsn_cfg *cfg;
	int jcount;
	uint8_t flags;

	cfg = (struct nat64lsn_cfg *)data;
	if (cfg->jlen == 0)
		return;

	CURVNET_SET(cfg->vp);
	STAILQ_INIT(&jhead);

	/* Grab queue */
	JQUEUE_LOCK();
	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* TODO: check if we need to resize hash */

	NAT64STAT_INC(&cfg->base.stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	NAT64LSN_EPOCH_ENTER(et);
	STAILQ_FOREACH(ji, &jhead, entries) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jportfails);
			break;
		default:
			continue;
		}
		if (ji->done != 0) {
			flags = ji->proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(ji->f_id._flags);
			nat64lsn_translate6_internal(cfg, &ji->m,
			    ji->state, flags);
			NAT64STAT_INC(&cfg->base.stats, jreinjected);
		}
	}
	NAT64LSN_EPOCH_EXIT(et);

	ji = STAILQ_FIRST(&jhead);
	while (ji != NULL) {
		ji2 = STAILQ_NEXT(ji, entries);
		/*
		 * In any case we must free the mbuf if the translator
		 * did not consume it.
		 */
		m_freem(ji->m);
		uma_zfree(nat64lsn_job_zone, ji);
		ji = ji2;
	}
	CURVNET_RESTORE();
}

static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
	struct nat64lsn_job_item *ji;

	/*
	 * Do not try to lock possibly contested mutex if we're near the
	 * limit. Drop packet instead.
	 */
	ji = NULL;
	if (cfg->jlen >= cfg->jmaxlen)
		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
	else {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji == NULL)
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	if (ji == NULL) {
		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "failed to create job");
	} else {
		ji->jtype = jtype;
		ji->done = 0;
	}
	return (ji);
}

static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	JQUEUE_LOCK();
	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
	NAT64STAT_INC(&cfg->base.stats, jrequests);
	cfg->jlen++;

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}

static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_host *host;
	struct nat64lsn_pg *pg;
	int i;

	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
	MPASS(ji->jtype == JTYPE_DESTROY);
	while (!CK_SLIST_EMPTY(&ji->hosts)) {
		host = CK_SLIST_FIRST(&ji->hosts);
		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
		if (host->states_count > 0) {
			/*
			 * XXX: The state has been created
			 * during host deletion.
			 */
			printf("NAT64LSN: %s: destroying host with %d "
			    "states\n", __func__, host->states_count);
		}
		nat64lsn_destroy_host(host);
	}
	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
		pg = CK_SLIST_FIRST(&ji->portgroups);
		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
		for (i = 0; i < pg->chunks_count; i++) {
			if (FREEMASK_BITCOUNT(pg, i) != 64) {
				/*
				 * XXX: The state has been created during
				 * PG deletion.
				 */
				printf("NAT64LSN: %s: destroying PG %p "
				    "with non-empty chunk %d\n", __func__,
				    pg, i);
			}
		}
		nat64lsn_destroy_pg(pg);
	}
	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
	uma_zfree(nat64lsn_job_zone, ji);
}

static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->src6_hval = hval;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->state_hval = hval;
		ji->host = host;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jportreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
    struct nat64lsn_state *state, uint8_t flags)
{
	struct pfloghdr loghdr, *logdata;
	int ret;
	uint16_t ts;

	/* Update timestamp and flags if needed */
	SET_AGE(ts);
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET6, state);
	} else
		logdata = NULL;

	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
	    htons(state->aport), &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	struct icmp6_hdr *icmp6;
	uint32_t addr, hval, data[2];
	int offset, proto;
	uint16_t port;
	uint8_t flags;

	/* Check if protocol is supported */
	port = f_id->src_port;
	proto = f_id->proto;
	switch (f_id->proto) {
	case IPPROTO_ICMPV6:
		/*
		 * For ICMPv6 echo reply/request we use icmp6_id as
		 * local port.
		 */
		offset = 0;
		proto = nat64_getlasthdr(*mp, &offset);
		if (proto < 0) {
			NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contiguous");
			return (IP_FW_DENY);
		}
		if (proto == IPPROTO_ICMPV6) {
			icmp6 = mtodo(*mp, offset);
			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
				port = ntohs(icmp6->icmp6_id);
		}
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* Extract IPv4 from destination IPv6 address */
	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
		char a[INET_ADDRSTRLEN];

		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
		return (IP_FW_DENY); /* XXX: add extra stats? */
	}

	/* Try to find host */
	hval = HOST_HVAL(cfg, &f_id->src_ip6);
	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
			break;
	}
	/* We use IPv4 address in host byte order */
	addr = ntohl(addr);
	if (host == NULL)
		return (nat64lsn_request_host(cfg, f_id, mp,
		    hval, addr, port, proto));

	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

	data[0] = addr;
	data[1] = (f_id->dst_port << 16) | port;
	hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
	    port, proto);
	if (state == NULL)
		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
		    port, proto));
	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}

/*
 * Main dataplane entry point.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
	struct nat64lsn_cfg *cfg;
	ipfw_insn *icmd;
	int ret;

	IPFW_RLOCK_ASSERT(ch);

	*done = 0;	/* continue the search in case of failure */
	icmd = cmd + 1;
	if (cmd->opcode != O_EXTERNAL_ACTION ||
	    cmd->arg1 != V_nat64lsn_eid ||
	    icmd->opcode != O_EXTERNAL_INSTANCE ||
	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
		return (IP_FW_DENY);

	*done = 1;	/* terminate the search */

	switch (args->f_id.addr_type) {
	case 4:
		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
		break;
	case 6:
		/*
		 * Check that destination IPv6 address matches our prefix6.
		 */
		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
		    cfg->base.plat_plen / 8) != 0) {
			ret = cfg->nomatch_verdict;
			break;
		}
		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
		break;
	default:
		ret = cfg->nomatch_verdict;
	}

	if (ret != IP_FW_PASS && args->m != NULL) {
		m_freem(args->m);
		args->m = NULL;
	}
	return (ret);
}
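
/*
 * A typical setup that reaches this entry point looks like the sketch
 * below; see ipfw(8) for the authoritative syntax, as option names may
 * differ between releases:
 *
 *	ipfw nat64lsn NAT1 create prefix4 198.19.0.0/24
 *	ipfw add nat64lsn NAT1 ip from any to 64:ff9b::/96 in
 *	ipfw add nat64lsn NAT1 ip from 198.19.0.0/24 to any in
 */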

static int
nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
	struct nat64lsn_states_chunk *chunk;
	int i;

	chunk = (struct nat64lsn_states_chunk *)mem;
	for (i = 0; i < 64; i++)
		chunk->state[i].flags = 0;
	return (0);
}

void
nat64lsn_init_internal(void)
{

	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	JQUEUE_LOCK_INIT();
}

void
nat64lsn_uninit_internal(void)
{

	/* XXX: epoch_task drain */
	JQUEUE_LOCK_DESTROY();
	uma_zdestroy(nat64lsn_host_zone);
	uma_zdestroy(nat64lsn_pgchunk_zone);
	uma_zdestroy(nat64lsn_pg_zone);
	uma_zdestroy(nat64lsn_aliaslink_zone);
	uma_zdestroy(nat64lsn_state_zone);
	uma_zdestroy(nat64lsn_job_zone);
}

void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	CALLOUT_LOCK(cfg);
	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
	CALLOUT_UNLOCK(cfg);
}

struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_alias *alias;
	int i, naddr;

	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
	    M_WAITOK | M_ZERO);

	CFG_LOCK_INIT(cfg);
	CALLOUT_LOCK_INIT(cfg);
	STAILQ_INIT(&cfg->jhead);
	cfg->vp = curvnet;
	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);

	cfg->hash_seed = arc4random();
	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < cfg->hosts_hashsize; i++)
		CK_SLIST_INIT(&cfg->hosts_hash[i]);

	naddr = 1 << (32 - plen);
	cfg->prefix4 = prefix;
	cfg->pmask4 = prefix | (naddr - 1);
	cfg->plen4 = plen;
	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
	    M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < naddr; i++) {
		alias = &cfg->aliases[i];
		alias->addr = prefix + i; /* host byte order */
		CK_SLIST_INIT(&alias->hosts);
		ALIAS_LOCK_INIT(alias);
	}

	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

	return (cfg);
}

static void
nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
	int i;

	if (pg->chunks_count == 1) {
		uma_zfree(nat64lsn_state_zone, pg->states);
	} else {
		for (i = 0; i < pg->chunks_count; i++)
			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
		free(pg->states_chunk, M_NAT64LSN);
		free(pg->freemask_chunk, M_NAT64LSN);
	}
	uma_zfree(nat64lsn_pg_zone, pg);
}

static void
nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias)
{
	struct nat64lsn_pg *pg;
	int i;

	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
		pg = CK_SLIST_FIRST(&alias->portgroups);
		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
		nat64lsn_destroy_pg(pg);
	}
	for (i = 0; i < 32; i++) {
		if (ISSET32(alias->tcp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
		if (ISSET32(alias->udp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
		if (ISSET32(alias->icmp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
	}
	ALIAS_LOCK_DESTROY(alias);
}

static void
nat64lsn_destroy_host(struct nat64lsn_host *host)
{
	struct nat64lsn_aliaslink *link;

	while (!CK_SLIST_EMPTY(&host->aliases)) {
		link = CK_SLIST_FIRST(&host->aliases);
		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);

		ALIAS_LOCK(link->alias);
		CK_SLIST_REMOVE(&link->alias->hosts, link,
		    nat64lsn_aliaslink, alias_entries);
		link->alias->hosts_count--;
		ALIAS_UNLOCK(link->alias);

		uma_zfree(nat64lsn_aliaslink_zone, link);
	}
	HOST_LOCK_DESTROY(host);
	free(host->states_hash, M_NAT64LSN);
	uma_zfree(nat64lsn_host_zone, host);
}

void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *host;
	int i;

	CALLOUT_LOCK(cfg);
	callout_drain(&cfg->periodic);
	CALLOUT_UNLOCK(cfg);
	callout_drain(&cfg->jcallout);

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
			nat64lsn_destroy_host(host);
		}
	}

	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);

	CALLOUT_LOCK_DESTROY(cfg);
	CFG_LOCK_DESTROY(cfg);
	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
	free(cfg->hosts_hash, M_NAT64LSN);
	free(cfg->aliases, M_NAT64LSN);
	free(cfg, M_NAT64LSN);
}