1304046Sae/*-
2346211Sae * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3346211Sae *
4346211Sae * Copyright (c) 2015-2019 Yandex LLC
5304046Sae * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6346211Sae * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7304046Sae *
8304046Sae * Redistribution and use in source and binary forms, with or without
9304046Sae * modification, are permitted provided that the following conditions
10304046Sae * are met:
11304046Sae *
12304046Sae * 1. Redistributions of source code must retain the above copyright
13304046Sae *    notice, this list of conditions and the following disclaimer.
14304046Sae * 2. Redistributions in binary form must reproduce the above copyright
15304046Sae *    notice, this list of conditions and the following disclaimer in the
16304046Sae *    documentation and/or other materials provided with the distribution.
17304046Sae *
18304046Sae * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19304046Sae * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20304046Sae * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21304046Sae * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22304046Sae * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23304046Sae * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24304046Sae * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25304046Sae * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26304046Sae * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27304046Sae * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28304046Sae */
29304046Sae
30304046Sae#include <sys/cdefs.h>
31304046Sae__FBSDID("$FreeBSD: stable/11/sys/netpfil/ipfw/nat64/nat64lsn.c 349411 2019-06-26 12:26:38Z ae $");
32304046Sae
33304046Sae#include <sys/param.h>
34304046Sae#include <sys/systm.h>
35304046Sae#include <sys/counter.h>
36304046Sae#include <sys/errno.h>
37304046Sae#include <sys/kernel.h>
38304046Sae#include <sys/lock.h>
39304046Sae#include <sys/malloc.h>
40304046Sae#include <sys/mbuf.h>
41304046Sae#include <sys/module.h>
42304046Sae#include <sys/rmlock.h>
43304046Sae#include <sys/rwlock.h>
44304046Sae#include <sys/socket.h>
45304046Sae#include <sys/queue.h>
46304046Sae#include <sys/syslog.h>
47304046Sae#include <sys/sysctl.h>
48304046Sae
49304046Sae#include <net/if.h>
50304046Sae#include <net/if_var.h>
51304046Sae#include <net/if_pflog.h>
52304046Sae#include <net/pfil.h>
53304046Sae
54304046Sae#include <netinet/in.h>
55304046Sae#include <netinet/ip.h>
56304046Sae#include <netinet/ip_var.h>
57304046Sae#include <netinet/ip_fw.h>
58304046Sae#include <netinet/ip6.h>
59304046Sae#include <netinet/icmp6.h>
60304046Sae#include <netinet/ip_icmp.h>
61304046Sae#include <netinet/tcp.h>
62304046Sae#include <netinet/udp.h>
63304046Sae#include <netinet6/in6_var.h>
64304046Sae#include <netinet6/ip6_var.h>
65304046Sae#include <netinet6/ip_fw_nat64.h>
66304046Sae
67304046Sae#include <netpfil/ipfw/ip_fw_private.h>
68304046Sae#include <netpfil/pf/pf.h>
69304046Sae
70334836Sae#include "nat64lsn.h"
71334836Sae
/* malloc(9) type tag declared for this module. */
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

static void nat64lsn_periodic(void *data);
/* Multiplied by hz when nat64lsn_periodic() re-arms its callout. */
#define	PERIODIC_DELAY	4
/*
 * Indexed by the IP protocol number taken from the flow id;
 * a zero entry means the protocol is not handled.
 */
static uint8_t nat64lsn_proto_map[256];
uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];

/* Bits kept in the flags field of a state. */
#define	NAT64_FLAG_FIN		0x01	/* FIN was seen */
#define	NAT64_FLAG_SYN		0x02	/* First syn in->out */
#define	NAT64_FLAG_ESTAB	0x04	/* Packet with Ack */
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define	NAT64_FLAG_RDR		0x80	/* Port redirect */
/* Fetch the instance config referenced by arg1 of rule opcode @cmd. */
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* Values start at 1, not 0 */
	JTYPE_NEWPORTGROUP,
	JTYPE_DELPORTGROUP,	/* Queued by nat64lsn_periodic_chkhost() */
};
96304046Sae
/*
 * One entry of the delayed job queue.  Which fields are meaningful
 * depends on @jtype.
 */
struct nat64lsn_job_item {
	TAILQ_ENTRY(nat64lsn_job_item)	next;	/* Job list linkage */
	enum nat64lsn_jtype	jtype;
	struct nat64lsn_host	*nh;	/* Set by alloc_host6() */
	struct nat64lsn_portgroup	*pg;	/* Set by alloc_portgroup() */
	void			*spare_idx;	/* Extra pgidx chunk, may be NULL */
	struct in6_addr		haddr;	/* IPv6 address of the host */
	uint8_t			nat_proto;	/* Copied into a new portgroup */
	uint8_t			done;	/* Non-zero once request succeeded */
	int			needs_idx;	/* Ask alloc_portgroup() for spare_idx */
	int			delcount;	/* Number of bits set in delmask */
	unsigned int		fhash;	/* Flow hash */
	uint32_t		aaddr;	/* Last used address (net) */
	struct mbuf		*m;	/* Packet to reinject, may be NULL */
	struct ipfw_flow_id	f_id;	/* Flow id used when reinjecting @m */
	/* Bit i marks the portgroup at zero-based index i for deletion. */
	uint64_t		delmask[NAT64LSN_PGPTRNMASK];
};
114304046Sae
/* Single file-scope mutex behind the JQUEUE_* macros. */
static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

/* Job queue submission. */
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_head *jhead, int jlen);

/* Job creation and packet translation entry points. */
static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, int jtype);
static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
    int needs_idx);
static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm);
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **pm);

/* Object allocation and release. */
static int alloc_portgroup(struct nat64lsn_job_item *ji);
static void destroy_portgroup(struct nat64lsn_portgroup *pg);
static void destroy_host6(struct nat64lsn_host *nh);
static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);

/* Linking of freshly allocated objects into the instance. */
static int attach_portgroup(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);


/* XXX tmp */
/* UMA zones backing hosts, portgroups and portgroup index chunks. */
static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_pgidx_zone;

static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
    struct nat64lsn_host *nh);
155304046Sae
/*
 * Accessors handed (by the "I6_" name prefix) to the generic CHT_* hash
 * table macros for the table of IPv6 hosts.  Entries are chained through
 * their 'next' member and keyed by their 16-byte 'addr' member.
 * No per-bucket locking is done, so I6_lock/I6_unlock expand to nothing.
 */
#define	I6_hash(x)		(djb_hash((const unsigned char *)(x), 16))
#define	I6_first(_ph, h)	(_ph)[(h)]
#define	I6_next(x)		(x)->next
#define	I6_val(x)		(&(x)->addr)
#define	I6_cmp(a, b)		IN6_ARE_ADDR_EQUAL((a), (b))
#define	I6_lock(a, b)
#define	I6_unlock(a, b)

/*
 * Convenience wrappers operating on the host table of instance @_cfg.
 * @_cfg is parenthesized so that any pointer expression may be passed.
 */
#define	I6HASH_FIND(_cfg, _res, _a) \
	CHT_FIND((_cfg)->ih, (_cfg)->ihsize, I6_, _res, _a)
#define	I6HASH_INSERT(_cfg, _i)	\
	CHT_INSERT_HEAD((_cfg)->ih, (_cfg)->ihsize, I6_, _i)
#define	I6HASH_REMOVE(_cfg, _res, _tmp, _a)	\
	CHT_REMOVE((_cfg)->ih, (_cfg)->ihsize, I6_, _res, _tmp, _a)

#define	I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg)	\
	CHT_FOREACH_SAFE((_cfg)->ih, (_cfg)->ihsize, I6_, _x, _tmp, _cb, _arg)

/* Hash of the 8 bytes starting at @x. */
#define	HASH_IN4(x)	(djb_hash((const unsigned char *)(x), 8))
175304046Sae
/*
 * Multiply-by-33-then-XOR byte hash over the first @len bytes of @h.
 * A @len of zero (or less) yields 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	unsigned int acc;
	int pos;

	acc = 0;
	pos = 0;
	while (pos < len) {
		acc = (acc * 33) ^ h[pos];
		pos++;
	}

	return (acc);
}
187304046Sae
188304046Sae/*
189304046Saestatic size_t
190304046Saebitmask_size(size_t num, int *level)
191304046Sae{
192304046Sae	size_t x;
193304046Sae	int c;
194304046Sae
195304046Sae	for (c = 0, x = num; num > 1; num /= 64, c++)
196304046Sae		;
197304046Sae
198304046Sae	return (x);
199304046Sae}
200304046Sae
201304046Saestatic void
202304046Saebitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
203304046Sae{
204304046Sae	size_t x, z;
205304046Sae
206304046Sae	memset(pmask, 0xFF, bufsize);
207304046Sae	for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
208304046Sae		;
209304046Sae	pmask[x] ~= 0x01;
210304046Sae}
211304046Sae*/
212304046Sae
213304046Saestatic void
214304046Saenat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
215304046Sae    uint32_t n, uint32_t sn)
216304046Sae{
217304046Sae
218316446Sae	memset(plog, 0, sizeof(*plog));
219304046Sae	plog->length = PFLOG_REAL_HDRLEN;
220304046Sae	plog->af = family;
221304046Sae	plog->action = PF_NAT;
222304046Sae	plog->dir = PF_IN;
223304046Sae	plog->rulenr = htonl(n);
224304046Sae	plog->subrulenr = htonl(sn);
225304046Sae	plog->ruleset[0] = '\0';
226304046Sae	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
227304046Sae	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
228304046Sae}
229304046Sae/*
230304046Sae * Inspects icmp packets to see if the message contains different
231304046Sae * packet header so we need to alter @addr and @port.
232304046Sae */
233304046Saestatic int
234304046Saeinspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
235304046Sae    uint16_t *port)
236304046Sae{
237304046Sae	struct ip *ip;
238304046Sae	struct tcphdr *tcp;
239304046Sae	struct udphdr *udp;
240304046Sae	struct icmphdr *icmp;
241304046Sae	int off;
242304046Sae	uint8_t proto;
243304046Sae
244304046Sae	ip = mtod(*m, struct ip *); /* Outer IP header */
245304046Sae	off = (ip->ip_hl << 2) + ICMP_MINLEN;
246304046Sae	if ((*m)->m_len < off)
247304046Sae		*m = m_pullup(*m, off);
248304046Sae	if (*m == NULL)
249304046Sae		return (ENOMEM);
250304046Sae
251304046Sae	ip = mtod(*m, struct ip *); /* Outer IP header */
252304046Sae	icmp = L3HDR(ip, struct icmphdr *);
253304046Sae	switch (icmp->icmp_type) {
254304046Sae	case ICMP_ECHO:
255304046Sae	case ICMP_ECHOREPLY:
256304046Sae		/* Use icmp ID as distinguisher */
257304046Sae		*port = ntohs(*((uint16_t *)(icmp + 1)));
258304046Sae		return (0);
259304046Sae	case ICMP_UNREACH:
260304046Sae	case ICMP_TIMXCEED:
261304046Sae		break;
262304046Sae	default:
263304046Sae		return (EOPNOTSUPP);
264304046Sae	}
265304046Sae	/*
266304046Sae	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
267304046Sae	 * of ULP header.
268304046Sae	 */
269304046Sae	if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
270304046Sae		return (EINVAL);
271304046Sae	if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
272304046Sae		*m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
273304046Sae	if (*m == NULL)
274304046Sae		return (ENOMEM);
275304046Sae	ip = mtodo(*m, off); /* Inner IP header */
276304046Sae	proto = ip->ip_p;
277304046Sae	off += ip->ip_hl << 2; /* Skip inner IP header */
278304046Sae	*addr = ntohl(ip->ip_src.s_addr);
279304046Sae	if ((*m)->m_len < off + ICMP_MINLEN)
280304046Sae		*m = m_pullup(*m, off + ICMP_MINLEN);
281304046Sae	if (*m == NULL)
282304046Sae		return (ENOMEM);
283304046Sae	switch (proto) {
284304046Sae	case IPPROTO_TCP:
285304046Sae		tcp = mtodo(*m, off);
286304046Sae		*nat_proto = NAT_PROTO_TCP;
287304046Sae		*port = ntohs(tcp->th_sport);
288304046Sae		return (0);
289304046Sae	case IPPROTO_UDP:
290304046Sae		udp = mtodo(*m, off);
291304046Sae		*nat_proto = NAT_PROTO_UDP;
292304046Sae		*port = ntohs(udp->uh_sport);
293304046Sae		return (0);
294304046Sae	case IPPROTO_ICMP:
295304046Sae		/*
296304046Sae		 * We will translate only ICMP errors for our ICMP
297304046Sae		 * echo requests.
298304046Sae		 */
299304046Sae		icmp = mtodo(*m, off);
300304046Sae		if (icmp->icmp_type != ICMP_ECHO)
301304046Sae			return (EOPNOTSUPP);
302304046Sae		*port = ntohs(*((uint16_t *)(icmp + 1)));
303304046Sae		return (0);
304304046Sae	};
305304046Sae	return (EOPNOTSUPP);
306304046Sae}
307304046Sae
308304046Saestatic inline uint8_t
309304046Saeconvert_tcp_flags(uint8_t flags)
310304046Sae{
311304046Sae	uint8_t result;
312304046Sae
313304046Sae	result = flags & (TH_FIN|TH_SYN);
314304046Sae	result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
315304046Sae	result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
316304046Sae
317304046Sae	return (result);
318304046Sae}
319304046Sae
320304046Saestatic NAT64NOINLINE int
321304046Saenat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
322304046Sae    struct mbuf **pm)
323304046Sae{
324304046Sae	struct pfloghdr loghdr, *logdata;
325304046Sae	struct in6_addr src6;
326304046Sae	struct nat64lsn_portgroup *pg;
327304046Sae	struct nat64lsn_host *nh;
328304046Sae	struct nat64lsn_state *st;
329304046Sae	struct ip *ip;
330304046Sae	uint32_t addr;
331304046Sae	uint16_t state_flags, state_ts;
332304046Sae	uint16_t port, lport;
333304046Sae	uint8_t nat_proto;
334304046Sae	int ret;
335304046Sae
336304046Sae	addr = f_id->dst_ip;
337304046Sae	port = f_id->dst_port;
338304046Sae	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
339334836Sae		NAT64STAT_INC(&cfg->base.stats, nomatch4);
340304046Sae		return (cfg->nomatch_verdict);
341304046Sae	}
342304046Sae
343304046Sae	/* Check if protocol is supported and get its short id */
344304046Sae	nat_proto = nat64lsn_proto_map[f_id->proto];
345304046Sae	if (nat_proto == 0) {
346334836Sae		NAT64STAT_INC(&cfg->base.stats, noproto);
347304046Sae		return (cfg->nomatch_verdict);
348304046Sae	}
349304046Sae
350304046Sae	/* We might need to handle icmp differently */
351304046Sae	if (nat_proto == NAT_PROTO_ICMP) {
352304046Sae		ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
353304046Sae		if (ret != 0) {
354332767Sae			if (ret == ENOMEM) {
355334836Sae				NAT64STAT_INC(&cfg->base.stats, nomem);
356332767Sae				return (IP_FW_DENY);
357332767Sae			}
358334836Sae			NAT64STAT_INC(&cfg->base.stats, noproto);
359304046Sae			return (cfg->nomatch_verdict);
360304046Sae		}
361304046Sae		/* XXX: Check addr for validity */
362304046Sae		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
363334836Sae			NAT64STAT_INC(&cfg->base.stats, nomatch4);
364304046Sae			return (cfg->nomatch_verdict);
365304046Sae		}
366304046Sae	}
367304046Sae
368304046Sae	/* Calc portgroup offset w.r.t protocol */
369304046Sae	pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
370304046Sae
371304046Sae	/* Check if this port is occupied by any portgroup */
372304046Sae	if (pg == NULL) {
373334836Sae		NAT64STAT_INC(&cfg->base.stats, nomatch4);
374304046Sae#if 0
375304046Sae		DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
376304046Sae		    _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
377304046Sae#endif
378304046Sae		return (cfg->nomatch_verdict);
379304046Sae	}
380304046Sae
381304046Sae	/* TODO: Check flags to see if we need to do some static mapping */
382304046Sae	nh = pg->host;
383304046Sae
384304046Sae	/* Prepare some fields we might need to update */
385304046Sae	SET_AGE(state_ts);
386304046Sae	ip = mtod(*pm, struct ip *);
387304046Sae	if (ip->ip_p == IPPROTO_TCP)
388304046Sae		state_flags = convert_tcp_flags(
389304046Sae		    L3HDR(ip, struct tcphdr *)->th_flags);
390304046Sae	else
391304046Sae		state_flags = 0;
392304046Sae
393304046Sae	/* Lock host and get port mapping */
394304046Sae	NAT64_LOCK(nh);
395304046Sae
396304046Sae	st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
397304046Sae	if (st->timestamp != state_ts)
398304046Sae		st->timestamp = state_ts;
399304046Sae	if ((st->flags & state_flags) != state_flags)
400304046Sae		st->flags |= state_flags;
401304046Sae	lport = htons(st->u.s.lport);
402304046Sae
403304046Sae	NAT64_UNLOCK(nh);
404304046Sae
405334836Sae	if (cfg->base.flags & NAT64_LOG) {
406304046Sae		logdata = &loghdr;
407304046Sae		nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
408304046Sae	} else
409304046Sae		logdata = NULL;
410304046Sae
411349411Sae	src6 = cfg->base.plat_prefix;
412346210Sae	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
413304046Sae	ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
414334836Sae	    &cfg->base, logdata);
415304046Sae
416304046Sae	if (ret == NAT64SKIP)
417332767Sae		return (cfg->nomatch_verdict);
418304046Sae	if (ret == NAT64MFREE)
419304046Sae		m_freem(*pm);
420304046Sae	*pm = NULL;
421304046Sae
422304046Sae	return (IP_FW_DENY);
423304046Sae}
424304046Sae
425304046Saevoid
426304046Saenat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
427304046Sae   const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
428304046Sae   const char *px, int off)
429304046Sae{
430304046Sae	char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
431304046Sae
432334836Sae	if ((V_nat64_debug & DP_STATE) == 0)
433304046Sae		return;
434304046Sae	inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
435304046Sae	inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
436304046Sae	inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
437304046Sae
438304046Sae	DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
439304046Sae	    "%s:%d AGE %d", px, pg->idx, st, off,
440304046Sae	    s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
441304046Sae	    d, st->u.s.fport, GET_AGE(st->timestamp));
442304046Sae}
443304046Sae
444304046Sae/*
445304046Sae * Check if particular TCP state is stale and should be deleted.
446304046Sae * Return 1 if true, 0 otherwise.
447304046Sae */
448304046Saestatic int
449304046Saenat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
450304046Sae    const struct nat64lsn_state *st, int age)
451304046Sae{
452304046Sae	int ttl;
453304046Sae
454304046Sae	if (st->flags & NAT64_FLAG_FIN)
455304046Sae		ttl = cfg->st_close_ttl;
456304046Sae	else if (st->flags & NAT64_FLAG_ESTAB)
457304046Sae		ttl = cfg->st_estab_ttl;
458304046Sae	else if (st->flags & NAT64_FLAG_SYN)
459304046Sae		ttl = cfg->st_syn_ttl;
460304046Sae	else
461304046Sae		ttl = cfg->st_syn_ttl;
462304046Sae
463304046Sae	if (age > ttl)
464304046Sae		return (1);
465304046Sae	return (0);
466304046Sae}
467304046Sae
468304046Sae/*
469304046Sae * Check if nat state @st is stale and should be deleted.
470304046Sae * Return 1 if true, 0 otherwise.
471304046Sae */
472304046Saestatic NAT64NOINLINE int
473304046Saenat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
474304046Sae    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
475304046Sae{
476304046Sae	int age, delete;
477304046Sae
478304046Sae	age = GET_AGE(st->timestamp);
479304046Sae	delete = 0;
480304046Sae
481304046Sae	/* Skip immutable records */
482304046Sae	if (st->flags & NAT64_FLAG_RDR)
483304046Sae		return (0);
484304046Sae
485304046Sae	switch (pg->nat_proto) {
486304046Sae		case NAT_PROTO_TCP:
487304046Sae			delete = nat64lsn_periodic_check_tcp(cfg, st, age);
488304046Sae			break;
489304046Sae		case NAT_PROTO_UDP:
490304046Sae			if (age > cfg->st_udp_ttl)
491304046Sae				delete = 1;
492304046Sae			break;
493304046Sae		case NAT_PROTO_ICMP:
494304046Sae			if (age > cfg->st_icmp_ttl)
495304046Sae				delete = 1;
496304046Sae			break;
497304046Sae	}
498304046Sae
499304046Sae	return (delete);
500304046Sae}
501304046Sae
502304046Sae
/*
 * The following structures and functions
 * are used to perform SLIST_FOREACH_SAFE()
 * analog for states identified by struct st_ptr.
 */

/*
 * Iteration cursor.  A NULL @st marks the end of the chain.
 * @sidx_next is a copy of the current state's 'next' link, taken when
 * the cursor moves, so the walk can continue after the state is zeroed.
 */
struct st_idx {
	struct nat64lsn_portgroup *pg;	/* Portgroup holding @st */
	struct nat64lsn_state *st;	/* Current state */
	struct st_ptr sidx_next;	/* Saved link to the following state */
};
514304046Sae
515304046Saestatic struct st_idx *
516304046Saest_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
517304046Sae    struct st_ptr *sidx, struct st_idx *si)
518304046Sae{
519304046Sae	struct nat64lsn_portgroup *pg;
520304046Sae	struct nat64lsn_state *st;
521304046Sae
522304046Sae	if (sidx->idx == 0) {
523304046Sae		memset(si, 0, sizeof(*si));
524304046Sae		return (si);
525304046Sae	}
526304046Sae
527304046Sae	pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
528304046Sae	st = &pg->states[sidx->off];
529304046Sae
530304046Sae	si->pg = pg;
531304046Sae	si->st = st;
532304046Sae	si->sidx_next = st->next;
533304046Sae
534304046Sae	return (si);
535304046Sae}
536304046Sae
537304046Saestatic struct st_idx *
538304046Saest_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
539304046Sae    struct st_idx *si)
540304046Sae{
541304046Sae	struct st_ptr sidx;
542304046Sae	struct nat64lsn_portgroup *pg;
543304046Sae	struct nat64lsn_state *st;
544304046Sae
545304046Sae	sidx = si->sidx_next;
546304046Sae	if (sidx.idx == 0) {
547304046Sae		memset(si, 0, sizeof(*si));
548304046Sae		si->st = NULL;
549304046Sae		si->pg = NULL;
550304046Sae		return (si);
551304046Sae	}
552304046Sae
553304046Sae	pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
554304046Sae	st = &pg->states[sidx.off];
555304046Sae
556304046Sae	si->pg = pg;
557304046Sae	si->st = st;
558304046Sae	si->sidx_next = st->next;
559304046Sae
560304046Sae	return (si);
561304046Sae}
562304046Sae
563304046Saestatic struct st_idx *
564304046Saest_save_cond(struct st_idx *si_dst, struct st_idx *si)
565304046Sae{
566304046Sae	if (si->st != NULL)
567304046Sae		*si_dst = *si;
568304046Sae
569304046Sae	return (si_dst);
570304046Sae}
571304046Sae
572304046Saeunsigned int
573304046Saenat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
574304046Sae{
575304046Sae	struct st_idx si, si_prev;
576304046Sae	int i;
577304046Sae	unsigned int delcount;
578304046Sae
579304046Sae	delcount = 0;
580304046Sae	for (i = 0; i < nh->hsize; i++) {
581304046Sae		memset(&si_prev, 0, sizeof(si_prev));
582304046Sae		for (st_first(cfg, nh, &nh->phash[i], &si);
583304046Sae		    si.st != NULL;
584304046Sae		    st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
585304046Sae			if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
586304046Sae				continue;
587304046Sae			nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
588304046Sae			    si.st->cur.off);
589304046Sae			/* Unlink from hash */
590304046Sae			if (si_prev.st != NULL)
591304046Sae				si_prev.st->next = si.st->next;
592304046Sae			else
593304046Sae				nh->phash[i] = si.st->next;
594304046Sae			/* Delete state and free its data */
595304046Sae			PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
596304046Sae			memset(si.st, 0, sizeof(struct nat64lsn_state));
597304046Sae			si.st = NULL;
598304046Sae			delcount++;
599304046Sae
600304046Sae			/* Update portgroup timestamp */
601304046Sae			SET_AGE(si.pg->timestamp);
602304046Sae		}
603304046Sae	}
604334836Sae	NAT64STAT_ADD(&cfg->base.stats, sdeleted, delcount);
605304046Sae	return (delcount);
606304046Sae}
607304046Sae
608304046Sae/*
609304046Sae * Checks if portgroup is not used and can be deleted,
610304046Sae * Returns 1 if stale, 0 otherwise
611304046Sae */
612304046Saestatic int
613304046Saestale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
614304046Sae{
615304046Sae
616304046Sae	if (!PG_IS_EMPTY(pg))
617304046Sae		return (0);
618304046Sae	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
619304046Sae		return (0);
620304046Sae	return (1);
621304046Sae}
622304046Sae
623304046Sae/*
624304046Sae * Checks if host record is not used and can be deleted,
625304046Sae * Returns 1 if stale, 0 otherwise
626304046Sae */
627304046Saestatic int
628304046Saestale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
629304046Sae{
630304046Sae
631304046Sae	if (nh->pg_used != 0)
632304046Sae		return (0);
633304046Sae	if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay)
634304046Sae		return (0);
635304046Sae	return (1);
636304046Sae}
637304046Sae
/*
 * Argument block passed from nat64lsn_periodic() to
 * nat64lsn_periodic_chkhost() for every host.
 */
struct nat64lsn_periodic_data {
	struct nat64lsn_cfg *cfg;	/* Instance being maintained */
	struct nat64lsn_job_head jhead;	/* Jobs collected during the pass */
	int jlen;			/* Number of jobs on @jhead */
};
643304046Sae
644304046Saestatic NAT64NOINLINE int
645304046Saenat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
646304046Sae    struct nat64lsn_periodic_data *d)
647304046Sae{
648304046Sae	struct nat64lsn_portgroup *pg;
649304046Sae	struct nat64lsn_job_item *ji;
650304046Sae	uint64_t delmask[NAT64LSN_PGPTRNMASK];
651304046Sae	int delcount, i;
652304046Sae
653304046Sae	delcount = 0;
654304046Sae	memset(delmask, 0, sizeof(delmask));
655304046Sae
656340542Sae	if (V_nat64_debug & DP_JQUEUE) {
657340542Sae		char a[INET6_ADDRSTRLEN];
658340542Sae
659340542Sae		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
660340542Sae		DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
661340542Sae		    stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
662340542Sae	}
663304046Sae	if (!stale_nh(d->cfg, nh)) {
664304046Sae		/* Non-stale host. Inspect internals */
665304046Sae		NAT64_LOCK(nh);
666304046Sae
667304046Sae		/* Stage 1: Check&expire states */
668304046Sae		if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
669304046Sae			SET_AGE(nh->timestamp);
670304046Sae
671304046Sae		/* Stage 2: Check if we need to expire */
672304046Sae		for (i = 0; i < nh->pg_used; i++) {
673304046Sae			pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
674304046Sae			if (pg == NULL)
675304046Sae				continue;
676304046Sae
677304046Sae			/* Check if we can delete portgroup */
678304046Sae			if (stale_pg(d->cfg, pg) == 0)
679304046Sae				continue;
680304046Sae
681304046Sae			DPRINTF(DP_JQUEUE, "Check PG %d", i);
682304046Sae			delmask[i / 64] |= ((uint64_t)1 << (i % 64));
683304046Sae			delcount++;
684304046Sae		}
685304046Sae
686304046Sae		NAT64_UNLOCK(nh);
687304046Sae		if (delcount == 0)
688304046Sae			return (0);
689304046Sae	}
690304046Sae
691304046Sae	DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
692304046Sae	/* We have something to delete - add it to queue */
693304046Sae	ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
694304046Sae	if (ji == NULL)
695304046Sae		return (0);
696304046Sae
697304046Sae	ji->haddr = nh->addr;
698304046Sae	ji->delcount = delcount;
699304046Sae	memcpy(ji->delmask, delmask, sizeof(ji->delmask));
700304046Sae
701304046Sae	TAILQ_INSERT_TAIL(&d->jhead, ji, next);
702304046Sae	d->jlen++;
703304046Sae	return (0);
704304046Sae}
705304046Sae
706304046Sae/*
707304046Sae * This procedure is used to perform various maintance
708304046Sae * on dynamic hash list. Currently it is called every second.
709304046Sae */
710304046Saestatic void
711304046Saenat64lsn_periodic(void *data)
712304046Sae{
713304046Sae	struct ip_fw_chain *ch;
714304046Sae	IPFW_RLOCK_TRACKER;
715304046Sae	struct nat64lsn_cfg *cfg;
716304046Sae	struct nat64lsn_periodic_data d;
717304046Sae	struct nat64lsn_host *nh, *tmp;
718304046Sae
719304046Sae	cfg = (struct nat64lsn_cfg *) data;
720304046Sae	ch = cfg->ch;
721304046Sae	CURVNET_SET(cfg->vp);
722304046Sae
723304046Sae	memset(&d, 0, sizeof(d));
724304046Sae	d.cfg = cfg;
725304046Sae	TAILQ_INIT(&d.jhead);
726304046Sae
727304046Sae	IPFW_RLOCK(ch);
728304046Sae
729304046Sae	/* Stage 1: foreach host, check all its portgroups */
730304046Sae	I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
731304046Sae
732304046Sae	/* Enqueue everything we have requested */
733304046Sae	nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
734304046Sae
735304046Sae	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
736304046Sae
737304046Sae	IPFW_RUNLOCK(ch);
738304046Sae
739304046Sae	CURVNET_RESTORE();
740304046Sae}
741304046Sae
742304046Saestatic NAT64NOINLINE void
743304046Saereinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
744304046Sae{
745304046Sae
746304046Sae	if (ji->m == NULL)
747304046Sae		return;
748304046Sae
749304046Sae	/* Request has failed or packet type is wrong */
750304046Sae	if (ji->f_id.addr_type != 6 || ji->done == 0) {
751304046Sae		m_freem(ji->m);
752304046Sae		ji->m = NULL;
753334836Sae		NAT64STAT_INC(&cfg->base.stats, dropped);
754304046Sae		DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
755304046Sae		    ji->jtype, ji->done);
756304046Sae		return;
757304046Sae	}
758304046Sae
759304046Sae	/*
760304046Sae	 * XXX: Limit recursion level
761304046Sae	 */
762304046Sae
763334836Sae	NAT64STAT_INC(&cfg->base.stats, jreinjected);
764304046Sae	DPRINTF(DP_JQUEUE, "Reinject mbuf");
765304046Sae	nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
766304046Sae}
767304046Sae
/*
 * Returns portgroup @pg to its UMA zone.
 */
static void
destroy_portgroup(struct nat64lsn_portgroup *pg)
{

	/* Log first: pg must not be touched after uma_zfree(). */
	DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
	uma_zfree(nat64lsn_pg_zone, pg);
}
775304046Sae
776304046Saestatic NAT64NOINLINE int
777304046Saealloc_portgroup(struct nat64lsn_job_item *ji)
778304046Sae{
779304046Sae	struct nat64lsn_portgroup *pg;
780304046Sae
781304046Sae	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
782304046Sae	if (pg == NULL)
783304046Sae		return (1);
784304046Sae
785304046Sae	if (ji->needs_idx != 0) {
786304046Sae		ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
787304046Sae		/* Failed alloc isn't always fatal, so don't check */
788304046Sae	}
789304046Sae	memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
790304046Sae	pg->nat_proto = ji->nat_proto;
791304046Sae	ji->pg = pg;
792304046Sae	return (0);
793304046Sae
794304046Sae}
795304046Sae
796304046Saestatic void
797304046Saedestroy_host6(struct nat64lsn_host *nh)
798304046Sae{
799304046Sae	char a[INET6_ADDRSTRLEN];
800304046Sae	int i;
801304046Sae
802304046Sae	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
803304046Sae	DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
804304046Sae	    nh->pg_used);
805304046Sae	NAT64_LOCK_DESTROY(nh);
806304046Sae	for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
807304046Sae		uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
808304046Sae	uma_zfree(nat64lsn_host_zone, nh);
809304046Sae}
810304046Sae
811304046Saestatic NAT64NOINLINE int
812304046Saealloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
813304046Sae{
814304046Sae	struct nat64lsn_host *nh;
815304046Sae	char a[INET6_ADDRSTRLEN];
816304046Sae
817304046Sae	nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
818304046Sae	if (nh == NULL)
819304046Sae		return (1);
820304046Sae	PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
821304046Sae	if (PORTGROUP_CHUNK(nh, 0) == NULL) {
822304046Sae		uma_zfree(nat64lsn_host_zone, nh);
823304046Sae		return (2);
824304046Sae	}
825304046Sae	if (alloc_portgroup(ji) != 0) {
826334836Sae		NAT64STAT_INC(&cfg->base.stats, jportfails);
827304046Sae		uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
828304046Sae		uma_zfree(nat64lsn_host_zone, nh);
829304046Sae		return (3);
830304046Sae	}
831304046Sae
832304046Sae	NAT64_LOCK_INIT(nh);
833304046Sae	nh->addr = ji->haddr;
834304046Sae	nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
835304046Sae	nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
836304046Sae	nh->pg_used = 0;
837304046Sae	ji->nh = nh;
838304046Sae
839304046Sae	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
840304046Sae	DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
841304046Sae	return (0);
842304046Sae}
843304046Sae
844304046Sae/*
845304046Sae * Finds free @pg index inside @nh
846304046Sae */
847304046Saestatic NAT64NOINLINE int
848304046Saefind_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
849304046Sae{
850304046Sae	int i;
851304046Sae
852304046Sae	for (i = 0; i < nh->pg_allocated; i++) {
853304046Sae		if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) {
854304046Sae			*idx = i;
855304046Sae			return (0);
856304046Sae		}
857304046Sae	}
858304046Sae	return (1);
859304046Sae}
860304046Sae
861304046Saestatic NAT64NOINLINE int
862304046Saeattach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
863304046Sae{
864304046Sae	char a[INET6_ADDRSTRLEN];
865304046Sae	struct nat64lsn_host *nh;
866304046Sae
867304046Sae	I6HASH_FIND(cfg, nh, &ji->haddr);
868304046Sae	if (nh == NULL) {
869304046Sae		/* Add new host to list */
870304046Sae		nh = ji->nh;
871304046Sae		I6HASH_INSERT(cfg, nh);
872304046Sae		cfg->ihcount++;
873304046Sae		ji->nh = NULL;
874304046Sae
875304046Sae		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
876304046Sae		DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
877304046Sae		/*
878304046Sae		 * Try to add portgroup.
879304046Sae		 * Note it will automatically set
880304046Sae		 * 'done' on ji if successful.
881304046Sae		 */
882304046Sae		if (attach_portgroup(cfg, ji) != 0) {
883304046Sae			DPRINTF(DP_DROPS, "%s %p failed to attach PG",
884304046Sae			    a, nh);
885334836Sae			NAT64STAT_INC(&cfg->base.stats, jportfails);
886304046Sae			return (1);
887304046Sae		}
888304046Sae		return (0);
889304046Sae	}
890304046Sae
891304046Sae	/*
892304046Sae	 * nh isn't NULL. This probably means we had several simultaneous
893304046Sae	 * host requests. The previous one request has already attached
894304046Sae	 * this host. Requeue attached mbuf and mark job as done, but
895304046Sae	 * leave nh and pg pointers not changed, so nat64lsn_do_request()
896304046Sae	 * will release all allocated resources.
897304046Sae	 */
898304046Sae	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
899304046Sae	DPRINTF(DP_OBJ, "%s %p is already attached as %p",
900304046Sae	    a, ji->nh, nh);
901304046Sae	ji->done = 1;
902304046Sae	return (0);
903304046Sae}
904304046Sae
905304046Saestatic NAT64NOINLINE int
906304046Saefind_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
907304046Sae    int nat_proto, uint16_t *aport, int *ppg_idx)
908304046Sae{
909304046Sae	int j, pg_idx;
910304046Sae
911304046Sae	pg_idx = addr_off * _ADDR_PG_COUNT +
912304046Sae	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
913304046Sae
914304046Sae	for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) {
915304046Sae		if (cfg->pg[pg_idx + j] != NULL)
916304046Sae			continue;
917304046Sae
918304046Sae		*aport = j * NAT64_CHUNK_SIZE;
919304046Sae		*ppg_idx = pg_idx + j;
920304046Sae		return (1);
921304046Sae	}
922304046Sae
923304046Sae	return (0);
924304046Sae}
925304046Sae
926304046Sae/*
927304046Sae * XXX: This function needs to be rewritten to
928304046Sae * use free bitmask for faster pg finding,
929304046Sae * additionally, it should take into consideration
930304046Sae * a) randomization and
931304046Sae * b) previous addresses allocated to given nat instance
932304046Sae *
933304046Sae */
934304046Saestatic NAT64NOINLINE int
935304046Saefind_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
936304046Sae    uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
937304046Sae{
938304046Sae	int i, nat_proto;
939304046Sae
940304046Sae	/*
941304046Sae	 * XXX: Use bitmask index to be able to find/check if IP address
942304046Sae	 * has some spare pg's
943304046Sae	 */
944304046Sae	nat_proto = ji->nat_proto;
945304046Sae
946304046Sae	/* First, try to use same address */
947304046Sae	if (ji->aaddr != 0) {
948304046Sae		i = ntohl(ji->aaddr) - cfg->prefix4;
949304046Sae		if (find_pg_place_addr(cfg, i, nat_proto, aport,
950304046Sae		    ppg_idx) != 0){
951304046Sae			/* Found! */
952304046Sae			*aaddr = htonl(cfg->prefix4 + i);
953304046Sae			return (0);
954304046Sae		}
955304046Sae	}
956304046Sae
957304046Sae	/* Next, try to use random address based on flow hash */
958304046Sae	i = ji->fhash % (1 << (32 - cfg->plen4));
959304046Sae	if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) {
960304046Sae		/* Found! */
961304046Sae		*aaddr = htonl(cfg->prefix4 + i);
962304046Sae		return (0);
963304046Sae	}
964304046Sae
965304046Sae
966304046Sae	/* Last one: simply find ANY available */
967304046Sae	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
968304046Sae		if (find_pg_place_addr(cfg, i, nat_proto, aport,
969304046Sae		    ppg_idx) != 0){
970304046Sae			/* Found! */
971304046Sae			*aaddr = htonl(cfg->prefix4 + i);
972304046Sae			return (0);
973304046Sae		}
974304046Sae	}
975304046Sae
976304046Sae	return (1);
977304046Sae}
978304046Sae
979304046Saestatic NAT64NOINLINE int
980304046Saeattach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
981304046Sae{
982304046Sae	char a[INET6_ADDRSTRLEN];
983304046Sae	struct nat64lsn_portgroup *pg;
984304046Sae	struct nat64lsn_host *nh;
985304046Sae	uint32_t aaddr;
986304046Sae	uint16_t aport;
987304046Sae	int nh_pg_idx, pg_idx;
988304046Sae
989304046Sae	pg = ji->pg;
990304046Sae
991304046Sae	/*
992304046Sae	 * Find source host and bind: we can't rely on
993304046Sae	 * pg->host
994304046Sae	 */
995304046Sae	I6HASH_FIND(cfg, nh, &ji->haddr);
996304046Sae	if (nh == NULL)
997304046Sae		return (1);
998304046Sae
999304046Sae	/* Find spare port chunk */
1000304046Sae	if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
1001304046Sae		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
1002304046Sae		DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
1003304046Sae		return (2);
1004304046Sae	}
1005304046Sae
1006304046Sae	/* Expand PG indexes if needed */
1007304046Sae	if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
1008304046Sae		PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
1009304046Sae		    ji->spare_idx;
1010304046Sae		nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
1011304046Sae		ji->spare_idx = NULL;
1012304046Sae	}
1013304046Sae
1014304046Sae	/* Find empty index to store PG in the @nh */
1015304046Sae	if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
1016304046Sae		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
1017304046Sae		DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
1018304046Sae		    a);
1019304046Sae		return (3);
1020304046Sae	}
1021304046Sae
1022304046Sae	cfg->pg[pg_idx] = pg;
1023304046Sae	cfg->protochunks[pg->nat_proto]++;
1024334836Sae	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1025304046Sae
1026304046Sae	pg->aaddr = aaddr;
1027304046Sae	pg->aport = aport;
1028304046Sae	pg->host = nh;
1029304046Sae	pg->idx = pg_idx;
1030304046Sae	SET_AGE(pg->timestamp);
1031304046Sae
1032304046Sae	PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
1033304046Sae	if (nh->pg_used == nh_pg_idx)
1034304046Sae		nh->pg_used++;
1035304046Sae	SET_AGE(nh->timestamp);
1036304046Sae
1037304046Sae	ji->pg = NULL;
1038304046Sae	ji->done = 1;
1039304046Sae
1040304046Sae	return (0);
1041304046Sae}
1042304046Sae
1043304046Saestatic NAT64NOINLINE void
1044304046Saeconsider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1045304046Sae{
1046304046Sae	struct nat64lsn_host *nh, *nh_tmp;
1047304046Sae	struct nat64lsn_portgroup *pg, *pg_list[256];
1048304046Sae	int i, pg_lidx, idx;
1049304046Sae
1050304046Sae	/* Find source host */
1051304046Sae	I6HASH_FIND(cfg, nh, &ji->haddr);
1052304046Sae	if (nh == NULL || nh->pg_used == 0)
1053304046Sae		return;
1054304046Sae
1055304046Sae	memset(pg_list, 0, sizeof(pg_list));
1056304046Sae	pg_lidx = 0;
1057304046Sae
1058304046Sae	NAT64_LOCK(nh);
1059304046Sae
1060304046Sae	for (i = nh->pg_used - 1; i >= 0; i--) {
1061304046Sae		if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
1062304046Sae			continue;
1063304046Sae		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
1064304046Sae
1065304046Sae		/* Check that PG isn't busy. */
1066304046Sae		if (stale_pg(cfg, pg) == 0)
1067304046Sae			continue;
1068304046Sae
1069304046Sae		/* DO delete */
1070304046Sae		pg_list[pg_lidx++] = pg;
1071304046Sae		PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
1072304046Sae
1073304046Sae		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
1074304046Sae		    pg->aport);
1075304046Sae		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
1076304046Sae		cfg->pg[idx] = NULL;
1077304046Sae		cfg->protochunks[pg->nat_proto]--;
1078334836Sae		NAT64STAT_INC(&cfg->base.stats, spgdeleted);
1079304046Sae
1080304046Sae		/* Decrease pg_used */
1081304046Sae		while (nh->pg_used > 0 &&
1082304046Sae		    PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
1083304046Sae			nh->pg_used--;
1084304046Sae
1085304046Sae		/* Check if on-stack buffer has ended */
1086304046Sae		if (pg_lidx == nitems(pg_list))
1087304046Sae			break;
1088304046Sae	}
1089304046Sae
1090304046Sae	NAT64_UNLOCK(nh);
1091304046Sae
1092304046Sae	if (stale_nh(cfg, nh)) {
1093304046Sae		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
1094304046Sae		KASSERT(nh != NULL, ("Unable to find address"));
1095304046Sae		cfg->ihcount--;
1096304046Sae		ji->nh = nh;
1097304046Sae		I6HASH_FIND(cfg, nh, &ji->haddr);
1098304046Sae		KASSERT(nh == NULL, ("Failed to delete address"));
1099304046Sae	}
1100304046Sae
1101304046Sae	/* TODO: Delay freeing portgroups */
1102304046Sae	while (pg_lidx > 0) {
1103304046Sae		pg_lidx--;
1104334836Sae		NAT64STAT_INC(&cfg->base.stats, spgdeleted);
1105304046Sae		destroy_portgroup(pg_list[pg_lidx]);
1106304046Sae	}
1107304046Sae}
1108304046Sae
1109304046Sae/*
1110304046Sae * Main request handler.
1111304046Sae * Responsible for handling jqueue, e.g.
1112304046Sae * creating new hosts, addind/deleting portgroups.
1113304046Sae */
1114304046Saestatic NAT64NOINLINE void
1115304046Saenat64lsn_do_request(void *data)
1116304046Sae{
1117304046Sae	IPFW_RLOCK_TRACKER;
1118304046Sae	struct nat64lsn_job_head jhead;
1119304046Sae	struct nat64lsn_job_item *ji;
1120304046Sae	int jcount, nhsize;
1121304046Sae	struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
1122304046Sae	struct ip_fw_chain *ch;
1123304046Sae	int delcount;
1124304046Sae
1125304046Sae	CURVNET_SET(cfg->vp);
1126304046Sae
1127304046Sae	TAILQ_INIT(&jhead);
1128304046Sae
1129304046Sae	/* XXX: We're running unlocked here */
1130304046Sae
1131304046Sae	ch = cfg->ch;
1132304046Sae	delcount = 0;
1133304046Sae	IPFW_RLOCK(ch);
1134304046Sae
1135304046Sae	/* Grab queue */
1136304046Sae	JQUEUE_LOCK();
1137304046Sae	TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
1138304046Sae	jcount = cfg->jlen;
1139304046Sae	cfg->jlen = 0;
1140304046Sae	JQUEUE_UNLOCK();
1141304046Sae
1142304046Sae	/* check if we need to resize hash */
1143304046Sae	nhsize = 0;
1144304046Sae	if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
1145304046Sae		nhsize = cfg->ihsize;
1146304046Sae		for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
1147304046Sae			;
1148304046Sae	} else if (cfg->ihcount < cfg->ihsize * 4) {
1149304046Sae		nhsize = cfg->ihsize;
1150304046Sae		for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
1151304046Sae			;
1152304046Sae	}
1153304046Sae
1154304046Sae	IPFW_RUNLOCK(ch);
1155304046Sae
1156304046Sae	if (TAILQ_EMPTY(&jhead)) {
1157304046Sae		CURVNET_RESTORE();
1158304046Sae		return;
1159304046Sae	}
1160304046Sae
1161334836Sae	NAT64STAT_INC(&cfg->base.stats, jcalls);
1162304046Sae	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1163304046Sae
1164304046Sae	/*
1165304046Sae	 * TODO:
1166304046Sae	 * What we should do here is to build a hash
1167304046Sae	 * to ensure we don't have lots of duplicate requests.
1168304046Sae	 * Skip this for now.
1169304046Sae	 *
1170304046Sae	 * TODO: Limit per-call number of items
1171304046Sae	 */
1172304046Sae
1173304046Sae	/* Pre-allocate everything for entire chain */
1174304046Sae	TAILQ_FOREACH(ji, &jhead,  next) {
1175304046Sae		switch (ji->jtype) {
1176304046Sae			case JTYPE_NEWHOST:
1177304046Sae				if (alloc_host6(cfg, ji) != 0)
1178334836Sae					NAT64STAT_INC(&cfg->base.stats,
1179334836Sae					    jhostfails);
1180304046Sae				break;
1181304046Sae			case JTYPE_NEWPORTGROUP:
1182304046Sae				if (alloc_portgroup(ji) != 0)
1183334836Sae					NAT64STAT_INC(&cfg->base.stats,
1184334836Sae					    jportfails);
1185304046Sae				break;
1186304046Sae			case JTYPE_DELPORTGROUP:
1187304046Sae				delcount += ji->delcount;
1188304046Sae				break;
1189304046Sae			default:
1190304046Sae				break;
1191304046Sae		}
1192304046Sae	}
1193304046Sae
1194304046Sae	/*
1195304046Sae	 * TODO: Alloc hew hash
1196304046Sae	 */
1197304046Sae	nhsize = 0;
1198304046Sae	if (nhsize > 0) {
1199304046Sae		/* XXX: */
1200304046Sae	}
1201304046Sae
1202304046Sae	/* Apply all changes in batch */
1203304046Sae	IPFW_UH_WLOCK(ch);
1204304046Sae	IPFW_WLOCK(ch);
1205304046Sae
1206304046Sae	TAILQ_FOREACH(ji, &jhead,  next) {
1207304046Sae		switch (ji->jtype) {
1208304046Sae			case JTYPE_NEWHOST:
1209304046Sae				if (ji->nh != NULL)
1210304046Sae					attach_host6(cfg, ji);
1211304046Sae				break;
1212304046Sae			case JTYPE_NEWPORTGROUP:
1213304046Sae				if (ji->pg != NULL &&
1214304046Sae				    attach_portgroup(cfg, ji) != 0)
1215334836Sae					NAT64STAT_INC(&cfg->base.stats,
1216334836Sae					    jportfails);
1217304046Sae				break;
1218304046Sae			case JTYPE_DELPORTGROUP:
1219304046Sae				consider_del_portgroup(cfg, ji);
1220304046Sae				break;
1221304046Sae		}
1222304046Sae	}
1223304046Sae
1224304046Sae	if (nhsize > 0) {
1225304046Sae		/* XXX: Move everything to new hash */
1226304046Sae	}
1227304046Sae
1228304046Sae	IPFW_WUNLOCK(ch);
1229304046Sae	IPFW_UH_WUNLOCK(ch);
1230304046Sae
1231304046Sae	/* Flush unused entries */
1232304046Sae	while (!TAILQ_EMPTY(&jhead)) {
1233304046Sae		ji = TAILQ_FIRST(&jhead);
1234304046Sae		TAILQ_REMOVE(&jhead, ji, next);
1235304046Sae		if (ji->nh != NULL)
1236304046Sae			destroy_host6(ji->nh);
1237304046Sae		if (ji->pg != NULL)
1238304046Sae			destroy_portgroup(ji->pg);
1239304046Sae		if (ji->m != NULL)
1240304046Sae			reinject_mbuf(cfg, ji);
1241304046Sae		if (ji->spare_idx != NULL)
1242304046Sae			uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
1243304046Sae		free(ji, M_IPFW);
1244304046Sae	}
1245304046Sae	CURVNET_RESTORE();
1246304046Sae}
1247304046Sae
1248304046Saestatic NAT64NOINLINE struct nat64lsn_job_item *
1249304046Saenat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
1250304046Sae    int jtype)
1251304046Sae{
1252304046Sae	struct nat64lsn_job_item *ji;
1253304046Sae	struct in6_addr haddr;
1254304046Sae	uint8_t nat_proto;
1255304046Sae
1256304046Sae	/*
1257304046Sae	 * Do not try to lock possibly contested mutex if we're near the limit.
1258304046Sae	 * Drop packet instead.
1259304046Sae	 */
1260304046Sae	if (cfg->jlen >= cfg->jmaxlen) {
1261334836Sae		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1262304046Sae		return (NULL);
1263304046Sae	}
1264304046Sae
1265304046Sae	memset(&haddr, 0, sizeof(haddr));
1266304046Sae	nat_proto = 0;
1267304046Sae	if (f_id != NULL) {
1268304046Sae		haddr = f_id->src_ip6;
1269304046Sae		nat_proto = nat64lsn_proto_map[f_id->proto];
1270304046Sae
1271304046Sae		DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
1272304046Sae		    nat_proto, f_id->proto);
1273304046Sae
1274304046Sae		if (nat_proto == 0)
1275304046Sae			return (NULL);
1276304046Sae	}
1277304046Sae
1278304046Sae	ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
1279304046Sae	    M_NOWAIT | M_ZERO);
1280304046Sae
1281304046Sae	if (ji == NULL) {
1282334836Sae		NAT64STAT_INC(&cfg->base.stats, jnomem);
1283304046Sae		return (NULL);
1284304046Sae	}
1285304046Sae
1286304046Sae	ji->jtype = jtype;
1287304046Sae
1288304046Sae	if (f_id != NULL) {
1289304046Sae		ji->f_id = *f_id;
1290304046Sae		ji->haddr = haddr;
1291304046Sae		ji->nat_proto = nat_proto;
1292304046Sae	}
1293304046Sae
1294304046Sae	return (ji);
1295304046Sae}
1296304046Sae
1297304046Saestatic NAT64NOINLINE void
1298304046Saenat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1299304046Sae{
1300304046Sae
1301304046Sae	if (ji == NULL)
1302304046Sae		return;
1303304046Sae
1304304046Sae	JQUEUE_LOCK();
1305304046Sae	TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
1306304046Sae	cfg->jlen++;
1307334836Sae	NAT64STAT_INC(&cfg->base.stats, jrequests);
1308304046Sae
1309304046Sae	if (callout_pending(&cfg->jcallout) == 0)
1310304046Sae		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1311304046Sae	JQUEUE_UNLOCK();
1312304046Sae}
1313304046Sae
1314304046Saestatic NAT64NOINLINE void
1315304046Saenat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
1316304046Sae    struct nat64lsn_job_head *jhead, int jlen)
1317304046Sae{
1318304046Sae
1319304046Sae	if (TAILQ_EMPTY(jhead))
1320304046Sae		return;
1321304046Sae
1322304046Sae	/* Attach current queue to execution one */
1323304046Sae	JQUEUE_LOCK();
1324304046Sae	TAILQ_CONCAT(&cfg->jhead, jhead, next);
1325304046Sae	cfg->jlen += jlen;
1326334836Sae	NAT64STAT_ADD(&cfg->base.stats, jrequests, jlen);
1327304046Sae
1328304046Sae	if (callout_pending(&cfg->jcallout) == 0)
1329304046Sae		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1330304046Sae	JQUEUE_UNLOCK();
1331304046Sae}
1332304046Sae
1333304046Saestatic unsigned int
1334304046Saeflow6_hash(const struct ipfw_flow_id *f_id)
1335304046Sae{
1336304046Sae	unsigned char hbuf[36];
1337304046Sae
1338304046Sae	memcpy(hbuf, &f_id->dst_ip6, 16);
1339304046Sae	memcpy(&hbuf[16], &f_id->src_ip6, 16);
1340304046Sae	memcpy(&hbuf[32], &f_id->dst_port, 2);
1341304046Sae	memcpy(&hbuf[32], &f_id->src_port, 2);
1342304046Sae
1343304046Sae	return (djb_hash(hbuf, sizeof(hbuf)));
1344304046Sae}
1345304046Sae
1346304046Saestatic NAT64NOINLINE int
1347304046Saenat64lsn_request_host(struct nat64lsn_cfg *cfg,
1348304046Sae    const struct ipfw_flow_id *f_id, struct mbuf **pm)
1349304046Sae{
1350304046Sae	struct nat64lsn_job_item *ji;
1351304046Sae	struct mbuf *m;
1352304046Sae
1353304046Sae	m = *pm;
1354304046Sae	*pm = NULL;
1355304046Sae
1356304046Sae	ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
1357304046Sae	if (ji == NULL) {
1358304046Sae		m_freem(m);
1359334836Sae		NAT64STAT_INC(&cfg->base.stats, dropped);
1360304046Sae		DPRINTF(DP_DROPS, "failed to create job");
1361304046Sae	} else {
1362304046Sae		ji->m = m;
1363304046Sae		/* Provide pseudo-random value based on flow */
1364304046Sae		ji->fhash = flow6_hash(f_id);
1365304046Sae		nat64lsn_enqueue_job(cfg, ji);
1366334836Sae		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1367304046Sae	}
1368304046Sae
1369332767Sae	return (IP_FW_DENY);
1370304046Sae}
1371304046Sae
1372304046Saestatic NAT64NOINLINE int
1373304046Saenat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
1374304046Sae    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
1375304046Sae    int needs_idx)
1376304046Sae{
1377304046Sae	struct nat64lsn_job_item *ji;
1378304046Sae	struct mbuf *m;
1379304046Sae
1380304046Sae	m = *pm;
1381304046Sae	*pm = NULL;
1382304046Sae
1383304046Sae	ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
1384304046Sae	if (ji == NULL) {
1385304046Sae		m_freem(m);
1386334836Sae		NAT64STAT_INC(&cfg->base.stats, dropped);
1387304046Sae		DPRINTF(DP_DROPS, "failed to create job");
1388304046Sae	} else {
1389304046Sae		ji->m = m;
1390304046Sae		/* Provide pseudo-random value based on flow */
1391304046Sae		ji->fhash = flow6_hash(f_id);
1392304046Sae		ji->aaddr = aaddr;
1393304046Sae		ji->needs_idx = needs_idx;
1394304046Sae		nat64lsn_enqueue_job(cfg, ji);
1395334836Sae		NAT64STAT_INC(&cfg->base.stats, jportreq);
1396304046Sae	}
1397304046Sae
1398332767Sae	return (IP_FW_DENY);
1399304046Sae}
1400304046Sae
1401304046Saestatic NAT64NOINLINE struct nat64lsn_state *
1402304046Saenat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
1403304046Sae    int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
1404304046Sae{
1405304046Sae	struct nat64lsn_portgroup *pg;
1406304046Sae	struct nat64lsn_state *st;
1407304046Sae	int i, hval, off;
1408304046Sae
1409304046Sae	/* XXX: create additional bitmask for selecting proper portgroup */
1410304046Sae	for (i = 0; i < nh->pg_used; i++) {
1411304046Sae		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
1412304046Sae		if (pg == NULL)
1413304046Sae			continue;
1414304046Sae		if (*aaddr == 0)
1415304046Sae			*aaddr = pg->aaddr;
1416304046Sae		if (pg->nat_proto != nat_proto)
1417304046Sae			continue;
1418304046Sae
1419304046Sae		off = PG_GET_FREE_IDX(pg);
1420304046Sae		if (off != 0) {
1421304046Sae			/* We have found spare state. Use it */
1422304046Sae			off--;
1423304046Sae			PG_MARK_BUSY_IDX(pg, off);
1424304046Sae			st = &pg->states[off];
1425304046Sae
1426304046Sae			/*
1427304046Sae			 * Fill in new info. Assume state was zeroed.
1428304046Sae			 * Timestamp and flags will be filled by caller.
1429304046Sae			 */
1430304046Sae			st->u.s = kst->u.s;
1431304046Sae			st->cur.idx = i + 1;
1432304046Sae			st->cur.off = off;
1433304046Sae
1434304046Sae			/* Insert into host hash table */
1435304046Sae			hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
1436304046Sae			st->next = nh->phash[hval];
1437304046Sae			nh->phash[hval] = st->cur;
1438304046Sae
1439304046Sae			nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
1440304046Sae
1441334836Sae			NAT64STAT_INC(&cfg->base.stats, screated);
1442304046Sae
1443304046Sae			return (st);
1444304046Sae		}
1445304046Sae		/* Saev last used alias affress */
1446304046Sae		*aaddr = pg->aaddr;
1447304046Sae	}
1448304046Sae
1449304046Sae	return (NULL);
1450304046Sae}
1451304046Sae
1452304046Saestatic NAT64NOINLINE int
1453304046Saenat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1454304046Sae    struct mbuf **pm)
1455304046Sae{
1456304046Sae	struct pfloghdr loghdr, *logdata;
1457304046Sae	char a[INET6_ADDRSTRLEN];
1458304046Sae	struct nat64lsn_host *nh;
1459304046Sae	struct st_ptr sidx;
1460304046Sae	struct nat64lsn_state *st, kst;
1461304046Sae	struct nat64lsn_portgroup *pg;
1462304046Sae	struct icmp6_hdr *icmp6;
1463304046Sae	uint32_t aaddr;
1464304046Sae	int action, hval, nat_proto, proto;
1465304046Sae	uint16_t aport, state_ts, state_flags;
1466304046Sae
1467304046Sae	/* Check if af/protocol is supported and get it short id */
1468304046Sae	nat_proto = nat64lsn_proto_map[f_id->proto];
1469304046Sae	if (nat_proto == 0) {
1470304046Sae		/*
1471304046Sae		 * Since we can be called from jobs handler, we need
1472304046Sae		 * to free mbuf by self, do not leave this task to
1473304046Sae		 * ipfw_check_packet().
1474304046Sae		 */
1475334836Sae		NAT64STAT_INC(&cfg->base.stats, noproto);
1476334836Sae		goto drop;
1477304046Sae	}
1478304046Sae
1479304046Sae	/* Try to find host first */
1480304046Sae	I6HASH_FIND(cfg, nh, &f_id->src_ip6);
1481304046Sae
1482304046Sae	if (nh == NULL)
1483304046Sae		return (nat64lsn_request_host(cfg, f_id, pm));
1484304046Sae
1485304046Sae	/* Fill-in on-stack state structure */
1486346210Sae	kst.u.s.faddr = nat64_extract_ip4(&f_id->dst_ip6,
1487346210Sae	    cfg->base.plat_plen);
1488346210Sae	if (kst.u.s.faddr == 0 ||
1489346210Sae	    nat64_check_private_ip4(&cfg->base, kst.u.s.faddr) != 0) {
1490334836Sae		NAT64STAT_INC(&cfg->base.stats, dropped);
1491334836Sae		goto drop;
1492334836Sae	}
1493304046Sae	kst.u.s.fport = f_id->dst_port;
1494304046Sae	kst.u.s.lport = f_id->src_port;
1495304046Sae
1496304046Sae	/* Prepare some fields we might need to update */
1497304046Sae	hval = 0;
1498304046Sae	proto = nat64_getlasthdr(*pm, &hval);
1499304046Sae	if (proto < 0) {
1500334836Sae		NAT64STAT_INC(&cfg->base.stats, dropped);
1501304046Sae		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
1502334836Sae		goto drop;
1503304046Sae	}
1504304046Sae
1505304046Sae	SET_AGE(state_ts);
1506304046Sae	if (proto == IPPROTO_TCP)
1507304046Sae		state_flags = convert_tcp_flags(
1508304046Sae		    TCP(mtodo(*pm, hval))->th_flags);
1509304046Sae	else
1510304046Sae		state_flags = 0;
1511304046Sae	if (proto == IPPROTO_ICMPV6) {
1512304046Sae		/* Alter local port data */
1513304046Sae		icmp6 = mtodo(*pm, hval);
1514304046Sae		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1515304046Sae		    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1516304046Sae			kst.u.s.lport = ntohs(icmp6->icmp6_id);
1517304046Sae	}
1518304046Sae
1519304046Sae	hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
1520304046Sae	pg = NULL;
1521304046Sae	st = NULL;
1522304046Sae
1523304046Sae	/* OK, let's find state in host hash */
1524304046Sae	NAT64_LOCK(nh);
1525304046Sae	sidx = nh->phash[hval];
1526304046Sae	int k = 0;
1527304046Sae	while (sidx.idx != 0) {
1528304046Sae		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
1529304046Sae		st = &pg->states[sidx.off];
1530304046Sae		//DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off,
1531304046Sae		//st->next.idx, st->next.off);
1532304046Sae		if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
1533304046Sae			break;
1534304046Sae		if (k++ > 1000) {
1535304046Sae			DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
1536304046Sae			    sidx.idx, sidx.off, st->next.idx, st->next.off);
1537304046Sae			DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
1538340542Sae			    inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)),
1539340542Sae			    nh, curcpu);
1540304046Sae			k = 0;
1541304046Sae		}
1542304046Sae		sidx = st->next;
1543304046Sae	}
1544304046Sae
1545304046Sae	if (sidx.idx == 0) {
1546304046Sae		aaddr = 0;
1547304046Sae		st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
1548304046Sae		if (st == NULL) {
1549304046Sae			/* No free states. Request more if we can */
1550304046Sae			if (nh->pg_used >= cfg->max_chunks) {
1551304046Sae				/* Limit reached */
1552304046Sae				DPRINTF(DP_DROPS, "PG limit reached "
1553304046Sae				    " for host %s (used %u, allocated %u, "
1554340542Sae				    "limit %u)", inet_ntop(AF_INET6,
1555340542Sae				    &nh->addr, a, sizeof(a)),
1556304046Sae				    nh->pg_used * NAT64_CHUNK_SIZE,
1557304046Sae				    nh->pg_allocated * NAT64_CHUNK_SIZE,
1558304046Sae				    cfg->max_chunks * NAT64_CHUNK_SIZE);
1559304046Sae				NAT64_UNLOCK(nh);
1560334836Sae				NAT64STAT_INC(&cfg->base.stats, dropped);
1561334836Sae				goto drop;
1562304046Sae			}
1563304046Sae			if ((nh->pg_allocated <=
1564304046Sae			    nh->pg_used + NAT64LSN_REMAININGPG) &&
1565304046Sae			    nh->pg_allocated < cfg->max_chunks)
1566304046Sae				action = 1; /* Request new indexes */
1567304046Sae			else
1568304046Sae				action = 0;
1569304046Sae			NAT64_UNLOCK(nh);
1570304046Sae			//DPRINTF("No state, unlock for %p", nh);
1571304046Sae			return (nat64lsn_request_portgroup(cfg, f_id,
1572304046Sae			    pm, aaddr, action));
1573304046Sae		}
1574304046Sae
1575304046Sae		/* We've got new state. */
1576304046Sae		sidx = st->cur;
1577304046Sae		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
1578304046Sae	}
1579304046Sae
1580304046Sae	/* Okay, state found */
1581304046Sae
1582304046Sae	/* Update necessary fileds */
1583304046Sae	if (st->timestamp != state_ts)
1584304046Sae		st->timestamp = state_ts;
1585304046Sae	if ((st->flags & state_flags) != 0)
1586304046Sae		st->flags |= state_flags;
1587304046Sae
1588304046Sae	/* Copy needed state data */
1589304046Sae	aaddr = pg->aaddr;
1590304046Sae	aport = htons(pg->aport + sidx.off);
1591304046Sae
1592304046Sae	NAT64_UNLOCK(nh);
1593304046Sae
1594334836Sae	if (cfg->base.flags & NAT64_LOG) {
1595304046Sae		logdata = &loghdr;
1596304046Sae		nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
1597304046Sae	} else
1598304046Sae		logdata = NULL;
1599304046Sae
1600334836Sae	action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->base, logdata);
1601304046Sae	if (action == NAT64SKIP)
1602332767Sae		return (cfg->nomatch_verdict);
1603334836Sae	if (action == NAT64MFREE) {
1604334836Saedrop:
1605304046Sae		m_freem(*pm);
1606334836Sae	}
1607304046Sae	*pm = NULL;	/* mark mbuf as consumed */
1608304046Sae	return (IP_FW_DENY);
1609304046Sae}
1610304046Sae
1611304046Sae/*
1612304046Sae * Main dataplane entry point.
1613304046Sae */
1614304046Saeint
1615304046Saeipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1616304046Sae    ipfw_insn *cmd, int *done)
1617304046Sae{
1618304046Sae	ipfw_insn *icmd;
1619304046Sae	struct nat64lsn_cfg *cfg;
1620304046Sae	int ret;
1621304046Sae
1622304046Sae	IPFW_RLOCK_ASSERT(ch);
1623304046Sae
1624304046Sae	*done = 1; /* terminate the search */
1625304046Sae	icmd = cmd + 1;
1626304046Sae	if (cmd->opcode != O_EXTERNAL_ACTION ||
1627304046Sae	    cmd->arg1 != V_nat64lsn_eid ||
1628304046Sae	    icmd->opcode != O_EXTERNAL_INSTANCE ||
1629304046Sae	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1630304046Sae		return (0);
1631304046Sae
1632304046Sae	switch (args->f_id.addr_type) {
1633304046Sae	case 4:
1634304046Sae		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1635304046Sae		break;
1636304046Sae	case 6:
1637304046Sae		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1638304046Sae		break;
1639304046Sae	default:
1640332767Sae		return (cfg->nomatch_verdict);
1641304046Sae	}
1642304046Sae	return (ret);
1643304046Sae}
1644304046Sae
1645304046Saestatic int
1646304046Saenat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
1647304046Sae{
1648304046Sae	struct nat64lsn_host *nh;
1649304046Sae
1650304046Sae	nh = (struct nat64lsn_host *)mem;
1651304046Sae	memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
1652304046Sae	memset(nh->phash, 0, sizeof(nh->phash));
1653304046Sae	return (0);
1654304046Sae}
1655304046Sae
/*
 * Constructor passed to uma_zcreate() for the portgroup index zone:
 * zeroes all @size bytes of the index chunk in @mem. Always returns 0.
 */
static int
nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
{

	/* A zeroed chunk has every portgroup pointer set to NULL. */
	memset(mem, 0, size);

	return (0);
}
1663304046Sae
1664304046Saevoid
1665304046Saenat64lsn_init_internal(void)
1666304046Sae{
1667304046Sae
1668304046Sae	memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
1669304046Sae	/* Set up supported protocol map */
1670304046Sae	nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
1671304046Sae	nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
1672304046Sae	nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
1673304046Sae	nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
1674304046Sae	/* Fill in reverse proto map */
1675304046Sae	memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
1676304046Sae	nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
1677304046Sae	nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
1678304046Sae	nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
1679304046Sae
1680304046Sae	JQUEUE_LOCK_INIT();
1681304046Sae	nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
1682304046Sae	    sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
1683304046Sae	    NULL, NULL, UMA_ALIGN_PTR, 0);
1684304046Sae	nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
1685304046Sae	    sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
1686304046Sae	    UMA_ALIGN_PTR, 0);
1687304046Sae	nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
1688304046Sae	    sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
1689304046Sae	    nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1690304046Sae}
1691304046Sae
1692304046Saevoid
1693304046Saenat64lsn_uninit_internal(void)
1694304046Sae{
1695304046Sae
1696304046Sae	JQUEUE_LOCK_DESTROY();
1697304046Sae	uma_zdestroy(nat64lsn_host_zone);
1698304046Sae	uma_zdestroy(nat64lsn_pg_zone);
1699304046Sae	uma_zdestroy(nat64lsn_pgidx_zone);
1700304046Sae}
1701304046Sae
1702304046Saevoid
1703304046Saenat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1704304046Sae{
1705304046Sae
1706304046Sae	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1707304046Sae	    nat64lsn_periodic, cfg);
1708304046Sae}
1709304046Sae
1710304046Saestruct nat64lsn_cfg *
1711304046Saenat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
1712304046Sae{
1713304046Sae	struct nat64lsn_cfg *cfg;
1714304046Sae
1715304046Sae	cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
1716304046Sae	TAILQ_INIT(&cfg->jhead);
1717304046Sae	cfg->vp = curvnet;
1718304046Sae	cfg->ch = ch;
1719334836Sae	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1720304046Sae
1721304046Sae	cfg->ihsize = NAT64LSN_HSIZE;
1722304046Sae	cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
1723304046Sae	    M_WAITOK | M_ZERO);
1724304046Sae
1725304046Sae	cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
1726304046Sae	    M_WAITOK | M_ZERO);
1727304046Sae
1728304046Sae        callout_init(&cfg->periodic, CALLOUT_MPSAFE);
1729304046Sae        callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1730304046Sae
1731304046Sae	return (cfg);
1732304046Sae}
1733304046Sae
1734304046Sae/*
1735304046Sae * Destroy all hosts callback.
1736304046Sae * Called on module unload when all activity already finished, so
1737304046Sae * can work without any locks.
1738304046Sae */
1739304046Saestatic NAT64NOINLINE int
1740304046Saenat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
1741304046Sae{
1742304046Sae	struct nat64lsn_portgroup *pg;
1743304046Sae	int i;
1744304046Sae
1745304046Sae	for (i = nh->pg_used; i > 0; i--) {
1746304046Sae		pg = PORTGROUP_BYSIDX(cfg, nh, i);
1747304046Sae		if (pg == NULL)
1748304046Sae			continue;
1749304046Sae		cfg->pg[pg->idx] = NULL;
1750304046Sae		destroy_portgroup(pg);
1751304046Sae		nh->pg_used--;
1752304046Sae	}
1753304046Sae	destroy_host6(nh);
1754304046Sae	cfg->ihcount--;
1755304046Sae	return (0);
1756304046Sae}
1757304046Sae
1758304046Saevoid
1759304046Saenat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1760304046Sae{
1761304046Sae	struct nat64lsn_host *nh, *tmp;
1762304046Sae
1763304046Sae	callout_drain(&cfg->jcallout);
1764304046Sae	callout_drain(&cfg->periodic);
1765304046Sae	I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
1766304046Sae	DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);
1767304046Sae
1768334836Sae	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1769304046Sae	free(cfg->ih, M_IPFW);
1770304046Sae	free(cfg->pg, M_IPFW);
1771304046Sae	free(cfg, M_IPFW);
1772304046Sae}
1773304046Sae
1774