1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/counter.h>
36#include <sys/ck.h>
37#include <sys/epoch.h>
38#include <sys/errno.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/module.h>
44#include <sys/rmlock.h>
45#include <sys/rwlock.h>
46#include <sys/socket.h>
47#include <sys/sockopt.h>
48
49#include <net/if.h>
50
51#include <netinet/in.h>
52#include <netinet/ip.h>
53#include <netinet/ip_var.h>
54#include <netinet/ip_fw.h>
55#include <netinet6/ip_fw_nat64.h>
56
57#include <netpfil/ipfw/ip_fw_private.h>
58
59#include "nat64lsn.h"
60
61VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
62
63static struct nat64lsn_cfg *
64nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
65{
66	struct nat64lsn_cfg *cfg;
67
68	cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
69	    IPFW_TLV_NAT64LSN_NAME, name);
70
71	return (cfg);
72}
73
74static void
75nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
76{
77
78	if (uc->jmaxlen == 0)
79		uc->jmaxlen = NAT64LSN_JMAXLEN;
80	if (uc->jmaxlen > 65536)
81		uc->jmaxlen = 65536;
82	if (uc->nh_delete_delay == 0)
83		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
84	if (uc->pg_delete_delay == 0)
85		uc->pg_delete_delay = NAT64LSN_PG_AGE;
86	if (uc->st_syn_ttl == 0)
87		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
88	if (uc->st_close_ttl == 0)
89		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
90	if (uc->st_estab_ttl == 0)
91		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
92	if (uc->st_udp_ttl == 0)
93		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
94	if (uc->st_icmp_ttl == 0)
95		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
96
97	if (uc->states_chunks == 0)
98		uc->states_chunks = 1;
99	else if (uc->states_chunks >= 128)
100		uc->states_chunks = 128;
101	else if (!powerof2(uc->states_chunks))
102		uc->states_chunks = 1 << fls(uc->states_chunks);
103}
104
105/*
106 * Creates new nat64lsn instance.
107 * Data layout (v0)(current):
108 * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
109 *
110 * Returns 0 on success
111 */
112static int
113nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
114    struct sockopt_data *sd)
115{
116	ipfw_obj_lheader *olh;
117	ipfw_nat64lsn_cfg *uc;
118	struct nat64lsn_cfg *cfg;
119	struct namedobj_instance *ni;
120	uint32_t addr4, mask4;
121
122	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
123		return (EINVAL);
124
125	olh = (ipfw_obj_lheader *)sd->kbuf;
126	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
127
128	if (ipfw_check_object_name_generic(uc->name) != 0)
129		return (EINVAL);
130
131	if (uc->set >= IPFW_MAX_SETS)
132		return (EINVAL);
133
134	if (uc->plen4 > 32)
135		return (EINVAL);
136
137	/*
138	 * Unspecified address has special meaning. But it must
139	 * have valid prefix length. This length will be used to
140	 * correctly extract and embedd IPv4 address into IPv6.
141	 */
142	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
143	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
144	    nat64_check_prefixlen(uc->plen6) != 0)
145		return (EINVAL);
146
147	/* XXX: Check prefix4 to be global */
148	addr4 = ntohl(uc->prefix4.s_addr);
149	mask4 = ~((1 << (32 - uc->plen4)) - 1);
150	if ((addr4 & mask4) != addr4)
151		return (EINVAL);
152
153	nat64lsn_default_config(uc);
154
155	ni = CHAIN_TO_SRV(ch);
156	IPFW_UH_RLOCK(ch);
157	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
158		IPFW_UH_RUNLOCK(ch);
159		return (EEXIST);
160	}
161	IPFW_UH_RUNLOCK(ch);
162
163	cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
164	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
165	cfg->no.name = cfg->name;
166	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
167	cfg->no.set = uc->set;
168
169	cfg->base.plat_prefix = uc->prefix6;
170	cfg->base.plat_plen = uc->plen6;
171	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
172	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
173		cfg->base.flags |= NAT64_WKPFX;
174	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
175		cfg->base.flags |= NAT64LSN_ANYPREFIX;
176
177	cfg->states_chunks = uc->states_chunks;
178	cfg->jmaxlen = uc->jmaxlen;
179	cfg->host_delete_delay = uc->nh_delete_delay;
180	cfg->pg_delete_delay = uc->pg_delete_delay;
181	cfg->st_syn_ttl = uc->st_syn_ttl;
182	cfg->st_close_ttl = uc->st_close_ttl;
183	cfg->st_estab_ttl = uc->st_estab_ttl;
184	cfg->st_udp_ttl = uc->st_udp_ttl;
185	cfg->st_icmp_ttl = uc->st_icmp_ttl;
186
187	cfg->nomatch_verdict = IP_FW_DENY;
188
189	IPFW_UH_WLOCK(ch);
190
191	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
192		IPFW_UH_WUNLOCK(ch);
193		nat64lsn_destroy_instance(cfg);
194		return (EEXIST);
195	}
196
197	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
198		IPFW_UH_WUNLOCK(ch);
199		nat64lsn_destroy_instance(cfg);
200		return (ENOSPC);
201	}
202	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
203
204	/* Okay, let's link data */
205	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
206	nat64lsn_start_instance(cfg);
207
208	IPFW_UH_WUNLOCK(ch);
209	return (0);
210}
211
212static void
213nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
214{
215
216	IPFW_UH_WLOCK_ASSERT(ch);
217
218	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
219	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
220}
221
222/*
223 * Destroys nat64 instance.
224 * Data layout (v0)(current):
225 * Request: [ ipfw_obj_header ]
226 *
227 * Returns 0 on success
228 */
229static int
230nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
231    struct sockopt_data *sd)
232{
233	struct nat64lsn_cfg *cfg;
234	ipfw_obj_header *oh;
235
236	if (sd->valsize != sizeof(*oh))
237		return (EINVAL);
238
239	oh = (ipfw_obj_header *)op3;
240
241	IPFW_UH_WLOCK(ch);
242	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
243	if (cfg == NULL) {
244		IPFW_UH_WUNLOCK(ch);
245		return (ENOENT);
246	}
247
248	if (cfg->no.refcnt > 0) {
249		IPFW_UH_WUNLOCK(ch);
250		return (EBUSY);
251	}
252
253	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
254	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
255	nat64lsn_detach_config(ch, cfg);
256	IPFW_UH_WUNLOCK(ch);
257
258	nat64lsn_destroy_instance(cfg);
259	return (0);
260}
261
262#define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
263	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
264static void
265export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
266    struct ipfw_nat64lsn_stats *stats)
267{
268	struct nat64lsn_alias *alias;
269	int i, j;
270
271	__COPY_STAT_FIELD(cfg, stats, opcnt64);
272	__COPY_STAT_FIELD(cfg, stats, opcnt46);
273	__COPY_STAT_FIELD(cfg, stats, ofrags);
274	__COPY_STAT_FIELD(cfg, stats, ifrags);
275	__COPY_STAT_FIELD(cfg, stats, oerrors);
276	__COPY_STAT_FIELD(cfg, stats, noroute4);
277	__COPY_STAT_FIELD(cfg, stats, noroute6);
278	__COPY_STAT_FIELD(cfg, stats, nomatch4);
279	__COPY_STAT_FIELD(cfg, stats, noproto);
280	__COPY_STAT_FIELD(cfg, stats, nomem);
281	__COPY_STAT_FIELD(cfg, stats, dropped);
282
283	__COPY_STAT_FIELD(cfg, stats, jcalls);
284	__COPY_STAT_FIELD(cfg, stats, jrequests);
285	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
286	__COPY_STAT_FIELD(cfg, stats, jportreq);
287	__COPY_STAT_FIELD(cfg, stats, jhostfails);
288	__COPY_STAT_FIELD(cfg, stats, jportfails);
289	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
290	__COPY_STAT_FIELD(cfg, stats, jnomem);
291	__COPY_STAT_FIELD(cfg, stats, jreinjected);
292	__COPY_STAT_FIELD(cfg, stats, screated);
293	__COPY_STAT_FIELD(cfg, stats, sdeleted);
294	__COPY_STAT_FIELD(cfg, stats, spgcreated);
295	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
296
297	stats->hostcount = cfg->hosts_count;
298	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
299		alias = &cfg->aliases[i];
300		for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
301			stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
302		for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
303			stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
304		for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
305			stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
306	}
307}
308#undef	__COPY_STAT_FIELD
309
310static void
311nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
312    ipfw_nat64lsn_cfg *uc)
313{
314
315	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
316	uc->states_chunks = cfg->states_chunks;
317	uc->jmaxlen = cfg->jmaxlen;
318	uc->nh_delete_delay = cfg->host_delete_delay;
319	uc->pg_delete_delay = cfg->pg_delete_delay;
320	uc->st_syn_ttl = cfg->st_syn_ttl;
321	uc->st_close_ttl = cfg->st_close_ttl;
322	uc->st_estab_ttl = cfg->st_estab_ttl;
323	uc->st_udp_ttl = cfg->st_udp_ttl;
324	uc->st_icmp_ttl = cfg->st_icmp_ttl;
325	uc->prefix4.s_addr = htonl(cfg->prefix4);
326	uc->prefix6 = cfg->base.plat_prefix;
327	uc->plen4 = cfg->plen4;
328	uc->plen6 = cfg->base.plat_plen;
329	uc->set = cfg->no.set;
330	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
331}
332
/* Argument bundle handed to export_config_cb() while listing instances. */
struct nat64_dump_arg {
	struct ip_fw_chain	*ch;	/* chain being listed */
	struct sockopt_data	*sd;	/* reply buffer to append to */
};
337
338static int
339export_config_cb(struct namedobj_instance *ni, struct named_object *no,
340    void *arg)
341{
342	struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
343	ipfw_nat64lsn_cfg *uc;
344
345	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
346	    sizeof(*uc));
347	nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
348	return (0);
349}
350
351/*
352 * Lists all nat64 lsn instances currently available in kernel.
353 * Data layout (v0)(current):
354 * Request: [ ipfw_obj_lheader ]
355 * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
356 *
357 * Returns 0 on success
358 */
359static int
360nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
361    struct sockopt_data *sd)
362{
363	ipfw_obj_lheader *olh;
364	struct nat64_dump_arg da;
365
366	/* Check minimum header size */
367	if (sd->valsize < sizeof(ipfw_obj_lheader))
368		return (EINVAL);
369
370	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
371
372	IPFW_UH_RLOCK(ch);
373	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
374	    IPFW_TLV_NAT64LSN_NAME);
375	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
376	olh->size = sizeof(*olh) + olh->count * olh->objsize;
377
378	if (sd->valsize < olh->size) {
379		IPFW_UH_RUNLOCK(ch);
380		return (ENOMEM);
381	}
382	memset(&da, 0, sizeof(da));
383	da.ch = ch;
384	da.sd = sd;
385	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
386	    IPFW_TLV_NAT64LSN_NAME);
387	IPFW_UH_RUNLOCK(ch);
388
389	return (0);
390}
391
392/*
393 * Change existing nat64lsn instance configuration.
394 * Data layout (v0)(current):
395 * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
396 * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
397 *
398 * Returns 0 on success
399 */
400static int
401nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
402    struct sockopt_data *sd)
403{
404	ipfw_obj_header *oh;
405	ipfw_nat64lsn_cfg *uc;
406	struct nat64lsn_cfg *cfg;
407	struct namedobj_instance *ni;
408
409	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
410		return (EINVAL);
411
412	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
413	    sizeof(*oh) + sizeof(*uc));
414	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
415
416	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
417	    oh->ntlv.set >= IPFW_MAX_SETS)
418		return (EINVAL);
419
420	ni = CHAIN_TO_SRV(ch);
421	if (sd->sopt->sopt_dir == SOPT_GET) {
422		IPFW_UH_RLOCK(ch);
423		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
424		if (cfg == NULL) {
425			IPFW_UH_RUNLOCK(ch);
426			return (ENOENT);
427		}
428		nat64lsn_export_config(ch, cfg, uc);
429		IPFW_UH_RUNLOCK(ch);
430		return (0);
431	}
432
433	nat64lsn_default_config(uc);
434
435	IPFW_UH_WLOCK(ch);
436	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
437	if (cfg == NULL) {
438		IPFW_UH_WUNLOCK(ch);
439		return (ENOENT);
440	}
441
442	/*
443	 * For now allow to change only following values:
444	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
445	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
446	 */
447
448	cfg->states_chunks = uc->states_chunks;
449	cfg->jmaxlen = uc->jmaxlen;
450	cfg->host_delete_delay = uc->nh_delete_delay;
451	cfg->pg_delete_delay = uc->pg_delete_delay;
452	cfg->st_syn_ttl = uc->st_syn_ttl;
453	cfg->st_close_ttl = uc->st_close_ttl;
454	cfg->st_estab_ttl = uc->st_estab_ttl;
455	cfg->st_udp_ttl = uc->st_udp_ttl;
456	cfg->st_icmp_ttl = uc->st_icmp_ttl;
457	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
458	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
459
460	IPFW_UH_WUNLOCK(ch);
461
462	return (0);
463}
464
465/*
466 * Get nat64lsn statistics.
467 * Data layout (v0)(current):
468 * Request: [ ipfw_obj_header ]
469 * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
470 *
471 * Returns 0 on success
472 */
473static int
474nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
475    struct sockopt_data *sd)
476{
477	struct ipfw_nat64lsn_stats stats;
478	struct nat64lsn_cfg *cfg;
479	ipfw_obj_header *oh;
480	ipfw_obj_ctlv *ctlv;
481	size_t sz;
482
483	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
484	if (sd->valsize % sizeof(uint64_t))
485		return (EINVAL);
486	if (sd->valsize < sz)
487		return (ENOMEM);
488	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
489	if (oh == NULL)
490		return (EINVAL);
491	memset(&stats, 0, sizeof(stats));
492
493	IPFW_UH_RLOCK(ch);
494	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
495	if (cfg == NULL) {
496		IPFW_UH_RUNLOCK(ch);
497		return (ENOENT);
498	}
499
500	export_stats(ch, cfg, &stats);
501	IPFW_UH_RUNLOCK(ch);
502
503	ctlv = (ipfw_obj_ctlv *)(oh + 1);
504	memset(ctlv, 0, sizeof(*ctlv));
505	ctlv->head.type = IPFW_TLV_COUNTERS;
506	ctlv->head.length = sz - sizeof(ipfw_obj_header);
507	ctlv->count = sizeof(stats) / sizeof(uint64_t);
508	ctlv->objsize = sizeof(uint64_t);
509	ctlv->version = IPFW_NAT64_VERSION;
510	memcpy(ctlv + 1, &stats, sizeof(stats));
511	return (0);
512}
513
514/*
515 * Reset nat64lsn statistics.
516 * Data layout (v0)(current):
517 * Request: [ ipfw_obj_header ]
518 *
519 * Returns 0 on success
520 */
521static int
522nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
523    struct sockopt_data *sd)
524{
525	struct nat64lsn_cfg *cfg;
526	ipfw_obj_header *oh;
527
528	if (sd->valsize != sizeof(*oh))
529		return (EINVAL);
530	oh = (ipfw_obj_header *)sd->kbuf;
531	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
532	    oh->ntlv.set >= IPFW_MAX_SETS)
533		return (EINVAL);
534
535	IPFW_UH_WLOCK(ch);
536	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
537	if (cfg == NULL) {
538		IPFW_UH_WUNLOCK(ch);
539		return (ENOENT);
540	}
541	COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
542	IPFW_UH_WUNLOCK(ch);
543	return (0);
544}
545
/*
 * Loads the freemask of chunk (n) of port group (pg) into the 64-bit
 * lvalue (out).  On non-LP64 builds the value is assembled from two
 * consecutive elements, the second one shifted into the upper 32 bits;
 * on LP64 builds a single element is read.
 */
#ifndef __LP64__
#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
    ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#else
#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
#endif
552/*
553 * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
554 *	ipfw_nat64lsn_state x count, ... ] ]
555 */
556static int
557nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
558    struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
559{
560	ipfw_nat64lsn_state_v1 *s;
561	struct nat64lsn_state *state;
562	uint64_t freemask;
563	uint32_t i, count;
564
565	/* validate user input */
566	if (idx->chunk > pg->chunks_count - 1)
567		return (EINVAL);
568
569	FREEMASK_COPY(pg, idx->chunk, freemask);
570	count = 64 - bitcount64(freemask);
571	if (count == 0)
572		return (0);	/* Try next PG/chunk */
573
574	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
575	    (uintmax_t)idx->index, count);
576
577	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
578	    count * sizeof(ipfw_nat64lsn_state_v1));
579	if (s == NULL)
580		return (ENOMEM);
581
582	for (i = 0; i < 64; i++) {
583		if (ISSET64(freemask, i))
584			continue;
585		state = pg->chunks_count == 1 ? &pg->states->state[i] :
586		    &pg->states_chunk[idx->chunk]->state[i];
587
588		s->host6 = state->host->addr;
589		s->daddr.s_addr = htonl(state->ip_dst);
590		s->dport = state->dport;
591		s->sport = state->sport;
592		s->aport = state->aport;
593		s->flags = (uint8_t)(state->flags & 7);
594		s->proto = state->proto;
595		s->idle = GET_AGE(state->timestamp);
596		s++;
597	}
598	*ret_count = count;
599	return (0);
600}
601
602#define	LAST_IDX	0xFF
603static int
604nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
605    union nat64lsn_pgidx *idx)
606{
607
608	/* First iterate over chunks */
609	if (pg != NULL) {
610		if (idx->chunk < pg->chunks_count - 1) {
611			idx->chunk++;
612			return (0);
613		}
614	}
615	idx->chunk = 0;
616	/* Then over PGs */
617	if (idx->port < UINT16_MAX - 64) {
618		idx->port += 64;
619		return (0);
620	}
621	idx->port = NAT64_MIN_PORT;
622	/* Then over supported protocols */
623	switch (idx->proto) {
624	case IPPROTO_ICMP:
625		idx->proto = IPPROTO_TCP;
626		return (0);
627	case IPPROTO_TCP:
628		idx->proto = IPPROTO_UDP;
629		return (0);
630	default:
631		idx->proto = IPPROTO_ICMP;
632	}
633	/* And then over IPv4 alias addresses */
634	if (idx->addr < cfg->pmask4) {
635		idx->addr++;
636		return (1);	/* New states group is needed */
637	}
638	idx->index = LAST_IDX;
639	return (-1);		/* No more states */
640}
641
642static struct nat64lsn_pg*
643nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
644{
645	struct nat64lsn_alias *alias;
646	int pg_idx;
647
648	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
649	MPASS(alias->addr == idx->addr);
650
651	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
652	switch (idx->proto) {
653	case IPPROTO_ICMP:
654		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
655			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
656		break;
657	case IPPROTO_TCP:
658		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
659			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
660		break;
661	case IPPROTO_UDP:
662		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
663			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
664		break;
665	}
666	return (NULL);
667}
668
669/*
670 * Lists nat64lsn states.
671 * Data layout (v0):
672 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
673 * Reply: [ ipfw_obj_header ipfw_obj_data [
674 *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
675 *
676 * Returns 0 on success
677 */
678static int
679nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
680    struct sockopt_data *sd)
681{
682
683	/* TODO: implement states listing for old ipfw(8) binaries  */
684	return (EOPNOTSUPP);
685}
686
687/*
688 * Lists nat64lsn states.
689 * Data layout (v1)(current):
690 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
691 * Reply: [ ipfw_obj_header ipfw_obj_data [
692 *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
693 *
694 * Returns 0 on success
695 */
696static int
697nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
698    struct sockopt_data *sd)
699{
700	ipfw_obj_header *oh;
701	ipfw_obj_data *od;
702	ipfw_nat64lsn_stg_v1 *stg;
703	struct nat64lsn_cfg *cfg;
704	struct nat64lsn_pg *pg;
705	union nat64lsn_pgidx idx;
706	size_t sz;
707	uint32_t count, total;
708	int ret;
709
710	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
711	    sizeof(uint64_t);
712	/* Check minimum header size */
713	if (sd->valsize < sz)
714		return (EINVAL);
715
716	oh = (ipfw_obj_header *)sd->kbuf;
717	od = (ipfw_obj_data *)(oh + 1);
718	if (od->head.type != IPFW_TLV_OBJDATA ||
719	    od->head.length != sz - sizeof(ipfw_obj_header))
720		return (EINVAL);
721
722	idx.index = *(uint64_t *)(od + 1);
723	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
724	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
725		return (EINVAL);
726	if (idx.index == LAST_IDX)
727		return (EINVAL);
728
729	IPFW_UH_RLOCK(ch);
730	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
731	if (cfg == NULL) {
732		IPFW_UH_RUNLOCK(ch);
733		return (ENOENT);
734	}
735	if (idx.index == 0) {	/* Fill in starting point */
736		idx.addr = cfg->prefix4;
737		idx.proto = IPPROTO_ICMP;
738		idx.port = NAT64_MIN_PORT;
739	}
740	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
741	    idx.port < NAT64_MIN_PORT) {
742		IPFW_UH_RUNLOCK(ch);
743		return (EINVAL);
744	}
745	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
746	    sizeof(ipfw_nat64lsn_stg_v1);
747	if (sd->valsize < sz) {
748		IPFW_UH_RUNLOCK(ch);
749		return (ENOMEM);
750	}
751	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
752	od = (ipfw_obj_data *)(oh + 1);
753	od->head.type = IPFW_TLV_OBJDATA;
754	od->head.length = sz - sizeof(ipfw_obj_header);
755	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
756	stg->count = total = 0;
757	stg->next.index = idx.index;
758	/*
759	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
760	 * Thus states, hosts and PGs will not expire while we hold it.
761	 */
762	CALLOUT_LOCK(cfg);
763	ret = 0;
764	do {
765		pg = nat64lsn_get_pg_byidx(cfg, &idx);
766		if (pg != NULL) {
767			count = 0;
768			ret = nat64lsn_export_states_v1(cfg, &idx, pg,
769			    sd, &count);
770			if (ret != 0)
771				break;
772			if (count > 0) {
773				stg->count += count;
774				total += count;
775				/* Update total size of reply */
776				od->head.length +=
777				    count * sizeof(ipfw_nat64lsn_state_v1);
778				sz += count * sizeof(ipfw_nat64lsn_state_v1);
779			}
780			stg->alias4.s_addr = htonl(idx.addr);
781		}
782		/* Determine new index */
783		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
784		case -1:
785			ret = ENOENT; /* End of search */
786			break;
787		case 1: /*
788			 * Next alias address, new group may be needed.
789			 * If states count is zero, use this group.
790			 */
791			if (stg->count == 0)
792				continue;
793			/* Otherwise try to create new group */
794			sz += sizeof(ipfw_nat64lsn_stg_v1);
795			if (sd->valsize < sz) {
796				ret = ENOMEM;
797				break;
798			}
799			/* Save next index in current group */
800			stg->next.index = idx.index;
801			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
802			    sizeof(ipfw_nat64lsn_stg_v1));
803			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
804			stg->count = 0;
805			break;
806		}
807		stg->next.index = idx.index;
808	} while (ret == 0);
809	CALLOUT_UNLOCK(cfg);
810	IPFW_UH_RUNLOCK(ch);
811	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
812}
813
814static struct ipfw_sopt_handler	scodes[] = {
815	{ IP_FW_NAT64LSN_CREATE, 0,	HDIR_BOTH,	nat64lsn_create },
816	{ IP_FW_NAT64LSN_DESTROY,0,	HDIR_SET,	nat64lsn_destroy },
817	{ IP_FW_NAT64LSN_CONFIG, 0,	HDIR_BOTH,	nat64lsn_config },
818	{ IP_FW_NAT64LSN_LIST,	 0,	HDIR_GET,	nat64lsn_list },
819	{ IP_FW_NAT64LSN_STATS,	 0,	HDIR_GET,	nat64lsn_stats },
820	{ IP_FW_NAT64LSN_RESET_STATS,0,	HDIR_SET,	nat64lsn_reset_stats },
821	{ IP_FW_NAT64LSN_LIST_STATES,0,	HDIR_GET,	nat64lsn_states_v0 },
822	{ IP_FW_NAT64LSN_LIST_STATES,1,	HDIR_GET,	nat64lsn_states_v1 },
823};
824
825static int
826nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
827{
828	ipfw_insn *icmd;
829
830	icmd = cmd - 1;
831	if (icmd->opcode != O_EXTERNAL_ACTION ||
832	    icmd->arg1 != V_nat64lsn_eid)
833		return (1);
834
835	*puidx = cmd->arg1;
836	*ptype = 0;
837	return (0);
838}
839
840static void
841nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
842{
843
844	cmd->arg1 = idx;
845}
846
847static int
848nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
849    struct named_object **pno)
850{
851	int err;
852
853	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
854	    IPFW_TLV_NAT64LSN_NAME, pno);
855	return (err);
856}
857
858static struct named_object *
859nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
860{
861	struct namedobj_instance *ni;
862	struct named_object *no;
863
864	IPFW_UH_WLOCK_ASSERT(ch);
865	ni = CHAIN_TO_SRV(ch);
866	no = ipfw_objhash_lookup_kidx(ni, idx);
867	KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
868
869	return (no);
870}
871
872static int
873nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
874    enum ipfw_sets_cmd cmd)
875{
876
877	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
878	    set, new_set, cmd));
879}
880
881static struct opcode_obj_rewrite opcodes[] = {
882	{
883		.opcode = O_EXTERNAL_INSTANCE,
884		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
885		.classifier = nat64lsn_classify,
886		.update = nat64lsn_update_arg1,
887		.find_byname = nat64lsn_findbyname,
888		.find_bykidx = nat64lsn_findbykidx,
889		.manage_sets = nat64lsn_manage_sets,
890	},
891};
892
893static int
894destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
895    void *arg)
896{
897	struct nat64lsn_cfg *cfg;
898	struct ip_fw_chain *ch;
899
900	ch = (struct ip_fw_chain *)arg;
901	cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
902	SRV_OBJECT(ch, no->kidx) = NULL;
903	nat64lsn_detach_config(ch, cfg);
904	nat64lsn_destroy_instance(cfg);
905	return (0);
906}
907
908int
909nat64lsn_init(struct ip_fw_chain *ch, int first)
910{
911
912	if (first != 0)
913		nat64lsn_init_internal();
914	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
915	if (V_nat64lsn_eid == 0)
916		return (ENXIO);
917	IPFW_ADD_SOPT_HANDLER(first, scodes);
918	IPFW_ADD_OBJ_REWRITER(first, opcodes);
919	return (0);
920}
921
922void
923nat64lsn_uninit(struct ip_fw_chain *ch, int last)
924{
925
926	IPFW_DEL_OBJ_REWRITER(last, opcodes);
927	IPFW_DEL_SOPT_HANDLER(last, scodes);
928	ipfw_del_eaction(ch, V_nat64lsn_eid);
929	/*
930	 * Since we already have deregistered external action,
931	 * our named objects become unaccessible via rules, because
932	 * all rules were truncated by ipfw_del_eaction().
933	 * So, we can unlink and destroy our named objects without holding
934	 * IPFW_WLOCK().
935	 */
936	IPFW_UH_WLOCK(ch);
937	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
938	    IPFW_TLV_NAT64LSN_NAME);
939	V_nat64lsn_eid = 0;
940	IPFW_UH_WUNLOCK(ch);
941	if (last != 0)
942		nat64lsn_uninit_internal();
943}
944