1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/counter.h>
36#include <sys/ck.h>
37#include <sys/epoch.h>
38#include <sys/errno.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/module.h>
44#include <sys/rmlock.h>
45#include <sys/rwlock.h>
46#include <sys/socket.h>
47#include <sys/sockopt.h>
48
49#include <net/if.h>
50#include <net/pfil.h>
51
52#include <netinet/in.h>
53#include <netinet/ip.h>
54#include <netinet/ip_var.h>
55#include <netinet/ip_fw.h>
56#include <netinet6/ip_fw_nat64.h>
57
58#include <netpfil/ipfw/ip_fw_private.h>
59
60#include "nat64lsn.h"
61
62VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
63
64static struct nat64lsn_cfg *
65nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
66{
67	struct nat64lsn_cfg *cfg;
68
69	cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
70	    IPFW_TLV_NAT64LSN_NAME, name);
71
72	return (cfg);
73}
74
75static void
76nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
77{
78
79	if (uc->jmaxlen == 0)
80		uc->jmaxlen = NAT64LSN_JMAXLEN;
81	if (uc->jmaxlen > 65536)
82		uc->jmaxlen = 65536;
83	if (uc->nh_delete_delay == 0)
84		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
85	if (uc->pg_delete_delay == 0)
86		uc->pg_delete_delay = NAT64LSN_PG_AGE;
87	if (uc->st_syn_ttl == 0)
88		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
89	if (uc->st_close_ttl == 0)
90		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
91	if (uc->st_estab_ttl == 0)
92		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
93	if (uc->st_udp_ttl == 0)
94		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
95	if (uc->st_icmp_ttl == 0)
96		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
97
98	if (uc->states_chunks == 0)
99		uc->states_chunks = 1;
100	else if (uc->states_chunks >= 128)
101		uc->states_chunks = 128;
102	else if (!powerof2(uc->states_chunks))
103		uc->states_chunks = 1 << fls(uc->states_chunks);
104}
105
106/*
107 * Creates new nat64lsn instance.
108 * Data layout (v0)(current):
109 * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
110 *
111 * Returns 0 on success
112 */
113static int
114nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
115    struct sockopt_data *sd)
116{
117	ipfw_obj_lheader *olh;
118	ipfw_nat64lsn_cfg *uc;
119	struct nat64lsn_cfg *cfg;
120	struct namedobj_instance *ni;
121	uint32_t addr4, mask4;
122
123	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
124		return (EINVAL);
125
126	olh = (ipfw_obj_lheader *)sd->kbuf;
127	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
128
129	if (ipfw_check_object_name_generic(uc->name) != 0)
130		return (EINVAL);
131
132	if (uc->set >= IPFW_MAX_SETS)
133		return (EINVAL);
134
135	if (uc->plen4 > 32)
136		return (EINVAL);
137
138	/*
139	 * Unspecified address has special meaning. But it must
140	 * have valid prefix length. This length will be used to
141	 * correctly extract and embedd IPv4 address into IPv6.
142	 */
143	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
144	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
145	    nat64_check_prefixlen(uc->plen6) != 0)
146		return (EINVAL);
147
148	/* XXX: Check prefix4 to be global */
149	addr4 = ntohl(uc->prefix4.s_addr);
150	mask4 = ~((1 << (32 - uc->plen4)) - 1);
151	if ((addr4 & mask4) != addr4)
152		return (EINVAL);
153
154	nat64lsn_default_config(uc);
155
156	ni = CHAIN_TO_SRV(ch);
157	IPFW_UH_RLOCK(ch);
158	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
159		IPFW_UH_RUNLOCK(ch);
160		return (EEXIST);
161	}
162	IPFW_UH_RUNLOCK(ch);
163
164	cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
165	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
166	cfg->no.name = cfg->name;
167	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
168	cfg->no.set = uc->set;
169
170	cfg->base.plat_prefix = uc->prefix6;
171	cfg->base.plat_plen = uc->plen6;
172	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
173	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
174		cfg->base.flags |= NAT64_WKPFX;
175	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
176		cfg->base.flags |= NAT64LSN_ANYPREFIX;
177
178	cfg->states_chunks = uc->states_chunks;
179	cfg->jmaxlen = uc->jmaxlen;
180	cfg->host_delete_delay = uc->nh_delete_delay;
181	cfg->pg_delete_delay = uc->pg_delete_delay;
182	cfg->st_syn_ttl = uc->st_syn_ttl;
183	cfg->st_close_ttl = uc->st_close_ttl;
184	cfg->st_estab_ttl = uc->st_estab_ttl;
185	cfg->st_udp_ttl = uc->st_udp_ttl;
186	cfg->st_icmp_ttl = uc->st_icmp_ttl;
187
188	cfg->nomatch_verdict = IP_FW_DENY;
189
190	IPFW_UH_WLOCK(ch);
191
192	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
193		IPFW_UH_WUNLOCK(ch);
194		nat64lsn_destroy_instance(cfg);
195		return (EEXIST);
196	}
197
198	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
199		IPFW_UH_WUNLOCK(ch);
200		nat64lsn_destroy_instance(cfg);
201		return (ENOSPC);
202	}
203	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
204
205	/* Okay, let's link data */
206	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
207	nat64lsn_start_instance(cfg);
208
209	IPFW_UH_WUNLOCK(ch);
210	return (0);
211}
212
213static void
214nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
215{
216
217	IPFW_UH_WLOCK_ASSERT(ch);
218
219	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
220	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
221}
222
223/*
224 * Destroys nat64 instance.
225 * Data layout (v0)(current):
226 * Request: [ ipfw_obj_header ]
227 *
228 * Returns 0 on success
229 */
230static int
231nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
232    struct sockopt_data *sd)
233{
234	struct nat64lsn_cfg *cfg;
235	ipfw_obj_header *oh;
236
237	if (sd->valsize != sizeof(*oh))
238		return (EINVAL);
239
240	oh = (ipfw_obj_header *)op3;
241
242	IPFW_UH_WLOCK(ch);
243	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
244	if (cfg == NULL) {
245		IPFW_UH_WUNLOCK(ch);
246		return (ENOENT);
247	}
248
249	if (cfg->no.refcnt > 0) {
250		IPFW_UH_WUNLOCK(ch);
251		return (EBUSY);
252	}
253
254	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
255	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
256	nat64lsn_detach_config(ch, cfg);
257	IPFW_UH_WUNLOCK(ch);
258
259	nat64lsn_destroy_instance(cfg);
260	return (0);
261}
262
263#define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
264	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
265static void
266export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
267    struct ipfw_nat64lsn_stats *stats)
268{
269	struct nat64lsn_alias *alias;
270	int i, j;
271
272	__COPY_STAT_FIELD(cfg, stats, opcnt64);
273	__COPY_STAT_FIELD(cfg, stats, opcnt46);
274	__COPY_STAT_FIELD(cfg, stats, ofrags);
275	__COPY_STAT_FIELD(cfg, stats, ifrags);
276	__COPY_STAT_FIELD(cfg, stats, oerrors);
277	__COPY_STAT_FIELD(cfg, stats, noroute4);
278	__COPY_STAT_FIELD(cfg, stats, noroute6);
279	__COPY_STAT_FIELD(cfg, stats, nomatch4);
280	__COPY_STAT_FIELD(cfg, stats, noproto);
281	__COPY_STAT_FIELD(cfg, stats, nomem);
282	__COPY_STAT_FIELD(cfg, stats, dropped);
283
284	__COPY_STAT_FIELD(cfg, stats, jcalls);
285	__COPY_STAT_FIELD(cfg, stats, jrequests);
286	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
287	__COPY_STAT_FIELD(cfg, stats, jportreq);
288	__COPY_STAT_FIELD(cfg, stats, jhostfails);
289	__COPY_STAT_FIELD(cfg, stats, jportfails);
290	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
291	__COPY_STAT_FIELD(cfg, stats, jnomem);
292	__COPY_STAT_FIELD(cfg, stats, jreinjected);
293	__COPY_STAT_FIELD(cfg, stats, screated);
294	__COPY_STAT_FIELD(cfg, stats, sdeleted);
295	__COPY_STAT_FIELD(cfg, stats, spgcreated);
296	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
297
298	stats->hostcount = cfg->hosts_count;
299	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
300		alias = &cfg->aliases[i];
301		for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
302			stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
303		for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
304			stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
305		for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
306			stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
307	}
308}
309#undef	__COPY_STAT_FIELD
310
311static void
312nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
313    ipfw_nat64lsn_cfg *uc)
314{
315
316	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
317	uc->states_chunks = cfg->states_chunks;
318	uc->jmaxlen = cfg->jmaxlen;
319	uc->nh_delete_delay = cfg->host_delete_delay;
320	uc->pg_delete_delay = cfg->pg_delete_delay;
321	uc->st_syn_ttl = cfg->st_syn_ttl;
322	uc->st_close_ttl = cfg->st_close_ttl;
323	uc->st_estab_ttl = cfg->st_estab_ttl;
324	uc->st_udp_ttl = cfg->st_udp_ttl;
325	uc->st_icmp_ttl = cfg->st_icmp_ttl;
326	uc->prefix4.s_addr = htonl(cfg->prefix4);
327	uc->prefix6 = cfg->base.plat_prefix;
328	uc->plen4 = cfg->plen4;
329	uc->plen6 = cfg->base.plat_plen;
330	uc->set = cfg->no.set;
331	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
332}
333
/*
 * Argument handed to export_config_cb() while nat64lsn_list() walks
 * the named objects hash.
 */
struct nat64_dump_arg {
	struct ip_fw_chain *ch;		/* chain that is being listed */
	struct sockopt_data *sd;	/* reply buffer to export into */
};
338
339static int
340export_config_cb(struct namedobj_instance *ni, struct named_object *no,
341    void *arg)
342{
343	struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
344	ipfw_nat64lsn_cfg *uc;
345
346	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
347	    sizeof(*uc));
348	nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
349	return (0);
350}
351
352/*
353 * Lists all nat64 lsn instances currently available in kernel.
354 * Data layout (v0)(current):
355 * Request: [ ipfw_obj_lheader ]
356 * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
357 *
358 * Returns 0 on success
359 */
360static int
361nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
362    struct sockopt_data *sd)
363{
364	ipfw_obj_lheader *olh;
365	struct nat64_dump_arg da;
366
367	/* Check minimum header size */
368	if (sd->valsize < sizeof(ipfw_obj_lheader))
369		return (EINVAL);
370
371	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
372
373	IPFW_UH_RLOCK(ch);
374	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
375	    IPFW_TLV_NAT64LSN_NAME);
376	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
377	olh->size = sizeof(*olh) + olh->count * olh->objsize;
378
379	if (sd->valsize < olh->size) {
380		IPFW_UH_RUNLOCK(ch);
381		return (ENOMEM);
382	}
383	memset(&da, 0, sizeof(da));
384	da.ch = ch;
385	da.sd = sd;
386	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
387	    IPFW_TLV_NAT64LSN_NAME);
388	IPFW_UH_RUNLOCK(ch);
389
390	return (0);
391}
392
393/*
394 * Change existing nat64lsn instance configuration.
395 * Data layout (v0)(current):
396 * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
397 * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
398 *
399 * Returns 0 on success
400 */
401static int
402nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
403    struct sockopt_data *sd)
404{
405	ipfw_obj_header *oh;
406	ipfw_nat64lsn_cfg *uc;
407	struct nat64lsn_cfg *cfg;
408	struct namedobj_instance *ni;
409
410	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
411		return (EINVAL);
412
413	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
414	    sizeof(*oh) + sizeof(*uc));
415	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
416
417	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
418	    oh->ntlv.set >= IPFW_MAX_SETS)
419		return (EINVAL);
420
421	ni = CHAIN_TO_SRV(ch);
422	if (sd->sopt->sopt_dir == SOPT_GET) {
423		IPFW_UH_RLOCK(ch);
424		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
425		if (cfg == NULL) {
426			IPFW_UH_RUNLOCK(ch);
427			return (ENOENT);
428		}
429		nat64lsn_export_config(ch, cfg, uc);
430		IPFW_UH_RUNLOCK(ch);
431		return (0);
432	}
433
434	nat64lsn_default_config(uc);
435
436	IPFW_UH_WLOCK(ch);
437	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
438	if (cfg == NULL) {
439		IPFW_UH_WUNLOCK(ch);
440		return (ENOENT);
441	}
442
443	/*
444	 * For now allow to change only following values:
445	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
446	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
447	 */
448
449	cfg->states_chunks = uc->states_chunks;
450	cfg->jmaxlen = uc->jmaxlen;
451	cfg->host_delete_delay = uc->nh_delete_delay;
452	cfg->pg_delete_delay = uc->pg_delete_delay;
453	cfg->st_syn_ttl = uc->st_syn_ttl;
454	cfg->st_close_ttl = uc->st_close_ttl;
455	cfg->st_estab_ttl = uc->st_estab_ttl;
456	cfg->st_udp_ttl = uc->st_udp_ttl;
457	cfg->st_icmp_ttl = uc->st_icmp_ttl;
458	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
459	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
460
461	IPFW_UH_WUNLOCK(ch);
462
463	return (0);
464}
465
466/*
467 * Get nat64lsn statistics.
468 * Data layout (v0)(current):
469 * Request: [ ipfw_obj_header ]
470 * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
471 *
472 * Returns 0 on success
473 */
474static int
475nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
476    struct sockopt_data *sd)
477{
478	struct ipfw_nat64lsn_stats stats;
479	struct nat64lsn_cfg *cfg;
480	ipfw_obj_header *oh;
481	ipfw_obj_ctlv *ctlv;
482	size_t sz;
483
484	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
485	if (sd->valsize % sizeof(uint64_t))
486		return (EINVAL);
487	if (sd->valsize < sz)
488		return (ENOMEM);
489	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
490	if (oh == NULL)
491		return (EINVAL);
492	memset(&stats, 0, sizeof(stats));
493
494	IPFW_UH_RLOCK(ch);
495	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
496	if (cfg == NULL) {
497		IPFW_UH_RUNLOCK(ch);
498		return (ENOENT);
499	}
500
501	export_stats(ch, cfg, &stats);
502	IPFW_UH_RUNLOCK(ch);
503
504	ctlv = (ipfw_obj_ctlv *)(oh + 1);
505	memset(ctlv, 0, sizeof(*ctlv));
506	ctlv->head.type = IPFW_TLV_COUNTERS;
507	ctlv->head.length = sz - sizeof(ipfw_obj_header);
508	ctlv->count = sizeof(stats) / sizeof(uint64_t);
509	ctlv->objsize = sizeof(uint64_t);
510	ctlv->version = IPFW_NAT64_VERSION;
511	memcpy(ctlv + 1, &stats, sizeof(stats));
512	return (0);
513}
514
515/*
516 * Reset nat64lsn statistics.
517 * Data layout (v0)(current):
518 * Request: [ ipfw_obj_header ]
519 *
520 * Returns 0 on success
521 */
522static int
523nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
524    struct sockopt_data *sd)
525{
526	struct nat64lsn_cfg *cfg;
527	ipfw_obj_header *oh;
528
529	if (sd->valsize != sizeof(*oh))
530		return (EINVAL);
531	oh = (ipfw_obj_header *)sd->kbuf;
532	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
533	    oh->ntlv.set >= IPFW_MAX_SETS)
534		return (EINVAL);
535
536	IPFW_UH_WLOCK(ch);
537	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
538	if (cfg == NULL) {
539		IPFW_UH_WUNLOCK(ch);
540		return (ENOENT);
541	}
542	COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
543	IPFW_UH_WUNLOCK(ch);
544	return (0);
545}
546
547#ifdef __LP64__
548#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
549#else
550#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
551    ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
552#endif
553/*
554 * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
555 *	ipfw_nat64lsn_state x count, ... ] ]
556 */
557static int
558nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
559    struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
560{
561	ipfw_nat64lsn_state_v1 *s;
562	struct nat64lsn_state *state;
563	uint64_t freemask;
564	uint32_t i, count;
565
566	/* validate user input */
567	if (idx->chunk > pg->chunks_count - 1)
568		return (EINVAL);
569
570	FREEMASK_COPY(pg, idx->chunk, freemask);
571	count = 64 - bitcount64(freemask);
572	if (count == 0)
573		return (0);	/* Try next PG/chunk */
574
575	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
576	    (uintmax_t)idx->index, count);
577
578	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
579	    count * sizeof(ipfw_nat64lsn_state_v1));
580	if (s == NULL)
581		return (ENOMEM);
582
583	for (i = 0; i < 64; i++) {
584		if (ISSET64(freemask, i))
585			continue;
586		state = pg->chunks_count == 1 ? &pg->states->state[i] :
587		    &pg->states_chunk[idx->chunk]->state[i];
588
589		s->host6 = state->host->addr;
590		s->daddr.s_addr = htonl(state->ip_dst);
591		s->dport = state->dport;
592		s->sport = state->sport;
593		s->aport = state->aport;
594		s->flags = (uint8_t)(state->flags & 7);
595		s->proto = state->proto;
596		s->idle = GET_AGE(state->timestamp);
597		s++;
598	}
599	*ret_count = count;
600	return (0);
601}
602
603#define	LAST_IDX	0xFF
604static int
605nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
606    union nat64lsn_pgidx *idx)
607{
608
609	/* First iterate over chunks */
610	if (pg != NULL) {
611		if (idx->chunk < pg->chunks_count - 1) {
612			idx->chunk++;
613			return (0);
614		}
615	}
616	idx->chunk = 0;
617	/* Then over PGs */
618	if (idx->port < UINT16_MAX - 64) {
619		idx->port += 64;
620		return (0);
621	}
622	idx->port = NAT64_MIN_PORT;
623	/* Then over supported protocols */
624	switch (idx->proto) {
625	case IPPROTO_ICMP:
626		idx->proto = IPPROTO_TCP;
627		return (0);
628	case IPPROTO_TCP:
629		idx->proto = IPPROTO_UDP;
630		return (0);
631	default:
632		idx->proto = IPPROTO_ICMP;
633	}
634	/* And then over IPv4 alias addresses */
635	if (idx->addr < cfg->pmask4) {
636		idx->addr++;
637		return (1);	/* New states group is needed */
638	}
639	idx->index = LAST_IDX;
640	return (-1);		/* No more states */
641}
642
643static struct nat64lsn_pg*
644nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
645{
646	struct nat64lsn_alias *alias;
647	int pg_idx;
648
649	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
650	MPASS(alias->addr == idx->addr);
651
652	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
653	switch (idx->proto) {
654	case IPPROTO_ICMP:
655		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
656			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
657		break;
658	case IPPROTO_TCP:
659		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
660			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
661		break;
662	case IPPROTO_UDP:
663		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
664			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
665		break;
666	}
667	return (NULL);
668}
669
670/*
671 * Lists nat64lsn states.
672 * Data layout (v0):
673 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
674 * Reply: [ ipfw_obj_header ipfw_obj_data [
675 *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
676 *
677 * Returns 0 on success
678 */
679static int
680nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
681    struct sockopt_data *sd)
682{
683
684	/* TODO: implement states listing for old ipfw(8) binaries  */
685	return (EOPNOTSUPP);
686}
687
688/*
689 * Lists nat64lsn states.
690 * Data layout (v1)(current):
691 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
692 * Reply: [ ipfw_obj_header ipfw_obj_data [
693 *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
694 *
695 * Returns 0 on success
696 */
697static int
698nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
699    struct sockopt_data *sd)
700{
701	ipfw_obj_header *oh;
702	ipfw_obj_data *od;
703	ipfw_nat64lsn_stg_v1 *stg;
704	struct nat64lsn_cfg *cfg;
705	struct nat64lsn_pg *pg;
706	union nat64lsn_pgidx idx;
707	size_t sz;
708	uint32_t count, total;
709	int ret;
710
711	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
712	    sizeof(uint64_t);
713	/* Check minimum header size */
714	if (sd->valsize < sz)
715		return (EINVAL);
716
717	oh = (ipfw_obj_header *)sd->kbuf;
718	od = (ipfw_obj_data *)(oh + 1);
719	if (od->head.type != IPFW_TLV_OBJDATA ||
720	    od->head.length != sz - sizeof(ipfw_obj_header))
721		return (EINVAL);
722
723	idx.index = *(uint64_t *)(od + 1);
724	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
725	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
726		return (EINVAL);
727	if (idx.index == LAST_IDX)
728		return (EINVAL);
729
730	IPFW_UH_RLOCK(ch);
731	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
732	if (cfg == NULL) {
733		IPFW_UH_RUNLOCK(ch);
734		return (ENOENT);
735	}
736	if (idx.index == 0) {	/* Fill in starting point */
737		idx.addr = cfg->prefix4;
738		idx.proto = IPPROTO_ICMP;
739		idx.port = NAT64_MIN_PORT;
740	}
741	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
742	    idx.port < NAT64_MIN_PORT) {
743		IPFW_UH_RUNLOCK(ch);
744		return (EINVAL);
745	}
746	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
747	    sizeof(ipfw_nat64lsn_stg_v1);
748	if (sd->valsize < sz) {
749		IPFW_UH_RUNLOCK(ch);
750		return (ENOMEM);
751	}
752	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
753	od = (ipfw_obj_data *)(oh + 1);
754	od->head.type = IPFW_TLV_OBJDATA;
755	od->head.length = sz - sizeof(ipfw_obj_header);
756	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
757	stg->count = total = 0;
758	stg->next.index = idx.index;
759	/*
760	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
761	 * Thus states, hosts and PGs will not expire while we hold it.
762	 */
763	CALLOUT_LOCK(cfg);
764	ret = 0;
765	do {
766		pg = nat64lsn_get_pg_byidx(cfg, &idx);
767		if (pg != NULL) {
768			count = 0;
769			ret = nat64lsn_export_states_v1(cfg, &idx, pg,
770			    sd, &count);
771			if (ret != 0)
772				break;
773			if (count > 0) {
774				stg->count += count;
775				total += count;
776				/* Update total size of reply */
777				od->head.length +=
778				    count * sizeof(ipfw_nat64lsn_state_v1);
779				sz += count * sizeof(ipfw_nat64lsn_state_v1);
780			}
781			stg->alias4.s_addr = htonl(idx.addr);
782		}
783		/* Determine new index */
784		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
785		case -1:
786			ret = ENOENT; /* End of search */
787			break;
788		case 1: /*
789			 * Next alias address, new group may be needed.
790			 * If states count is zero, use this group.
791			 */
792			if (stg->count == 0)
793				continue;
794			/* Otherwise try to create new group */
795			sz += sizeof(ipfw_nat64lsn_stg_v1);
796			if (sd->valsize < sz) {
797				ret = ENOMEM;
798				break;
799			}
800			/* Save next index in current group */
801			stg->next.index = idx.index;
802			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
803			    sizeof(ipfw_nat64lsn_stg_v1));
804			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
805			stg->count = 0;
806			break;
807		}
808		stg->next.index = idx.index;
809	} while (ret == 0);
810	CALLOUT_UNLOCK(cfg);
811	IPFW_UH_RUNLOCK(ch);
812	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
813}
814
815static struct ipfw_sopt_handler	scodes[] = {
816	{ IP_FW_NAT64LSN_CREATE, 0,	HDIR_BOTH,	nat64lsn_create },
817	{ IP_FW_NAT64LSN_DESTROY,0,	HDIR_SET,	nat64lsn_destroy },
818	{ IP_FW_NAT64LSN_CONFIG, 0,	HDIR_BOTH,	nat64lsn_config },
819	{ IP_FW_NAT64LSN_LIST,	 0,	HDIR_GET,	nat64lsn_list },
820	{ IP_FW_NAT64LSN_STATS,	 0,	HDIR_GET,	nat64lsn_stats },
821	{ IP_FW_NAT64LSN_RESET_STATS,0,	HDIR_SET,	nat64lsn_reset_stats },
822	{ IP_FW_NAT64LSN_LIST_STATES,0,	HDIR_GET,	nat64lsn_states_v0 },
823	{ IP_FW_NAT64LSN_LIST_STATES,1,	HDIR_GET,	nat64lsn_states_v1 },
824};
825
826static int
827nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
828{
829	ipfw_insn *icmd;
830
831	icmd = cmd - 1;
832	if (icmd->opcode != O_EXTERNAL_ACTION ||
833	    icmd->arg1 != V_nat64lsn_eid)
834		return (1);
835
836	*puidx = cmd->arg1;
837	*ptype = 0;
838	return (0);
839}
840
841static void
842nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
843{
844
845	cmd->arg1 = idx;
846}
847
848static int
849nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
850    struct named_object **pno)
851{
852	int err;
853
854	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
855	    IPFW_TLV_NAT64LSN_NAME, pno);
856	return (err);
857}
858
859static struct named_object *
860nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
861{
862	struct namedobj_instance *ni;
863	struct named_object *no;
864
865	IPFW_UH_WLOCK_ASSERT(ch);
866	ni = CHAIN_TO_SRV(ch);
867	no = ipfw_objhash_lookup_kidx(ni, idx);
868	KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
869
870	return (no);
871}
872
873static int
874nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
875    enum ipfw_sets_cmd cmd)
876{
877
878	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
879	    set, new_set, cmd));
880}
881
882static struct opcode_obj_rewrite opcodes[] = {
883	{
884		.opcode = O_EXTERNAL_INSTANCE,
885		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
886		.classifier = nat64lsn_classify,
887		.update = nat64lsn_update_arg1,
888		.find_byname = nat64lsn_findbyname,
889		.find_bykidx = nat64lsn_findbykidx,
890		.manage_sets = nat64lsn_manage_sets,
891	},
892};
893
894static int
895destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
896    void *arg)
897{
898	struct nat64lsn_cfg *cfg;
899	struct ip_fw_chain *ch;
900
901	ch = (struct ip_fw_chain *)arg;
902	cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
903	SRV_OBJECT(ch, no->kidx) = NULL;
904	nat64lsn_detach_config(ch, cfg);
905	nat64lsn_destroy_instance(cfg);
906	return (0);
907}
908
909int
910nat64lsn_init(struct ip_fw_chain *ch, int first)
911{
912
913	if (first != 0)
914		nat64lsn_init_internal();
915	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
916	if (V_nat64lsn_eid == 0)
917		return (ENXIO);
918	IPFW_ADD_SOPT_HANDLER(first, scodes);
919	IPFW_ADD_OBJ_REWRITER(first, opcodes);
920	return (0);
921}
922
923void
924nat64lsn_uninit(struct ip_fw_chain *ch, int last)
925{
926
927	IPFW_DEL_OBJ_REWRITER(last, opcodes);
928	IPFW_DEL_SOPT_HANDLER(last, scodes);
929	ipfw_del_eaction(ch, V_nat64lsn_eid);
930	/*
931	 * Since we already have deregistered external action,
932	 * our named objects become unaccessible via rules, because
933	 * all rules were truncated by ipfw_del_eaction().
934	 * So, we can unlink and destroy our named objects without holding
935	 * IPFW_WLOCK().
936	 */
937	IPFW_UH_WLOCK(ch);
938	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
939	    IPFW_TLV_NAT64LSN_NAME);
940	V_nat64lsn_eid = 0;
941	IPFW_UH_WUNLOCK(ch);
942	if (last != 0)
943		nat64lsn_uninit_internal();
944}
945
946