/* nat64lsn.h revision 346211 */
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * $FreeBSD: stable/11/sys/netpfil/ipfw/nat64/nat64lsn.h 346211 2019-04-14 12:35:58Z ae $
30 */
31
32#ifndef	_IP_FW_NAT64LSN_H_
33#define	_IP_FW_NAT64LSN_H_
34
35#include "ip_fw_nat64.h"
36#include "nat64_translate.h"
37
/*
 * Port space is divided into chunks of 2^NAT64_CHUNK_SIZE_BITS ports.
 */
#define	NAT64_CHUNK_SIZE_BITS	6	/* log2 of the chunk size: 64 ports */
#define	NAT64_CHUNK_SIZE	(1 << NAT64_CHUNK_SIZE_BITS)

/* Lowest port number and the number of the chunk that contains it. */
#define	NAT64_MIN_PORT		1024
#define	NAT64_MIN_CHUNK		(NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
43
/* Compact two-byte reference used to link states and portgroups. */
struct st_ptr {
	uint8_t			idx;	/* index in nh->pg_ptr array;
					 * NOTE: counting starts from 1.
					 */
	uint8_t			off;	/* presumably the state offset inside
					 * the portgroup -- confirm in users.
					 */
};
/* Largest value that fits into st_ptr.idx. */
#define	NAT64LSN_MAXPGPTR	((1 << (NBBY * sizeof(uint8_t))) - 1)
/* Number of bits in one uint64_t mask word. */
#define	NAT64LSN_PGPTRMASKBITS	(NBBY * sizeof(uint64_t))
/* Number of uint64_t mask words needed for one bit per possible idx. */
#define	NAT64LSN_PGPTRNMASK						\
    (roundup(NAT64LSN_MAXPGPTR, NAT64LSN_PGPTRMASKBITS) /		\
    NAT64LSN_PGPTRMASKBITS)
54
55struct nat64lsn_portgroup;
56/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
57struct nat64lsn_host {
58	struct rwlock	h_lock;		/* Host states lock */
59
60	struct in6_addr	addr;
61	struct nat64lsn_host	*next;
62	uint16_t	timestamp;	/* Last altered */
63	uint16_t	hsize;		/* ports hash size */
64	uint16_t	pg_used;	/* Number of portgroups used */
65#define	NAT64LSN_REMAININGPG	8	/* Number of remaining PG before
66					 * requesting of new chunk of indexes.
67					 */
68	uint16_t	pg_allocated;	/* Number of portgroups indexes
69					 * allocated.
70					 */
71#define	NAT64LSN_HSIZE	64
72	struct st_ptr	phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
73	/*
74	 * PG indexes are stored in chunks with 32 elements.
75	 * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
76	 */
77#define	NAT64LSN_PGIDX_CHUNK	32
78#define	NAT64LSN_PGNIDX		(roundup(NAT64LSN_MAXPGPTR, \
79    NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
80	struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
81};
82
/*
 * Wrappers around the per-host rwlock.  `h' is a pointer to an object
 * that has an h_lock member (see struct nat64lsn_host).
 */

/* Lock lifetime. */
#define	NAT64_LOCK_INIT(h) do {				\
	rw_init(&(h)->h_lock, "NAT64 host lock");	\
} while (0)
#define	NAT64_LOCK_DESTROY(h) do {			\
	rw_destroy(&(h)->h_lock);			\
} while (0)

/* Shared (read) access. */
#define	NAT64_RLOCK(h)		rw_rlock(&(h)->h_lock)
#define	NAT64_RUNLOCK(h)	rw_runlock(&(h)->h_lock)

/* Exclusive (write) access; NAT64_LOCK/NAT64_UNLOCK are aliases. */
#define	NAT64_WLOCK(h)		rw_wlock(&(h)->h_lock)
#define	NAT64_WUNLOCK(h)	rw_wunlock(&(h)->h_lock)
#define	NAT64_LOCK(h)		NAT64_WLOCK(h)
#define	NAT64_UNLOCK(h)		NAT64_WUNLOCK(h)

/* Assertions on the current lock state. */
#define	NAT64_RLOCK_ASSERT(h)	rw_assert(&(h)->h_lock, RA_RLOCKED)
#define	NAT64_WLOCK_ASSERT(h)	rw_assert(&(h)->h_lock, RA_WLOCKED)
99
100/* Internal proto index */
101#define	NAT_PROTO_TCP	1
102#define	NAT_PROTO_UDP	2
103#define	NAT_PROTO_ICMP	3
104
105#define	NAT_MAX_PROTO	4
106extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
107
108VNET_DECLARE(uint16_t, nat64lsn_eid);
109#define	V_nat64lsn_eid		VNET(nat64lsn_eid)
110#define	IPFW_TLV_NAT64LSN_NAME	IPFW_TLV_EACTION_NAME(V_nat64lsn_eid)
111
/*
 * Timestamp macros.
 *
 * Timestamps are the low 16 bits of time_uptime (seconds), so they wrap
 * every 65536 seconds and ages are computed modulo 65536.
 */
#define	_CT		((int)time_uptime % 65536)
/* Store the current 16-bit timestamp into the lvalue x. */
#define	SET_AGE(x)	((x) = _CT)
/*
 * Seconds elapsed since timestamp x, in the range 0..65535.
 *
 * The subtraction is reduced modulo 2^16 by the uint16_t cast, which gives
 * the same result as comparing and adding 65536 by hand, but reads
 * time_uptime (and evaluates x) exactly once.  Reading the clock twice
 * could produce a negative age if it advanced between the two reads.
 */
#define	GET_AGE(x)	((int)(uint16_t)(_CT - (x)))
117
/*
 * With _FFS64 defined the portgroup free mask is a single 64-bit word
 * that is searched with ffsll(); otherwise two 32-bit words are used.
 */
#if defined(__LP64__)
#define	_FFS64
#endif
122
123/* 16 bytes */
124struct nat64lsn_state {
125	union {
126		struct {
127			in_addr_t	faddr;	/* Remote IPv4 address */
128			uint16_t	fport;	/* Remote IPv4 port */
129			uint16_t	lport;	/* Local IPv6 port */
130		}s;
131		uint64_t		hkey;
132	} u;
133	uint8_t		nat_proto;
134	uint8_t		flags;
135	uint16_t	timestamp;
136	struct st_ptr	cur; /* Index of portgroup in nat64lsn_host */
137	struct st_ptr	next; /* Next entry index */
138};
139
140/*
141 * 1024+32 bytes per 64 states, used to store state
142 * AND for outside-in state lookup
143 */
144struct nat64lsn_portgroup {
145	struct nat64lsn_host	*host;	/* IPv6 source host info */
146	in_addr_t		aaddr;	/* Alias addr, network format */
147	uint16_t		aport;	/* Base port */
148	uint16_t		timestamp;
149	uint8_t			nat_proto;
150	uint8_t			spare[3];
151	uint32_t		idx;
152#ifdef _FFS64
153	uint64_t		freemask;	/* Mask of free entries */
154#else
155	uint32_t		freemask[2];	/* Mask of free entries */
156#endif
157	struct nat64lsn_state	states[NAT64_CHUNK_SIZE]; /* State storage */
158};
159#ifdef _FFS64
160#define	PG_MARK_BUSY_IDX(_pg, _idx)	(_pg)->freemask &= ~((uint64_t)1<<(_idx))
161#define	PG_MARK_FREE_IDX(_pg, _idx)	(_pg)->freemask |= ((uint64_t)1<<(_idx))
162#define	PG_IS_FREE_IDX(_pg, _idx)	((_pg)->freemask & ((uint64_t)1<<(_idx)))
163#define	PG_IS_BUSY_IDX(_pg, _idx)	(PG_IS_FREE_IDX(_pg, _idx) == 0)
164#define	PG_GET_FREE_IDX(_pg)		(ffsll((_pg)->freemask))
165#define	PG_IS_EMPTY(_pg)		(((_pg)->freemask + 1) == 0)
166#else
167#define	PG_MARK_BUSY_IDX(_pg, _idx)	\
168	(_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
169#define	PG_MARK_FREE_IDX(_pg, _idx)	\
170	(_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx)  % 32))
171#define	PG_IS_FREE_IDX(_pg, _idx)	\
172	((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
173#define	PG_IS_BUSY_IDX(_pg, _idx)	(PG_IS_FREE_IDX(_pg, _idx) == 0)
174#define	PG_GET_FREE_IDX(_pg)		_pg_get_free_idx(_pg)
175#define	PG_IS_EMPTY(_pg)		\
176	((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
177
178static inline int
179_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
180{
181	int i;
182
183	if ((i = ffsl(pg->freemask[0])) != 0)
184		return (i);
185	if ((i = ffsl(pg->freemask[1])) != 0)
186		return (i + 32);
187	return (0);
188}
189
190#endif
191
192TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
193
194struct nat64lsn_cfg {
195	struct named_object	no;
196	struct nat64lsn_portgroup	**pg;	/* XXX: array of pointers */
197	struct nat64lsn_host	**ih;	/* Host hash */
198	uint32_t	prefix4;	/* IPv4 prefix */
199	uint32_t	pmask4;		/* IPv4 prefix mask */
200	uint32_t	ihsize;		/* IPv6 host hash size */
201	uint8_t		plen4;
202	uint8_t		nomatch_verdict;/* What to return to ipfw on no-match */
203
204	uint32_t	ihcount;	/* Number of items in host hash */
205	int		max_chunks;	/* Max chunks per client */
206	int		agg_prefix_len;	/* Prefix length to count */
207	int		agg_prefix_max;	/* Max hosts per agg prefix */
208	uint32_t	jmaxlen;	/* Max jobqueue length */
209	uint16_t	min_chunk;	/* Min port group # to use */
210	uint16_t	max_chunk;	/* Max port group # to use */
211	uint16_t	nh_delete_delay;	/* Stale host delete delay */
212	uint16_t	pg_delete_delay;	/* Stale portgroup del delay */
213	uint16_t	st_syn_ttl;	/* TCP syn expire */
214	uint16_t	st_close_ttl;	/* TCP fin expire */
215	uint16_t	st_estab_ttl;	/* TCP established expire */
216	uint16_t	st_udp_ttl;	/* UDP expire */
217	uint16_t	st_icmp_ttl;	/* ICMP expire */
218	uint32_t	protochunks[NAT_MAX_PROTO];/* Number of chunks used */
219	struct nat64_config	base;
220#define	NAT64LSN_FLAGSMASK	(NAT64_LOG | NAT64_ALLOW_PRIVATE)
221
222	struct callout		periodic;
223	struct callout		jcallout;
224	struct ip_fw_chain	*ch;
225	struct vnet		*vp;
226	struct nat64lsn_job_head	jhead;
227	int			jlen;
228	char			name[64];	/* Nat instance name */
229};
230
231struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
232    size_t numaddr);
233void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
234void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
235void nat64lsn_init_internal(void);
236void nat64lsn_uninit_internal(void);
237int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
238    ipfw_insn *cmd, int *done);
239
240void
241nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
242    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
243    const char *px, int off);
/*
 * Portgroup layout
 * addr x nat_proto x port_off
 *
 * cfg->pg is indexed as a flat array: _ADDR_PG_COUNT slots per alias
 * address, split into NAT_MAX_PROTO runs of _ADDR_PG_PROTO_COUNT slots,
 * one slot per chunk of 2^NAT64_CHUNK_SIZE_BITS ports.
 *
 * Every macro argument and every expansion is parenthesized so that the
 * macros stay correct when they are given expressions or are used inside
 * larger expressions.
 */

#define	_ADDR_PG_PROTO_COUNT	(65536 >> NAT64_CHUNK_SIZE_BITS)
#define	_ADDR_PG_COUNT		(_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)

/* Offset of alias address _addr from the start of the IPv4 prefix. */
#define	GET_ADDR_IDX(_cfg, _addr)	((_addr) - ((_cfg)->prefix4))
/* Slot index inside one address; _proto is a 1-based protocol index. */
#define	__GET_PORTGROUP_IDX(_proto, _port)	\
    ((((_proto) - 1) * _ADDR_PG_PROTO_COUNT) +	\
	((_port) >> NAT64_CHUNK_SIZE_BITS))

/* Slot index inside the whole cfg->pg array. */
#define	_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)	\
    ((GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT) +	\
	__GET_PORTGROUP_IDX(_proto, _port))
#define	GET_PORTGROUP(_cfg, _addr, _proto, _port)	\
    ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])

/* _idx-th chunk of portgroup pointers of host _nh (0-based). */
#define	PORTGROUP_CHUNK(_nh, _idx)		\
    ((_nh)->pg_ptr[(_idx)])
/*
 * Portgroup pointer for the 1-based index _idx (as kept in st_ptr.idx).
 * _cfg is not used by the expansion.
 */
#define	PORTGROUP_BYSIDX(_cfg, _nh, _idx)	\
    (PORTGROUP_CHUNK(_nh, ((_idx) - 1) / NAT64LSN_PGIDX_CHUNK) \
	[((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
269
270
/*
 * Chained hash table
 *
 * The CHT_* macros are generic over a name prefix _PX.  The user provides
 * _PX##hash(key), _PX##lock(ph, bucket), _PX##unlock(ph, bucket),
 * _PX##first(ph, bucket), _PX##next(item), _PX##val(item) and
 * _PX##cmp(key, val) (non-zero on match).  Buckets are selected with
 * `hash & (_hsize - 1)', so _hsize is expected to be a power of two.
 */

/*
 * CHT_FIND: look up _key and store the result in _x.
 *
 * On a hit _x is the matching item and its bucket is LEFT LOCKED; the
 * caller has to release it (e.g. with CHT_UNLOCK_KEY).  On a miss _x is
 * NULL and the bucket has already been unlocked.
 */
#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do {			\
	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
	_PX##lock(_ph, _buck);						\
	_x = _PX##first(_ph, _buck);					\
	for ( ; _x != NULL; _x = _PX##next(_x)) {			\
		if (_PX##cmp(_key, _PX##val(_x)))			\
			break;						\
	}								\
	if (_x == NULL)							\
		_PX##unlock(_ph, _buck);				\
} while(0)
283
/*
 * CHT_UNLOCK_BUCK: unlock bucket number _buck.
 * NOTE: the expansion already ends with a semicolon (kept for existing
 * callers), so do not use it as the body of an unbraced if/else.
 */
#define	CHT_UNLOCK_BUCK(_ph, _PX, _buck)				\
	_PX##unlock(_ph, _buck);

/*
 * CHT_UNLOCK_KEY: unlock the bucket that _key hashes to, e.g. after a
 * successful CHT_FIND for the same key and table size.
 */
#define	CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do {			\
	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
	_PX##unlock(_ph, _buck);					\
} while(0)
291
/*
 * CHT_INSERT_HEAD: link item _i at the head of the bucket selected by
 * the hash of its own value.  The bucket is locked for the duration of
 * the insert and unlocked again before the macro completes.
 */
#define	CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do {			\
	unsigned int _buck = _PX##hash(_PX##val(_i)) & ((_hsize) - 1);	\
	_PX##lock(_ph, _buck);						\
	_PX##next(_i) = _PX##first(_ph, _buck);				\
	_PX##first(_ph, _buck) = _i;					\
	_PX##unlock(_ph, _buck);					\
} while(0)
299
/*
 * CHT_REMOVE: unlink the first item whose value matches _key.
 *
 * After the macro _x is the unlinked item, or NULL if nothing matched.
 * _tmp is scratch storage of the same pointer type (it tracks the
 * predecessor).  The item's own next pointer is left untouched and the
 * item is not freed.  The bucket is locked during the operation and
 * unlocked afterwards in both cases.
 */
#define	CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do {		\
	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
	_PX##lock(_ph, _buck);						\
	_x = _PX##first(_ph, _buck);					\
	_tmp = NULL;							\
	for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) {		\
		if (_PX##cmp(_key, _PX##val(_x)))			\
			break;						\
	}								\
	if (_x != NULL) {						\
		if (_tmp == NULL)					\
			_PX##first(_ph, _buck) = _PX##next(_x);		\
		else							\
			_PX##next(_tmp) = _PX##next(_x);		\
	}								\
	_PX##unlock(_ph, _buck);					\
} while(0)
317
/*
 * CHT_FOREACH_SAFE: call _cb(_x, _arg) for every item of every bucket
 * and unlink each item for which the callback returns non-zero.
 *
 * _x is the iteration variable; _tmp is scratch storage of the same
 * pointer type and always refers to the last item that was KEPT in the
 * current bucket (NULL while no item has been kept), so that runs of
 * consecutive removals are unlinked correctly.  Each bucket is locked
 * while it is being walked.
 *
 * NOTE: the next pointer of _x is read after the callback returns, so
 * the callback must leave it readable even when it asks for removal.
 */
#define	CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do {	\
	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
		_PX##lock(_ph, _i);					\
		_tmp = NULL;						\
		for (_x = _PX##first(_ph, _i); _x != NULL;		\
		    _x = _PX##next(_x)) {				\
			if (_cb(_x, _arg) == 0) {			\
				/* Item stays: new predecessor. */	\
				_tmp = _x;				\
				continue;				\
			}						\
			if (_tmp == NULL)				\
				_PX##first(_ph, _i) = _PX##next(_x);	\
			else						\
				_PX##next(_tmp) = _PX##next(_x);	\
		}							\
		_PX##unlock(_ph, _i);					\
	}								\
} while(0)
334
/*
 * CHT_RESIZE: move every item of table _ph (with _hsize buckets) into
 * table _nph (with _nhsize buckets), rehashing each item by its value.
 *
 * _x and _y are scratch pointers of the item type: _x is the item being
 * moved and _y holds its successor, which has to be saved before the
 * item's next pointer is overwritten by the insert into the new table.
 * Items are pushed at the head of their new bucket.
 *
 * The macro takes no locks, and the bucket heads of the old table are
 * not cleared; the old table must not be used afterwards.
 */
#define	CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do {	\
	unsigned int _buck;						\
	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
		_x = _PX##first(_ph, _i);				\
		while (_x != NULL) {					\
			_y = _PX##next(_x);				\
			_buck = _PX##hash(_PX##val(_x)) &		\
			    ((_nhsize) - 1);				\
			_PX##next(_x) = _PX##first(_nph, _buck);	\
			_PX##first(_nph, _buck) = _x;			\
			_x = _y;					\
		}							\
	}								\
} while(0)
348
349#endif /* _IP_FW_NAT64LSN_H_ */
350
351