ip_fw_dynamic.c revision 243401
1/*-
2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD: stable/9/sys/netpfil/ipfw/ip_fw_dynamic.c 243401 2012-11-22 12:11:32Z glebius $");
28
29#define        DEB(x)
30#define        DDB(x) x
31
32/*
33 * Dynamic rule support for ipfw
34 */
35
36#include "opt_ipfw.h"
37#include "opt_inet.h"
38#ifndef INET
39#error IPFIREWALL requires INET.
40#endif /* INET */
41#include "opt_inet6.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/kernel.h>
48#include <sys/lock.h>
49#include <sys/socket.h>
50#include <sys/sysctl.h>
51#include <sys/syslog.h>
52#include <net/ethernet.h> /* for ETHERTYPE_IP */
53#include <net/if.h>
54#include <net/vnet.h>
55
56#include <netinet/in.h>
57#include <netinet/ip.h>
58#include <netinet/ip_var.h>	/* ip_defttl */
59#include <netinet/ip_fw.h>
60#include <netinet/tcp_var.h>
61#include <netinet/udp.h>
62
63#include <netinet/ip6.h>	/* IN6_ARE_ADDR_EQUAL */
64#ifdef INET6
65#include <netinet6/in6_var.h>
66#include <netinet6/ip6_var.h>
67#endif
68
69#include <netpfil/ipfw/ip_fw_private.h>
70
71#include <machine/in_cksum.h>	/* XXX for in_cksum */
72
73#ifdef MAC
74#include <security/mac/mac_framework.h>
75#endif
76
77/*
78 * Description of dynamic rules.
79 *
80 * Dynamic rules are stored in lists accessed through a hash table
81 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
82 * be modified through the sysctl variable dyn_buckets which is
83 * updated when the table becomes empty.
84 *
85 * XXX currently there is only one list, ipfw_dyn.
86 *
87 * When a packet is received, its address fields are first masked
88 * with the mask defined for the rule, then hashed, then matched
89 * against the entries in the corresponding list.
90 * Dynamic rules can be used for different purposes:
91 *  + stateful rules;
92 *  + enforcing limits on the number of sessions;
93 *  + in-kernel NAT (not implemented yet)
94 *
95 * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
96 * measured in seconds and depending on the flags.
97 *
98 * The total number of dynamic rules is stored in dyn_count.
 * The max number of dynamic rules is dyn_max. When we reach
 * the maximum number of rules we do not create any more. This is
101 * done to avoid consuming too much memory, but also too much
102 * time when searching on each packet (ideally, we should try instead
103 * to put a limit on the length of the list on each bucket...).
104 *
105 * Each dynamic rule holds a pointer to the parent ipfw rule so
106 * we know what action to perform. Dynamic rules are removed when
107 * the parent rule is deleted. XXX we should make them survive.
108 *
109 * There are some limitations with dynamic rules -- we do not
110 * obey the 'randomized match', and we do not do multiple
111 * passes through the firewall. XXX check the latter!!!
112 */
113
/*
 * Static variables followed by global ones
 *
 * All of these are per-vnet (VNET_DEFINE) and are accessed through the
 * V_* shorthand macros defined right below them.
 */
/* Hash table of dynamic rules: array of bucket list heads. */
static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v);
/* Requested number of buckets (writable through sysctl). */
static VNET_DEFINE(u_int32_t, dyn_buckets);
/* Number of buckets of the table currently allocated. */
static VNET_DEFINE(u_int32_t, curr_dyn_buckets);
/* Callout rearmed from ipfw_tick(). */
static VNET_DEFINE(struct callout, ipfw_timeout);
#define	V_ipfw_dyn_v			VNET(ipfw_dyn_v)
#define	V_dyn_buckets			VNET(dyn_buckets)
#define	V_curr_dyn_buckets		VNET(curr_dyn_buckets)
#define V_ipfw_timeout                  VNET(ipfw_timeout)
125
/* UMA zone that dynamic rule entries are allocated from and freed to. */
static uma_zone_t ipfw_dyn_rule_zone;
#ifndef __FreeBSD__
DEFINE_SPINLOCK(ipfw_dyn_mtx);
#else
static struct mtx ipfw_dyn_mtx;		/* mutex guarding dynamic rules */
#endif

/*
 * Wrappers around the single lock protecting the dynamic rule table.
 * Note that the lock is one plain static object, i.e. it is not per-vnet.
 */
#define	IPFW_DYN_LOCK_INIT() \
	mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF)
#define	IPFW_DYN_LOCK_DESTROY()	mtx_destroy(&ipfw_dyn_mtx)
#define	IPFW_DYN_LOCK()		mtx_lock(&ipfw_dyn_mtx)
#define	IPFW_DYN_UNLOCK()	mtx_unlock(&ipfw_dyn_mtx)
#define	IPFW_DYN_LOCK_ASSERT()	mtx_assert(&ipfw_dyn_mtx, MA_OWNED)
139
/*
 * Release the dynamic rule lock.
 *
 * Exported counterpart of ipfw_lookup_dyn_rule(), which returns with the
 * lock still held whenever it finds a matching rule.
 */
void
ipfw_dyn_unlock(void)
{
	IPFW_DYN_UNLOCK();
}
145
/*
 * Timeouts for various events in handling dynamic rules.
 * All values are added to time_uptime, i.e. they are expressed in seconds.
 */
/* TCP: applied once acks have been seen in both directions. */
static VNET_DEFINE(u_int32_t, dyn_ack_lifetime);
/* TCP: connection opening; also the initial lifetime of any new entry. */
static VNET_DEFINE(u_int32_t, dyn_syn_lifetime);
/* TCP: both sides have sent a FIN. */
static VNET_DEFINE(u_int32_t, dyn_fin_lifetime);
/* TCP: reset, or any other flag combination. */
static VNET_DEFINE(u_int32_t, dyn_rst_lifetime);
/* UDP flows. */
static VNET_DEFINE(u_int32_t, dyn_udp_lifetime);
/* Every other protocol, and O_LIMIT_PARENT entries. */
static VNET_DEFINE(u_int32_t, dyn_short_lifetime);

#define	V_dyn_ack_lifetime		VNET(dyn_ack_lifetime)
#define	V_dyn_syn_lifetime		VNET(dyn_syn_lifetime)
#define	V_dyn_fin_lifetime		VNET(dyn_fin_lifetime)
#define	V_dyn_rst_lifetime		VNET(dyn_rst_lifetime)
#define	V_dyn_udp_lifetime		VNET(dyn_udp_lifetime)
#define	V_dyn_short_lifetime		VNET(dyn_short_lifetime)
162
/*
 * Keepalives are sent if dyn_keepalive is set. They are sent every
 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
 * seconds of lifetime of a rule.
 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
 * than dyn_keepalive_period.
 *
 * The lookup code enforces the last constraint by clamping the two
 * lifetimes to dyn_keepalive_period - 1 when they are applied.
 */

static VNET_DEFINE(u_int32_t, dyn_keepalive_interval);
static VNET_DEFINE(u_int32_t, dyn_keepalive_period);
static VNET_DEFINE(u_int32_t, dyn_keepalive);

#define	V_dyn_keepalive_interval	VNET(dyn_keepalive_interval)
#define	V_dyn_keepalive_period		VNET(dyn_keepalive_period)
#define	V_dyn_keepalive			VNET(dyn_keepalive)

static VNET_DEFINE(u_int32_t, dyn_count);	/* # of dynamic rules */
static VNET_DEFINE(u_int32_t, dyn_max);		/* max # of dynamic rules */

#define	V_dyn_count			VNET(dyn_count)
#define	V_dyn_max			VNET(dyn_max)
184
#ifdef SYSCTL_NODE

/*
 * Export the tunables above under net.inet.ip.fw.
 * curr_dyn_buckets and dyn_count are read-only; everything else listed
 * here is read-write. dyn_keepalive_interval and dyn_keepalive_period
 * are not exported by this block.
 */
SYSBEGIN(f2)

SYSCTL_DECL(_net_inet_ip_fw);
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
    CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0,
    "Number of dyn. buckets");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
    CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0,
    "Current Number of dyn. buckets");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_count,
    CTLFLAG_RD, &VNET_NAME(dyn_count), 0,
    "Number of dyn. rules");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_max,
    CTLFLAG_RW, &VNET_NAME(dyn_max), 0,
    "Max number of dyn. rules");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
    CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0,
    "Lifetime of dyn. rules for acks");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
    CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0,
    "Lifetime of dyn. rules for syn");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
    CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0,
    "Lifetime of dyn. rules for fin");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
    CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0,
    "Lifetime of dyn. rules for rst");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
    CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0,
    "Lifetime of dyn. rules for UDP");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
    CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0,
    "Lifetime of dyn. rules for other situations");
SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
    CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
    "Enable keepalives for dyn. rules");

SYSEND

#endif /* SYSCTL_NODE */
227
228
229static __inline int
230hash_packet6(struct ipfw_flow_id *id)
231{
232	u_int32_t i;
233	i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
234	    (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
235	    (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
236	    (id->src_ip6.__u6_addr.__u6_addr32[3]) ^
237	    (id->dst_port) ^ (id->src_port);
238	return i;
239}
240
241/*
242 * IMPORTANT: the hash function for dynamic rules must be commutative
243 * in source and destination (ip,port), because rules are bidirectional
244 * and we want to find both in the same bucket.
245 */
246static __inline int
247hash_packet(struct ipfw_flow_id *id)
248{
249	u_int32_t i;
250
251#ifdef INET6
252	if (IS_IP6_FLOW_ID(id))
253		i = hash_packet6(id);
254	else
255#endif /* INET6 */
256	i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
257	i &= (V_curr_dyn_buckets - 1);
258	return i;
259}
260
261static __inline void
262unlink_dyn_rule_print(struct ipfw_flow_id *id)
263{
264	struct in_addr da;
265#ifdef INET6
266	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
267#else
268	char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
269#endif
270
271#ifdef INET6
272	if (IS_IP6_FLOW_ID(id)) {
273		ip6_sprintf(src, &id->src_ip6);
274		ip6_sprintf(dst, &id->dst_ip6);
275	} else
276#endif
277	{
278		da.s_addr = htonl(id->src_ip);
279		inet_ntoa_r(da, src);
280		da.s_addr = htonl(id->dst_ip);
281		inet_ntoa_r(da, dst);
282	}
283	printf("ipfw: unlink entry %s %d -> %s %d, %d left\n",
284	    src, id->src_port, dst, id->dst_port, V_dyn_count - 1);
285}
286
/**
 * unlink a dynamic rule from a chain. prev is a pointer to
 * the previous one, q is a pointer to the rule to delete,
 * head is a pointer to the head of the queue.
 * Modifies q and potentially also head.
 *
 * On return q points to the successor of the removed entry (possibly
 * NULL), so a caller walking the list must NOT advance q again.
 * All three arguments must be lvalues and are evaluated more than once.
 * For O_LIMIT entries the parent's child counter is decremented.
 * The removed entry goes back to ipfw_dyn_rule_zone and V_dyn_count is
 * decremented.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement; always terminate the invocation with ';'.
 */
#define UNLINK_DYN_RULE(prev, head, q) do {				\
	ipfw_dyn_rule *old_q = (q);					\
									\
	/* remove a refcount to the parent */				\
	if ((q)->dyn_type == O_LIMIT)					\
		(q)->parent->count--;					\
	DEB(unlink_dyn_rule_print(&(q)->id);)				\
	if ((prev) != NULL)						\
		(prev)->next = (q) = (q)->next;				\
	else								\
		(head) = (q) = (q)->next;				\
	V_dyn_count--;							\
	uma_zfree(ipfw_dyn_rule_zone, old_q);				\
} while (0)
306
/*
 * True when timestamp a is not later than timestamp b. The difference is
 * taken first and then reinterpreted as a signed int, so the comparison
 * stays correct when the counters wrap around.
 */
#define TIME_LEQ(a, b)	((int)((a) - (b)) <= 0)
308
309/**
310 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
311 *
312 * If keep_me == NULL, rules are deleted even if not expired,
313 * otherwise only expired rules are removed.
314 *
315 * The value of the second parameter is also used to point to identify
316 * a rule we absolutely do not want to remove (e.g. because we are
317 * holding a reference to it -- this is the case with O_LIMIT_PARENT
318 * rules). The pointer is only used for comparison, so any non-null
319 * value will do.
320 */
321static void
322remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
323{
324	static u_int32_t last_remove = 0;
325
326#define FORCE (keep_me == NULL)
327
328	ipfw_dyn_rule *prev, *q;
329	int i, pass = 0, max_pass = 0;
330
331	IPFW_DYN_LOCK_ASSERT();
332
333	if (V_ipfw_dyn_v == NULL || V_dyn_count == 0)
334		return;
335	/* do not expire more than once per second, it is useless */
336	if (!FORCE && last_remove == time_uptime)
337		return;
338	last_remove = time_uptime;
339
340	/*
341	 * because O_LIMIT refer to parent rules, during the first pass only
342	 * remove child and mark any pending LIMIT_PARENT, and remove
343	 * them in a second pass.
344	 */
345next_pass:
346	for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
347		for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) {
348			/*
349			 * Logic can become complex here, so we split tests.
350			 */
351			if (q == keep_me)
352				goto next;
353			if (rule != NULL && rule != q->rule)
354				goto next; /* not the one we are looking for */
355			if (q->dyn_type == O_LIMIT_PARENT) {
356				/*
357				 * handle parent in the second pass,
358				 * record we need one.
359				 */
360				max_pass = 1;
361				if (pass == 0)
362					goto next;
363				if (FORCE && q->count != 0 ) {
364					/* XXX should not happen! */
365					printf("ipfw: OUCH! cannot remove rule,"
366					     " count %d\n", q->count);
367				}
368			} else {
369				if (!FORCE &&
370				    !TIME_LEQ( q->expire, time_uptime ))
371					goto next;
372			}
373             if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
374                     UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q);
375                     continue;
376             }
377next:
378			prev=q;
379			q=q->next;
380		}
381	}
382	if (pass++ < max_pass)
383		goto next_pass;
384}
385
/*
 * Unconditionally remove the dynamic rules that point to "rule"
 * (all of them when rule is NULL), taking the dynamic rule lock
 * around the operation.
 */
void
ipfw_remove_dyn_children(struct ip_fw *rule)
{
	IPFW_DYN_LOCK();
	remove_dyn_rule(rule, NULL /* force removal */);
	IPFW_DYN_UNLOCK();
}
393
394/*
395 * Lookup a dynamic rule, locked version.
396 */
397static ipfw_dyn_rule *
398lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
399    struct tcphdr *tcp)
400{
401	/*
402	 * Stateful ipfw extensions.
403	 * Lookup into dynamic session queue.
404	 */
405#define MATCH_REVERSE	0
406#define MATCH_FORWARD	1
407#define MATCH_NONE	2
408#define MATCH_UNKNOWN	3
409	int i, dir = MATCH_NONE;
410	ipfw_dyn_rule *prev, *q = NULL;
411
412	IPFW_DYN_LOCK_ASSERT();
413
414	if (V_ipfw_dyn_v == NULL)
415		goto done;				/* not found */
416	i = hash_packet(pkt);
417	for (prev = NULL, q = V_ipfw_dyn_v[i]; q != NULL;) {
418		if (q->dyn_type == O_LIMIT_PARENT && q->count)
419			goto next;
420		if (TIME_LEQ(q->expire, time_uptime)) {	/* expire entry */
421			UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q);
422			continue;
423		}
424		if (pkt->proto != q->id.proto || q->dyn_type == O_LIMIT_PARENT)
425			goto next;
426
427		if (IS_IP6_FLOW_ID(pkt)) {
428			if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) &&
429			    IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) &&
430			    pkt->src_port == q->id.src_port &&
431			    pkt->dst_port == q->id.dst_port) {
432				dir = MATCH_FORWARD;
433				break;
434			}
435			if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) &&
436			    IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) &&
437			    pkt->src_port == q->id.dst_port &&
438			    pkt->dst_port == q->id.src_port) {
439				dir = MATCH_REVERSE;
440				break;
441			}
442		} else {
443			if (pkt->src_ip == q->id.src_ip &&
444			    pkt->dst_ip == q->id.dst_ip &&
445			    pkt->src_port == q->id.src_port &&
446			    pkt->dst_port == q->id.dst_port) {
447				dir = MATCH_FORWARD;
448				break;
449			}
450			if (pkt->src_ip == q->id.dst_ip &&
451			    pkt->dst_ip == q->id.src_ip &&
452			    pkt->src_port == q->id.dst_port &&
453			    pkt->dst_port == q->id.src_port) {
454				dir = MATCH_REVERSE;
455				break;
456			}
457		}
458next:
459		prev = q;
460		q = q->next;
461	}
462	if (q == NULL)
463		goto done;	/* q = NULL, not found */
464
465	if (prev != NULL) {	/* found and not in front */
466		prev->next = q->next;
467		q->next = V_ipfw_dyn_v[i];
468		V_ipfw_dyn_v[i] = q;
469	}
470	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
471		uint32_t ack;
472		u_char flags = pkt->_flags & (TH_FIN | TH_SYN | TH_RST);
473
474#define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
475#define BOTH_FIN	(TH_FIN | (TH_FIN << 8))
476#define	TCP_FLAGS	(TH_FLAGS | (TH_FLAGS << 8))
477#define	ACK_FWD		0x10000			/* fwd ack seen */
478#define	ACK_REV		0x20000			/* rev ack seen */
479
480		q->state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
481		switch (q->state & TCP_FLAGS) {
482		case TH_SYN:			/* opening */
483			q->expire = time_uptime + V_dyn_syn_lifetime;
484			break;
485
486		case BOTH_SYN:			/* move to established */
487		case BOTH_SYN | TH_FIN:		/* one side tries to close */
488		case BOTH_SYN | (TH_FIN << 8):
489#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0)
490			if (tcp == NULL)
491				break;
492
493			ack = ntohl(tcp->th_ack);
494			if (dir == MATCH_FORWARD) {
495				if (q->ack_fwd == 0 ||
496				    _SEQ_GE(ack, q->ack_fwd)) {
497					q->ack_fwd = ack;
498					q->state |= ACK_FWD;
499				}
500			} else {
501				if (q->ack_rev == 0 ||
502				    _SEQ_GE(ack, q->ack_rev)) {
503					q->ack_rev = ack;
504					q->state |= ACK_REV;
505				}
506			}
507			if ((q->state & (ACK_FWD | ACK_REV)) ==
508			    (ACK_FWD | ACK_REV)) {
509				q->expire = time_uptime + V_dyn_ack_lifetime;
510				q->state &= ~(ACK_FWD | ACK_REV);
511			}
512			break;
513
514		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
515			if (V_dyn_fin_lifetime >= V_dyn_keepalive_period)
516				V_dyn_fin_lifetime = V_dyn_keepalive_period - 1;
517			q->expire = time_uptime + V_dyn_fin_lifetime;
518			break;
519
520		default:
521#if 0
522			/*
523			 * reset or some invalid combination, but can also
524			 * occur if we use keep-state the wrong way.
525			 */
526			if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
527				printf("invalid state: 0x%x\n", q->state);
528#endif
529			if (V_dyn_rst_lifetime >= V_dyn_keepalive_period)
530				V_dyn_rst_lifetime = V_dyn_keepalive_period - 1;
531			q->expire = time_uptime + V_dyn_rst_lifetime;
532			break;
533		}
534	} else if (pkt->proto == IPPROTO_UDP) {
535		q->expire = time_uptime + V_dyn_udp_lifetime;
536	} else {
537		/* other protocols */
538		q->expire = time_uptime + V_dyn_short_lifetime;
539	}
540done:
541	if (match_direction != NULL)
542		*match_direction = dir;
543	return (q);
544}
545
546ipfw_dyn_rule *
547ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
548    struct tcphdr *tcp)
549{
550	ipfw_dyn_rule *q;
551
552	IPFW_DYN_LOCK();
553	q = lookup_dyn_rule_locked(pkt, match_direction, tcp);
554	if (q == NULL)
555		IPFW_DYN_UNLOCK();
556	/* NB: return table locked when q is not NULL */
557	return q;
558}
559
560static void
561realloc_dynamic_table(void)
562{
563	IPFW_DYN_LOCK_ASSERT();
564
565	/*
566	 * Try reallocation, make sure we have a power of 2 and do
567	 * not allow more than 64k entries. In case of overflow,
568	 * default to 1024.
569	 */
570
571	if (V_dyn_buckets > 65536)
572		V_dyn_buckets = 1024;
573	if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */
574		V_dyn_buckets = V_curr_dyn_buckets; /* reset */
575		return;
576	}
577	V_curr_dyn_buckets = V_dyn_buckets;
578	if (V_ipfw_dyn_v != NULL)
579		free(V_ipfw_dyn_v, M_IPFW);
580	for (;;) {
581		V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *),
582		       M_IPFW, M_NOWAIT | M_ZERO);
583		if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2)
584			break;
585		V_curr_dyn_buckets /= 2;
586	}
587}
588
589/**
590 * Install state of type 'type' for a dynamic session.
591 * The hash table contains two type of rules:
592 * - regular rules (O_KEEP_STATE)
593 * - rules for sessions with limited number of sess per user
594 *   (O_LIMIT). When they are created, the parent is
595 *   increased by 1, and decreased on delete. In this case,
596 *   the third parameter is the parent rule and not the chain.
597 * - "parent" rules for the above (O_LIMIT_PARENT).
598 */
599static ipfw_dyn_rule *
600add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
601{
602	ipfw_dyn_rule *r;
603	int i;
604
605	IPFW_DYN_LOCK_ASSERT();
606
607	if (V_ipfw_dyn_v == NULL ||
608	    (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) {
609		realloc_dynamic_table();
610		if (V_ipfw_dyn_v == NULL)
611			return NULL; /* failed ! */
612	}
613	i = hash_packet(id);
614
615	r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
616	if (r == NULL) {
617		printf ("ipfw: sorry cannot allocate state\n");
618		return NULL;
619	}
620
621	/* increase refcount on parent, and set pointer */
622	if (dyn_type == O_LIMIT) {
623		ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;
624		if ( parent->dyn_type != O_LIMIT_PARENT)
625			panic("invalid parent");
626		parent->count++;
627		r->parent = parent;
628		rule = parent->rule;
629	}
630
631	r->id = *id;
632	r->expire = time_uptime + V_dyn_syn_lifetime;
633	r->rule = rule;
634	r->dyn_type = dyn_type;
635	r->pcnt = r->bcnt = 0;
636	r->count = 0;
637
638	r->bucket = i;
639	r->next = V_ipfw_dyn_v[i];
640	V_ipfw_dyn_v[i] = r;
641	V_dyn_count++;
642	DEB({
643		struct in_addr da;
644#ifdef INET6
645		char src[INET6_ADDRSTRLEN];
646		char dst[INET6_ADDRSTRLEN];
647#else
648		char src[INET_ADDRSTRLEN];
649		char dst[INET_ADDRSTRLEN];
650#endif
651
652#ifdef INET6
653		if (IS_IP6_FLOW_ID(&(r->id))) {
654			ip6_sprintf(src, &r->id.src_ip6);
655			ip6_sprintf(dst, &r->id.dst_ip6);
656		} else
657#endif
658		{
659			da.s_addr = htonl(r->id.src_ip);
660			inet_ntoa_r(da, src);
661			da.s_addr = htonl(r->id.dst_ip);
662			inet_ntoa_r(da, dst);
663		}
664		printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n",
665		    dyn_type, src, r->id.src_port, dst, r->id.dst_port,
666		    V_dyn_count);
667	})
668	return r;
669}
670
671/**
672 * lookup dynamic parent rule using pkt and rule as search keys.
673 * If the lookup fails, then install one.
674 */
675static ipfw_dyn_rule *
676lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
677{
678	ipfw_dyn_rule *q;
679	int i;
680
681	IPFW_DYN_LOCK_ASSERT();
682
683	if (V_ipfw_dyn_v) {
684		int is_v6 = IS_IP6_FLOW_ID(pkt);
685		i = hash_packet( pkt );
686		for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next)
687			if (q->dyn_type == O_LIMIT_PARENT &&
688			    rule== q->rule &&
689			    pkt->proto == q->id.proto &&
690			    pkt->src_port == q->id.src_port &&
691			    pkt->dst_port == q->id.dst_port &&
692			    (
693				(is_v6 &&
694				 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
695					&(q->id.src_ip6)) &&
696				 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
697					&(q->id.dst_ip6))) ||
698				(!is_v6 &&
699				 pkt->src_ip == q->id.src_ip &&
700				 pkt->dst_ip == q->id.dst_ip)
701			    )
702			) {
703				q->expire = time_uptime + V_dyn_short_lifetime;
704				DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
705				return q;
706			}
707	}
708	return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
709}
710
711/**
712 * Install dynamic state for rule type cmd->o.opcode
713 *
714 * Returns 1 (failure) if state is not installed because of errors or because
715 * session limitations are enforced.
716 */
717int
718ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
719    struct ip_fw_args *args, uint32_t tablearg)
720{
721	static int last_log;
722	ipfw_dyn_rule *q;
723	struct in_addr da;
724#ifdef INET6
725	char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2];
726#else
727	char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
728#endif
729
730	src[0] = '\0';
731	dst[0] = '\0';
732
733	IPFW_DYN_LOCK();
734
735	DEB(
736#ifdef INET6
737	if (IS_IP6_FLOW_ID(&(args->f_id))) {
738		ip6_sprintf(src, &args->f_id.src_ip6);
739		ip6_sprintf(dst, &args->f_id.dst_ip6);
740	} else
741#endif
742	{
743		da.s_addr = htonl(args->f_id.src_ip);
744		inet_ntoa_r(da, src);
745		da.s_addr = htonl(args->f_id.dst_ip);
746		inet_ntoa_r(da, dst);
747	}
748	printf("ipfw: %s: type %d %s %u -> %s %u\n",
749	    __func__, cmd->o.opcode, src, args->f_id.src_port,
750	    dst, args->f_id.dst_port);
751	src[0] = '\0';
752	dst[0] = '\0';
753	)
754
755	q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
756
757	if (q != NULL) {	/* should never occur */
758		DEB(
759		if (last_log != time_uptime) {
760			last_log = time_uptime;
761			printf("ipfw: %s: entry already present, done\n",
762			    __func__);
763		})
764		IPFW_DYN_UNLOCK();
765		return (0);
766	}
767
768	if (V_dyn_count >= V_dyn_max)
769		/* Run out of slots, try to remove any expired rule. */
770		remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);
771
772	if (V_dyn_count >= V_dyn_max) {
773		if (last_log != time_uptime) {
774			last_log = time_uptime;
775			printf("ipfw: %s: Too many dynamic rules\n", __func__);
776		}
777		IPFW_DYN_UNLOCK();
778		return (1);	/* cannot install, notify caller */
779	}
780
781	switch (cmd->o.opcode) {
782	case O_KEEP_STATE:	/* bidir rule */
783		add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
784		break;
785
786	case O_LIMIT: {		/* limit number of sessions */
787		struct ipfw_flow_id id;
788		ipfw_dyn_rule *parent;
789		uint32_t conn_limit;
790		uint16_t limit_mask = cmd->limit_mask;
791
792		conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ?
793		    tablearg : cmd->conn_limit;
794
795		DEB(
796		if (cmd->conn_limit == IP_FW_TABLEARG)
797			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u "
798			    "(tablearg)\n", __func__, conn_limit);
799		else
800			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n",
801			    __func__, conn_limit);
802		)
803
804		id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0;
805		id.proto = args->f_id.proto;
806		id.addr_type = args->f_id.addr_type;
807		id.fib = M_GETFIB(args->m);
808
809		if (IS_IP6_FLOW_ID (&(args->f_id))) {
810			if (limit_mask & DYN_SRC_ADDR)
811				id.src_ip6 = args->f_id.src_ip6;
812			if (limit_mask & DYN_DST_ADDR)
813				id.dst_ip6 = args->f_id.dst_ip6;
814		} else {
815			if (limit_mask & DYN_SRC_ADDR)
816				id.src_ip = args->f_id.src_ip;
817			if (limit_mask & DYN_DST_ADDR)
818				id.dst_ip = args->f_id.dst_ip;
819		}
820		if (limit_mask & DYN_SRC_PORT)
821			id.src_port = args->f_id.src_port;
822		if (limit_mask & DYN_DST_PORT)
823			id.dst_port = args->f_id.dst_port;
824		if ((parent = lookup_dyn_parent(&id, rule)) == NULL) {
825			printf("ipfw: %s: add parent failed\n", __func__);
826			IPFW_DYN_UNLOCK();
827			return (1);
828		}
829
830		if (parent->count >= conn_limit) {
831			/* See if we can remove some expired rule. */
832			remove_dyn_rule(rule, parent);
833			if (parent->count >= conn_limit) {
834				if (V_fw_verbose && last_log != time_uptime) {
835					last_log = time_uptime;
836#ifdef INET6
837					/*
838					 * XXX IPv6 flows are not
839					 * supported yet.
840					 */
841					if (IS_IP6_FLOW_ID(&(args->f_id))) {
842						char ip6buf[INET6_ADDRSTRLEN];
843						snprintf(src, sizeof(src),
844						    "[%s]", ip6_sprintf(ip6buf,
845							&args->f_id.src_ip6));
846						snprintf(dst, sizeof(dst),
847						    "[%s]", ip6_sprintf(ip6buf,
848							&args->f_id.dst_ip6));
849					} else
850#endif
851					{
852						da.s_addr =
853						    htonl(args->f_id.src_ip);
854						inet_ntoa_r(da, src);
855						da.s_addr =
856						    htonl(args->f_id.dst_ip);
857						inet_ntoa_r(da, dst);
858					}
859					log(LOG_SECURITY | LOG_DEBUG,
860					    "ipfw: %d %s %s:%u -> %s:%u, %s\n",
861					    parent->rule->rulenum,
862					    "drop session",
863					    src, (args->f_id.src_port),
864					    dst, (args->f_id.dst_port),
865					    "too many entries");
866				}
867				IPFW_DYN_UNLOCK();
868				return (1);
869			}
870		}
871		add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
872		break;
873	}
874	default:
875		printf("ipfw: %s: unknown dynamic rule type %u\n",
876		    __func__, cmd->o.opcode);
877		IPFW_DYN_UNLOCK();
878		return (1);
879	}
880
881	/* XXX just set lifetime */
882	lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
883
884	IPFW_DYN_UNLOCK();
885	return (0);
886}
887
888/*
889 * Generate a TCP packet, containing either a RST or a keepalive.
890 * When flags & TH_RST, we are sending a RST packet, because of a
891 * "reset" action matched the packet.
892 * Otherwise we are sending a keepalive, and flags & TH_
893 * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
894 * so that MAC can label the reply appropriately.
895 */
896struct mbuf *
897ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
898    u_int32_t ack, int flags)
899{
900	struct mbuf *m = NULL;		/* stupid compiler */
901	int len, dir;
902	struct ip *h = NULL;		/* stupid compiler */
903#ifdef INET6
904	struct ip6_hdr *h6 = NULL;
905#endif
906	struct tcphdr *th = NULL;
907
908	MGETHDR(m, M_DONTWAIT, MT_DATA);
909	if (m == NULL)
910		return (NULL);
911
912	M_SETFIB(m, id->fib);
913#ifdef MAC
914	if (replyto != NULL)
915		mac_netinet_firewall_reply(replyto, m);
916	else
917		mac_netinet_firewall_send(m);
918#else
919	(void)replyto;		/* don't warn about unused arg */
920#endif
921
922	switch (id->addr_type) {
923	case 4:
924		len = sizeof(struct ip) + sizeof(struct tcphdr);
925		break;
926#ifdef INET6
927	case 6:
928		len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
929		break;
930#endif
931	default:
932		/* XXX: log me?!? */
933		FREE_PKT(m);
934		return (NULL);
935	}
936	dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN);
937
938	m->m_data += max_linkhdr;
939	m->m_flags |= M_SKIP_FIREWALL;
940	m->m_pkthdr.len = m->m_len = len;
941	m->m_pkthdr.rcvif = NULL;
942	bzero(m->m_data, len);
943
944	switch (id->addr_type) {
945	case 4:
946		h = mtod(m, struct ip *);
947
948		/* prepare for checksum */
949		h->ip_p = IPPROTO_TCP;
950		h->ip_len = htons(sizeof(struct tcphdr));
951		if (dir) {
952			h->ip_src.s_addr = htonl(id->src_ip);
953			h->ip_dst.s_addr = htonl(id->dst_ip);
954		} else {
955			h->ip_src.s_addr = htonl(id->dst_ip);
956			h->ip_dst.s_addr = htonl(id->src_ip);
957		}
958
959		th = (struct tcphdr *)(h + 1);
960		break;
961#ifdef INET6
962	case 6:
963		h6 = mtod(m, struct ip6_hdr *);
964
965		/* prepare for checksum */
966		h6->ip6_nxt = IPPROTO_TCP;
967		h6->ip6_plen = htons(sizeof(struct tcphdr));
968		if (dir) {
969			h6->ip6_src = id->src_ip6;
970			h6->ip6_dst = id->dst_ip6;
971		} else {
972			h6->ip6_src = id->dst_ip6;
973			h6->ip6_dst = id->src_ip6;
974		}
975
976		th = (struct tcphdr *)(h6 + 1);
977		break;
978#endif
979	}
980
981	if (dir) {
982		th->th_sport = htons(id->src_port);
983		th->th_dport = htons(id->dst_port);
984	} else {
985		th->th_sport = htons(id->dst_port);
986		th->th_dport = htons(id->src_port);
987	}
988	th->th_off = sizeof(struct tcphdr) >> 2;
989
990	if (flags & TH_RST) {
991		if (flags & TH_ACK) {
992			th->th_seq = htonl(ack);
993			th->th_flags = TH_RST;
994		} else {
995			if (flags & TH_SYN)
996				seq++;
997			th->th_ack = htonl(seq);
998			th->th_flags = TH_RST | TH_ACK;
999		}
1000	} else {
1001		/*
1002		 * Keepalive - use caller provided sequence numbers
1003		 */
1004		th->th_seq = htonl(seq);
1005		th->th_ack = htonl(ack);
1006		th->th_flags = TH_ACK;
1007	}
1008
1009	switch (id->addr_type) {
1010	case 4:
1011		th->th_sum = in_cksum(m, len);
1012
1013		/* finish the ip header */
1014		h->ip_v = 4;
1015		h->ip_hl = sizeof(*h) >> 2;
1016		h->ip_tos = IPTOS_LOWDELAY;
1017		h->ip_off = 0;
1018		/* ip_len must be in host format for ip_output */
1019		h->ip_len = len;
1020		h->ip_ttl = V_ip_defttl;
1021		h->ip_sum = 0;
1022		break;
1023#ifdef INET6
1024	case 6:
1025		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6),
1026		    sizeof(struct tcphdr));
1027
1028		/* finish the ip6 header */
1029		h6->ip6_vfc |= IPV6_VERSION;
1030		h6->ip6_hlim = IPV6_DEFHLIM;
1031		break;
1032#endif
1033	}
1034
1035	return (m);
1036}
1037
1038/*
1039 * This procedure is only used to handle keepalives. It is invoked
1040 * every dyn_keepalive_period
1041 */
1042static void
1043ipfw_tick(void * vnetx)
1044{
1045	struct mbuf *m0, *m, *mnext, **mtailp;
1046#ifdef INET6
1047	struct mbuf *m6, **m6_tailp;
1048#endif
1049	int i;
1050	ipfw_dyn_rule *q;
1051#ifdef VIMAGE
1052	struct vnet *vp = vnetx;
1053#endif
1054
1055	CURVNET_SET(vp);
1056	if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0)
1057		goto done;
1058
1059	/*
1060	 * We make a chain of packets to go out here -- not deferring
1061	 * until after we drop the IPFW dynamic rule lock would result
1062	 * in a lock order reversal with the normal packet input -> ipfw
1063	 * call stack.
1064	 */
1065	m0 = NULL;
1066	mtailp = &m0;
1067#ifdef INET6
1068	m6 = NULL;
1069	m6_tailp = &m6;
1070#endif
1071	IPFW_DYN_LOCK();
1072	for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
1073		for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) {
1074			if (q->dyn_type == O_LIMIT_PARENT)
1075				continue;
1076			if (q->id.proto != IPPROTO_TCP)
1077				continue;
1078			if ( (q->state & BOTH_SYN) != BOTH_SYN)
1079				continue;
1080			if (TIME_LEQ(time_uptime + V_dyn_keepalive_interval,
1081			    q->expire))
1082				continue;	/* too early */
1083			if (TIME_LEQ(q->expire, time_uptime))
1084				continue;	/* too late, rule expired */
1085
1086			m = (q->state & ACK_REV) ? NULL :
1087			    ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1,
1088			    q->ack_fwd, TH_SYN);
1089			mnext = (q->state & ACK_FWD) ? NULL :
1090			    ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1,
1091			    q->ack_rev, 0);
1092
1093			switch (q->id.addr_type) {
1094			case 4:
1095				if (m != NULL) {
1096					*mtailp = m;
1097					mtailp = &(*mtailp)->m_nextpkt;
1098				}
1099				if (mnext != NULL) {
1100					*mtailp = mnext;
1101					mtailp = &(*mtailp)->m_nextpkt;
1102				}
1103				break;
1104#ifdef INET6
1105			case 6:
1106				if (m != NULL) {
1107					*m6_tailp = m;
1108					m6_tailp = &(*m6_tailp)->m_nextpkt;
1109				}
1110				if (mnext != NULL) {
1111					*m6_tailp = mnext;
1112					m6_tailp = &(*m6_tailp)->m_nextpkt;
1113				}
1114				break;
1115#endif
1116			}
1117		}
1118	}
1119	IPFW_DYN_UNLOCK();
1120	for (m = m0; m != NULL; m = mnext) {
1121		mnext = m->m_nextpkt;
1122		m->m_nextpkt = NULL;
1123		ip_output(m, NULL, NULL, 0, NULL, NULL);
1124	}
1125#ifdef INET6
1126	for (m = m6; m != NULL; m = mnext) {
1127		mnext = m->m_nextpkt;
1128		m->m_nextpkt = NULL;
1129		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1130	}
1131#endif
1132done:
1133	callout_reset_on(&V_ipfw_timeout, V_dyn_keepalive_period * hz,
1134		      ipfw_tick, vnetx, 0);
1135	CURVNET_RESTORE();
1136}
1137
1138void
1139ipfw_dyn_attach(void)
1140{
1141        ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule",
1142            sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
1143            UMA_ALIGN_PTR, 0);
1144
1145        IPFW_DYN_LOCK_INIT();
1146}
1147
1148void
1149ipfw_dyn_detach(void)
1150{
1151        uma_zdestroy(ipfw_dyn_rule_zone);
1152        IPFW_DYN_LOCK_DESTROY();
1153}
1154
1155void
1156ipfw_dyn_init(void)
1157{
1158        V_ipfw_dyn_v = NULL;
1159        V_dyn_buckets = 256;    /* must be power of 2 */
1160        V_curr_dyn_buckets = 256; /* must be power of 2 */
1161
1162        V_dyn_ack_lifetime = 300;
1163        V_dyn_syn_lifetime = 20;
1164        V_dyn_fin_lifetime = 1;
1165        V_dyn_rst_lifetime = 1;
1166        V_dyn_udp_lifetime = 10;
1167        V_dyn_short_lifetime = 5;
1168
1169        V_dyn_keepalive_interval = 20;
1170        V_dyn_keepalive_period = 5;
1171        V_dyn_keepalive = 1;    /* do send keepalives */
1172
1173        V_dyn_max = 4096;       /* max # of dynamic rules */
1174        callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE);
1175        callout_reset_on(&V_ipfw_timeout, hz, ipfw_tick, curvnet, 0);
1176}
1177
1178void
1179ipfw_dyn_uninit(int pass)
1180{
1181	if (pass == 0)
1182		callout_drain(&V_ipfw_timeout);
1183	else {
1184		if (V_ipfw_dyn_v != NULL)
1185			free(V_ipfw_dyn_v, M_IPFW);
1186	}
1187}
1188
1189int
1190ipfw_dyn_len(void)
1191{
1192	return (V_ipfw_dyn_v == NULL) ? 0 :
1193		(V_dyn_count * sizeof(ipfw_dyn_rule));
1194}
1195
1196void
1197ipfw_get_dynamic(char **pbp, const char *ep)
1198{
1199	ipfw_dyn_rule *p, *last = NULL;
1200	char *bp;
1201	int i;
1202
1203	if (V_ipfw_dyn_v == NULL)
1204		return;
1205	bp = *pbp;
1206
1207	IPFW_DYN_LOCK();
1208	for (i = 0 ; i < V_curr_dyn_buckets; i++)
1209		for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) {
1210			if (bp + sizeof *p <= ep) {
1211				ipfw_dyn_rule *dst =
1212					(ipfw_dyn_rule *)bp;
1213				bcopy(p, dst, sizeof *p);
1214				bcopy(&(p->rule->rulenum), &(dst->rule),
1215				    sizeof(p->rule->rulenum));
1216				/*
1217				 * store set number into high word of
1218				 * dst->rule pointer.
1219				 */
1220				bcopy(&(p->rule->set),
1221				    (char *)&dst->rule +
1222				    sizeof(p->rule->rulenum),
1223				    sizeof(p->rule->set));
1224				/*
1225				 * store a non-null value in "next".
1226				 * The userland code will interpret a
1227				 * NULL here as a marker
1228				 * for the last dynamic rule.
1229				 */
1230				bcopy(&dst, &dst->next, sizeof(dst));
1231				last = dst;
1232				dst->expire =
1233				    TIME_LEQ(dst->expire, time_uptime) ?
1234					0 : dst->expire - time_uptime ;
1235				bp += sizeof(ipfw_dyn_rule);
1236			}
1237		}
1238	IPFW_DYN_UNLOCK();
1239	if (last != NULL) /* mark last dynamic rule */
1240		bzero(&last->next, sizeof(last));
1241	*pbp = bp;
1242}
1243/* end of file */
1244