ip_fw_sockopt.c revision 200590
1124758Semax/*-
2124758Semax * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
3124758Semax *
4124758Semax * Redistribution and use in source and binary forms, with or without
5124758Semax * modification, are permitted provided that the following conditions
6124758Semax * are met:
7124758Semax * 1. Redistributions of source code must retain the above copyright
8124758Semax *    notice, this list of conditions and the following disclaimer.
9124758Semax * 2. Redistributions in binary form must reproduce the above copyright
10124758Semax *    notice, this list of conditions and the following disclaimer in the
11124758Semax *    documentation and/or other materials provided with the distribution.
12124758Semax *
13124758Semax * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14124758Semax * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15124758Semax * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16124758Semax * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17124758Semax * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18124758Semax * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19124758Semax * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20124758Semax * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21124758Semax * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22124758Semax * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23124758Semax * SUCH DAMAGE.
24124758Semax */
25124758Semax
26124758Semax#include <sys/cdefs.h>
27124758Semax__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_sockopt.c 200590 2009-12-15 21:24:12Z luigi $");
28124758Semax
29124758Semax#define        DEB(x)
30124758Semax#define        DDB(x) x
31124758Semax
32124758Semax/*
33124758Semax * Sockopt support for ipfw
34124758Semax */
35124758Semax
36124758Semax#if !defined(KLD_MODULE)
37281210Stakawata#include "opt_ipfw.h"
38124758Semax#include "opt_ipdivert.h"
39124758Semax#include "opt_ipdn.h"
40124758Semax#include "opt_inet.h"
41124758Semax#ifndef INET
42124758Semax#error IPFIREWALL requires INET.
43124758Semax#endif /* INET */
44124758Semax#endif
45139721Semax#include "opt_inet6.h"
46124758Semax#include "opt_ipsec.h"
47124758Semax
48124758Semax#include <sys/param.h>
49124758Semax#include <sys/systm.h>
50124758Semax#include <sys/malloc.h>
51124758Semax#include <sys/kernel.h>
52124758Semax#include <sys/lock.h>
53124758Semax#include <sys/priv.h>
54124758Semax#include <sys/proc.h>
55124758Semax#include <sys/rwlock.h>
56124758Semax#include <sys/socket.h>
57124758Semax#include <sys/socketvar.h>
58124758Semax#include <sys/sysctl.h>
59124758Semax#include <sys/syslog.h>
60124758Semax#include <net/if.h>
61139721Semax#include <net/route.h>
62139721Semax#include <net/vnet.h>
63124758Semax
64124758Semax#include <netinet/in.h>
65124758Semax#include <netinet/ip_fw.h>
66124758Semax#include <netinet/ipfw/ip_fw_private.h>
67124758Semax#include <netinet/ip_divert.h>
68124758Semax
69124758Semax#include <netgraph/ng_ipfw.h>
70124758Semax
71124758Semax#ifdef MAC
72124758Semax#include <security/mac/mac_framework.h>
73124758Semax#endif
74124758Semax
75124758Semaxstatic VNET_DEFINE(int, autoinc_step);
76124758Semax#define	V_autoinc_step			VNET(autoinc_step)
77124758Semax
78124758Semaxstatic VNET_DEFINE(u_int32_t, static_count);	/* # of static rules */
79124758Semaxstatic VNET_DEFINE(u_int32_t, static_len);	/* bytes of static rules */
80124758Semax#define	V_static_count			VNET(static_count)
81124758Semax#define	V_static_len			VNET(static_len)
82124758Semax
83124758Semax#ifdef SYSCTL_NODE
84124758SemaxSYSCTL_DECL(_net_inet_ip_fw);
85124758SemaxSYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
86124758Semax    CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
87124758Semax    "Rule number auto-increment step");
88124758SemaxSYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
89124758Semax    CTLFLAG_RD, &VNET_NAME(static_count), 0,
90124758Semax    "Number of static rules");
91124758Semax
92124758Semax#endif /* SYSCTL_NODE */
93124758Semax
94124758Semax/*
95124758Semax * When a rule is added/deleted, clear the next_rule pointers in all rules.
96124758Semax * These will be reconstructed on the fly as packets are matched.
97124758Semax */
98124758Semaxstatic void
99124758Semaxflush_rule_ptrs(struct ip_fw_chain *chain)
100124758Semax{
101124758Semax	struct ip_fw *rule;
102124758Semax
103124758Semax	IPFW_WLOCK_ASSERT(chain);
104124758Semax
105124758Semax	chain->id++;
106124758Semax
107124758Semax	for (rule = chain->rules; rule; rule = rule->next)
108124758Semax		rule->next_rule = NULL;
109124758Semax}
110124758Semax
111124758Semax/*
112124758Semax * Add a new rule to the list. Copy the rule into a malloc'ed area, then
113124758Semax * possibly create a rule number and add the rule to the list.
114124758Semax * Update the rule_number in the input struct so the caller knows it as well.
115124758Semax */
116124758Semaxint
117124758Semaxipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
118124758Semax{
119124758Semax	struct ip_fw *rule, *f, *prev;
120124758Semax	int l = RULESIZE(input_rule);
121124758Semax
122124758Semax	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
123124758Semax		return (EINVAL);
124124758Semax
125124758Semax	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
126124758Semax	if (rule == NULL)
127124758Semax		return (ENOSPC);
128124758Semax
129124758Semax	bcopy(input_rule, rule, l);
130124758Semax
131124758Semax	rule->next = NULL;
132124758Semax	rule->next_rule = NULL;
133124758Semax
134124758Semax	rule->pcnt = 0;
135124758Semax	rule->bcnt = 0;
136124758Semax	rule->timestamp = 0;
137124758Semax
138124758Semax	IPFW_WLOCK(chain);
139124758Semax
140124758Semax	if (chain->rules == NULL) {	/* default rule */
141124758Semax		chain->rules = rule;
142124758Semax		rule->id = ++chain->id;
143124758Semax		goto done;
144124758Semax        }
145124758Semax
146124758Semax	/*
147124758Semax	 * If rulenum is 0, find highest numbered rule before the
148124758Semax	 * default rule, and add autoinc_step
149124758Semax	 */
150124758Semax	if (V_autoinc_step < 1)
151139721Semax		V_autoinc_step = 1;
152139721Semax	else if (V_autoinc_step > 1000)
153139721Semax		V_autoinc_step = 1000;
154124758Semax	if (rule->rulenum == 0) {
155124758Semax		/*
156124758Semax		 * locate the highest numbered rule before default
157139721Semax		 */
158139721Semax		for (f = chain->rules; f; f = f->next) {
159139721Semax			if (f->rulenum == IPFW_DEFAULT_RULE)
160139721Semax				break;
161139721Semax			rule->rulenum = f->rulenum;
162139721Semax		}
163139721Semax		if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
164139721Semax			rule->rulenum += V_autoinc_step;
165139721Semax		input_rule->rulenum = rule->rulenum;
166139721Semax	}
167139721Semax
168139721Semax	/*
169139721Semax	 * Now insert the new rule in the right place in the sorted list.
170139721Semax	 */
171139721Semax	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
172139721Semax		if (f->rulenum > rule->rulenum) { /* found the location */
173139721Semax			if (prev) {
174139721Semax				rule->next = f;
175139721Semax				prev->next = rule;
176139721Semax			} else { /* head insert */
177139721Semax				rule->next = chain->rules;
178124758Semax				chain->rules = rule;
179124758Semax			}
180124758Semax			break;
181124758Semax		}
182124758Semax	}
183124758Semax	flush_rule_ptrs(chain);
184124758Semax	/* chain->id incremented inside flush_rule_ptrs() */
185124758Semax	rule->id = chain->id;
186124758Semaxdone:
187124758Semax	V_static_count++;
188124758Semax	V_static_len += l;
189139721Semax	IPFW_WUNLOCK(chain);
190139721Semax	DEB(printf("ipfw: installed rule %d, static count now %d\n",
191139721Semax		rule->rulenum, V_static_count);)
192139721Semax	return (0);
193139721Semax}
194139721Semax
195124758Semax/**
196124758Semax * Remove a static rule (including derived dynamic rules)
197124758Semax * and place it on the ``reap list'' for later reclamation.
198124758Semax * The caller is in charge of clearing rule pointers to avoid
199124758Semax * dangling pointers.
200124758Semax * @return a pointer to the next entry.
201124758Semax * Arguments are not checked, so they better be correct.
202124758Semax */
203124758Semaxstatic struct ip_fw *
204124758Semaxremove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
205124758Semax    struct ip_fw *prev)
206124758Semax{
207124758Semax	struct ip_fw *n;
208124758Semax	int l = RULESIZE(rule);
209124758Semax
210124758Semax	IPFW_WLOCK_ASSERT(chain);
211124758Semax
212124758Semax	n = rule->next;
213124758Semax	remove_dyn_children(rule);
214124758Semax	if (prev == NULL)
215124758Semax		chain->rules = n;
216124758Semax	else
217124758Semax		prev->next = n;
218124758Semax	V_static_count--;
219124758Semax	V_static_len -= l;
220124758Semax
221124758Semax	rule->next = chain->reap;
222124758Semax	chain->reap = rule;
223124758Semax
224124758Semax	return n;
225124758Semax}
226124758Semax
227124758Semax/*
228124758Semax * Reclaim storage associated with a list of rules.  This is
229124758Semax * typically the list created using remove_rule.
230124758Semax * A NULL pointer on input is handled correctly.
231124758Semax */
232124758Semaxvoid
233124758Semaxipfw_reap_rules(struct ip_fw *head)
234124758Semax{
235124758Semax	struct ip_fw *rule;
236124758Semax
237124758Semax	while ((rule = head) != NULL) {
238124758Semax		head = head->next;
239124758Semax		free(rule, M_IPFW);
240124758Semax	}
241124758Semax}
242124758Semax
243124758Semax/*
244124758Semax * Remove all rules from a chain (except rules in set RESVD_SET
245124758Semax * unless kill_default = 1).  The caller is responsible for
246124758Semax * reclaiming storage for the rules left in chain->reap.
247124758Semax */
248124758Semaxvoid
249124758Semaxipfw_free_chain(struct ip_fw_chain *chain, int kill_default)
250124758Semax{
251124758Semax	struct ip_fw *prev, *rule;
252124758Semax
253124758Semax	IPFW_WLOCK_ASSERT(chain);
254124758Semax
255126245Semax	chain->reap = NULL;
256126245Semax	flush_rule_ptrs(chain); /* more efficient to do outside the loop */
257126245Semax	for (prev = NULL, rule = chain->rules; rule ; )
258124758Semax		if (kill_default || rule->set != RESVD_SET)
259124758Semax			rule = remove_rule(chain, rule, prev);
260124758Semax		else {
261124758Semax			prev = rule;
262124758Semax			rule = rule->next;
263124758Semax		}
264124758Semax}
265124758Semax
266124758Semax/**
267124758Semax * Remove all rules with given number, and also do set manipulation.
268124758Semax * Assumes chain != NULL && *chain != NULL.
269124758Semax *
270124758Semax * The argument is an u_int32_t. The low 16 bit are the rule or set number,
271124758Semax * the next 8 bits are the new set, the top 8 bits are the command:
272124758Semax *
273124758Semax *	0	delete rules with given number
274124758Semax *	1	delete rules with given set number
275124758Semax *	2	move rules with given number to new set
276124758Semax *	3	move rules with given set number to new set
277124758Semax *	4	swap sets with given numbers
278124758Semax *	5	delete rules with given number and with given set number
279124758Semax */
280124758Semaxstatic int
281124758Semaxdel_entry(struct ip_fw_chain *chain, u_int32_t arg)
282124758Semax{
283124758Semax	struct ip_fw *prev = NULL, *rule;
284	u_int16_t rulenum;	/* rule or old_set */
285	u_int8_t cmd, new_set;
286
287	rulenum = arg & 0xffff;
288	cmd = (arg >> 24) & 0xff;
289	new_set = (arg >> 16) & 0xff;
290
291	if (cmd > 5 || new_set > RESVD_SET)
292		return EINVAL;
293	if (cmd == 0 || cmd == 2 || cmd == 5) {
294		if (rulenum >= IPFW_DEFAULT_RULE)
295			return EINVAL;
296	} else {
297		if (rulenum > RESVD_SET)	/* old_set */
298			return EINVAL;
299	}
300
301	IPFW_WLOCK(chain);
302	rule = chain->rules;	/* common starting point */
303	chain->reap = NULL;	/* prepare for deletions */
304	switch (cmd) {
305	case 0:	/* delete rules with given number */
306		/*
307		 * locate first rule to delete
308		 */
309		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
310			;
311		if (rule->rulenum != rulenum) {
312			IPFW_WUNLOCK(chain);
313			return EINVAL;
314		}
315
316		/*
317		 * flush pointers outside the loop, then delete all matching
318		 * rules. prev remains the same throughout the cycle.
319		 */
320		flush_rule_ptrs(chain);
321		while (rule->rulenum == rulenum)
322			rule = remove_rule(chain, rule, prev);
323		break;
324
325	case 1:	/* delete all rules with given set number */
326		flush_rule_ptrs(chain);
327		while (rule->rulenum < IPFW_DEFAULT_RULE) {
328			if (rule->set == rulenum)
329				rule = remove_rule(chain, rule, prev);
330			else {
331				prev = rule;
332				rule = rule->next;
333			}
334		}
335		break;
336
337	case 2:	/* move rules with given number to new set */
338		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
339			if (rule->rulenum == rulenum)
340				rule->set = new_set;
341		break;
342
343	case 3: /* move rules with given set number to new set */
344		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
345			if (rule->set == rulenum)
346				rule->set = new_set;
347		break;
348
349	case 4: /* swap two sets */
350		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
351			if (rule->set == rulenum)
352				rule->set = new_set;
353			else if (rule->set == new_set)
354				rule->set = rulenum;
355		break;
356
357	case 5: /* delete rules with given number and with given set number.
358		 * rulenum - given rule number;
359		 * new_set - given set number.
360		 */
361		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
362			;
363		if (rule->rulenum != rulenum) {
364			IPFW_WUNLOCK(chain);
365			return (EINVAL);
366		}
367		flush_rule_ptrs(chain);
368		while (rule->rulenum == rulenum) {
369			if (rule->set == new_set)
370				rule = remove_rule(chain, rule, prev);
371			else {
372				prev = rule;
373				rule = rule->next;
374			}
375		}
376	}
377	/*
378	 * Look for rules to reclaim.  We grab the list before
379	 * releasing the lock then reclaim them w/o the lock to
380	 * avoid a LOR with dummynet.
381	 */
382	rule = chain->reap;
383	IPFW_WUNLOCK(chain);
384	ipfw_reap_rules(rule);
385	return 0;
386}
387
388/*
389 * Clear counters for a specific rule.
390 * The enclosing "table" is assumed locked.
391 */
392static void
393clear_counters(struct ip_fw *rule, int log_only)
394{
395	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
396
397	if (log_only == 0) {
398		rule->bcnt = rule->pcnt = 0;
399		rule->timestamp = 0;
400	}
401	if (l->o.opcode == O_LOG)
402		l->log_left = l->max_log;
403}
404
405/**
406 * Reset some or all counters on firewall rules.
407 * The argument `arg' is an u_int32_t. The low 16 bit are the rule number,
408 * the next 8 bits are the set number, the top 8 bits are the command:
409 *	0	work with rules from all sets;
410 *	1	work with rules only from specified set.
411 * Specified rule number is zero if we want to clear all entries.
412 * log_only is 1 if we only want to reset logs, zero otherwise.
413 */
414static int
415zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
416{
417	struct ip_fw *rule;
418	char *msg;
419
420	uint16_t rulenum = arg & 0xffff;
421	uint8_t set = (arg >> 16) & 0xff;
422	uint8_t cmd = (arg >> 24) & 0xff;
423
424	if (cmd > 1)
425		return (EINVAL);
426	if (cmd == 1 && set > RESVD_SET)
427		return (EINVAL);
428
429	IPFW_WLOCK(chain);
430	if (rulenum == 0) {
431		V_norule_counter = 0;
432		for (rule = chain->rules; rule; rule = rule->next) {
433			/* Skip rules from another set. */
434			if (cmd == 1 && rule->set != set)
435				continue;
436			clear_counters(rule, log_only);
437		}
438		msg = log_only ? "All logging counts reset" :
439		    "Accounting cleared";
440	} else {
441		int cleared = 0;
442		/*
443		 * We can have multiple rules with the same number, so we
444		 * need to clear them all.
445		 */
446		for (rule = chain->rules; rule; rule = rule->next)
447			if (rule->rulenum == rulenum) {
448				while (rule && rule->rulenum == rulenum) {
449					if (cmd == 0 || rule->set == set)
450						clear_counters(rule, log_only);
451					rule = rule->next;
452				}
453				cleared = 1;
454				break;
455			}
456		if (!cleared) {	/* we did not find any matching rules */
457			IPFW_WUNLOCK(chain);
458			return (EINVAL);
459		}
460		msg = log_only ? "logging count reset" : "cleared";
461	}
462	IPFW_WUNLOCK(chain);
463
464	if (V_fw_verbose) {
465		int lev = LOG_SECURITY | LOG_NOTICE;
466
467		if (rulenum)
468			log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
469		else
470			log(lev, "ipfw: %s.\n", msg);
471	}
472	return (0);
473}
474
475/*
476 * Check validity of the structure before insert.
477 * Fortunately rules are simple, so this mostly need to check rule sizes.
478 */
479static int
480check_ipfw_struct(struct ip_fw *rule, int size)
481{
482	int l, cmdlen = 0;
483	int have_action=0;
484	ipfw_insn *cmd;
485
486	if (size < sizeof(*rule)) {
487		printf("ipfw: rule too short\n");
488		return (EINVAL);
489	}
490	/* first, check for valid size */
491	l = RULESIZE(rule);
492	if (l != size) {
493		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
494		return (EINVAL);
495	}
496	if (rule->act_ofs >= rule->cmd_len) {
497		printf("ipfw: bogus action offset (%u > %u)\n",
498		    rule->act_ofs, rule->cmd_len - 1);
499		return (EINVAL);
500	}
501	/*
502	 * Now go for the individual checks. Very simple ones, basically only
503	 * instruction sizes.
504	 */
505	for (l = rule->cmd_len, cmd = rule->cmd ;
506			l > 0 ; l -= cmdlen, cmd += cmdlen) {
507		cmdlen = F_LEN(cmd);
508		if (cmdlen > l) {
509			printf("ipfw: opcode %d size truncated\n",
510			    cmd->opcode);
511			return EINVAL;
512		}
513		DEB(printf("ipfw: opcode %d\n", cmd->opcode);)
514		switch (cmd->opcode) {
515		case O_PROBE_STATE:
516		case O_KEEP_STATE:
517		case O_PROTO:
518		case O_IP_SRC_ME:
519		case O_IP_DST_ME:
520		case O_LAYER2:
521		case O_IN:
522		case O_FRAG:
523		case O_DIVERTED:
524		case O_IPOPT:
525		case O_IPTOS:
526		case O_IPPRECEDENCE:
527		case O_IPVER:
528		case O_TCPWIN:
529		case O_TCPFLAGS:
530		case O_TCPOPTS:
531		case O_ESTAB:
532		case O_VERREVPATH:
533		case O_VERSRCREACH:
534		case O_ANTISPOOF:
535		case O_IPSEC:
536#ifdef INET6
537		case O_IP6_SRC_ME:
538		case O_IP6_DST_ME:
539		case O_EXT_HDR:
540		case O_IP6:
541#endif
542		case O_IP4:
543		case O_TAG:
544			if (cmdlen != F_INSN_SIZE(ipfw_insn))
545				goto bad_size;
546			break;
547
548		case O_FIB:
549			if (cmdlen != F_INSN_SIZE(ipfw_insn))
550				goto bad_size;
551			if (cmd->arg1 >= rt_numfibs) {
552				printf("ipfw: invalid fib number %d\n",
553					cmd->arg1);
554				return EINVAL;
555			}
556			break;
557
558		case O_SETFIB:
559			if (cmdlen != F_INSN_SIZE(ipfw_insn))
560				goto bad_size;
561			if (cmd->arg1 >= rt_numfibs) {
562				printf("ipfw: invalid fib number %d\n",
563					cmd->arg1);
564				return EINVAL;
565			}
566			goto check_action;
567
568		case O_UID:
569		case O_GID:
570		case O_JAIL:
571		case O_IP_SRC:
572		case O_IP_DST:
573		case O_TCPSEQ:
574		case O_TCPACK:
575		case O_PROB:
576		case O_ICMPTYPE:
577			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
578				goto bad_size;
579			break;
580
581		case O_LIMIT:
582			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
583				goto bad_size;
584			break;
585
586		case O_LOG:
587			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
588				goto bad_size;
589
590			((ipfw_insn_log *)cmd)->log_left =
591			    ((ipfw_insn_log *)cmd)->max_log;
592
593			break;
594
595		case O_IP_SRC_MASK:
596		case O_IP_DST_MASK:
597			/* only odd command lengths */
598			if ( !(cmdlen & 1) || cmdlen > 31)
599				goto bad_size;
600			break;
601
602		case O_IP_SRC_SET:
603		case O_IP_DST_SET:
604			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
605				printf("ipfw: invalid set size %d\n",
606					cmd->arg1);
607				return EINVAL;
608			}
609			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
610			    (cmd->arg1+31)/32 )
611				goto bad_size;
612			break;
613
614		case O_IP_SRC_LOOKUP:
615		case O_IP_DST_LOOKUP:
616			if (cmd->arg1 >= IPFW_TABLES_MAX) {
617				printf("ipfw: invalid table number %d\n",
618				    cmd->arg1);
619				return (EINVAL);
620			}
621			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
622			    cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
623			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
624				goto bad_size;
625			break;
626
627		case O_MACADDR2:
628			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
629				goto bad_size;
630			break;
631
632		case O_NOP:
633		case O_IPID:
634		case O_IPTTL:
635		case O_IPLEN:
636		case O_TCPDATALEN:
637		case O_TAGGED:
638			if (cmdlen < 1 || cmdlen > 31)
639				goto bad_size;
640			break;
641
642		case O_MAC_TYPE:
643		case O_IP_SRCPORT:
644		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
645			if (cmdlen < 2 || cmdlen > 31)
646				goto bad_size;
647			break;
648
649		case O_RECV:
650		case O_XMIT:
651		case O_VIA:
652			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
653				goto bad_size;
654			break;
655
656		case O_ALTQ:
657			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
658				goto bad_size;
659			break;
660
661		case O_PIPE:
662		case O_QUEUE:
663			if (cmdlen != F_INSN_SIZE(ipfw_insn))
664				goto bad_size;
665			goto check_action;
666
667		case O_FORWARD_IP:
668#ifdef	IPFIREWALL_FORWARD
669			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
670				goto bad_size;
671			goto check_action;
672#else
673			return EINVAL;
674#endif
675
676		case O_DIVERT:
677		case O_TEE:
678			if (ip_divert_ptr == NULL)
679				return EINVAL;
680			else
681				goto check_size;
682		case O_NETGRAPH:
683		case O_NGTEE:
684			if (!NG_IPFW_LOADED)
685				return EINVAL;
686			else
687				goto check_size;
688		case O_NAT:
689			if (!IPFW_NAT_LOADED)
690				return EINVAL;
691			if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
692 				goto bad_size;
693 			goto check_action;
694		case O_FORWARD_MAC: /* XXX not implemented yet */
695		case O_CHECK_STATE:
696		case O_COUNT:
697		case O_ACCEPT:
698		case O_DENY:
699		case O_REJECT:
700#ifdef INET6
701		case O_UNREACH6:
702#endif
703		case O_SKIPTO:
704		case O_REASS:
705check_size:
706			if (cmdlen != F_INSN_SIZE(ipfw_insn))
707				goto bad_size;
708check_action:
709			if (have_action) {
710				printf("ipfw: opcode %d, multiple actions"
711					" not allowed\n",
712					cmd->opcode);
713				return EINVAL;
714			}
715			have_action = 1;
716			if (l != cmdlen) {
717				printf("ipfw: opcode %d, action must be"
718					" last opcode\n",
719					cmd->opcode);
720				return EINVAL;
721			}
722			break;
723#ifdef INET6
724		case O_IP6_SRC:
725		case O_IP6_DST:
726			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
727			    F_INSN_SIZE(ipfw_insn))
728				goto bad_size;
729			break;
730
731		case O_FLOW6ID:
732			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
733			    ((ipfw_insn_u32 *)cmd)->o.arg1)
734				goto bad_size;
735			break;
736
737		case O_IP6_SRC_MASK:
738		case O_IP6_DST_MASK:
739			if ( !(cmdlen & 1) || cmdlen > 127)
740				goto bad_size;
741			break;
742		case O_ICMP6TYPE:
743			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
744				goto bad_size;
745			break;
746#endif
747
748		default:
749			switch (cmd->opcode) {
750#ifndef INET6
751			case O_IP6_SRC_ME:
752			case O_IP6_DST_ME:
753			case O_EXT_HDR:
754			case O_IP6:
755			case O_UNREACH6:
756			case O_IP6_SRC:
757			case O_IP6_DST:
758			case O_FLOW6ID:
759			case O_IP6_SRC_MASK:
760			case O_IP6_DST_MASK:
761			case O_ICMP6TYPE:
762				printf("ipfw: no IPv6 support in kernel\n");
763				return EPROTONOSUPPORT;
764#endif
765			default:
766				printf("ipfw: opcode %d, unknown opcode\n",
767					cmd->opcode);
768				return EINVAL;
769			}
770		}
771	}
772	if (have_action == 0) {
773		printf("ipfw: missing action\n");
774		return EINVAL;
775	}
776	return 0;
777
778bad_size:
779	printf("ipfw: opcode %d size %d wrong\n",
780		cmd->opcode, cmdlen);
781	return EINVAL;
782}
783
784/*
785 * Copy the static and dynamic rules to the supplied buffer
786 * and return the amount of space actually used.
787 */
788static size_t
789ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
790{
791	char *bp = buf;
792	char *ep = bp + space;
793	struct ip_fw *rule;
794	int i;
795	time_t	boot_seconds;
796
797        boot_seconds = boottime.tv_sec;
798	/* XXX this can take a long time and locking will block packet flow */
799	IPFW_RLOCK(chain);
800	for (rule = chain->rules; rule ; rule = rule->next) {
801		/*
802		 * Verify the entry fits in the buffer in case the
803		 * rules changed between calculating buffer space and
804		 * now.  This would be better done using a generation
805		 * number but should suffice for now.
806		 */
807		i = RULESIZE(rule);
808		if (bp + i <= ep) {
809			bcopy(rule, bp, i);
810			/*
811			 * XXX HACK. Store the disable mask in the "next"
812			 * pointer in a wild attempt to keep the ABI the same.
813			 * Why do we do this on EVERY rule?
814			 */
815			bcopy(&V_set_disable,
816			    &(((struct ip_fw *)bp)->next_rule),
817			    sizeof(V_set_disable));
818			if (((struct ip_fw *)bp)->timestamp)
819				((struct ip_fw *)bp)->timestamp += boot_seconds;
820			bp += i;
821		}
822	}
823	IPFW_RUNLOCK(chain);
824	ipfw_get_dynamic(&bp, ep);
825	return (bp - (char *)buf);
826}
827
828
829/**
830 * {set|get}sockopt parser.
831 */
832int
833ipfw_ctl(struct sockopt *sopt)
834{
835#define	RULE_MAXSIZE	(256*sizeof(u_int32_t))
836	int error;
837	size_t size;
838	struct ip_fw *buf, *rule;
839	u_int32_t rulenum[2];
840
841	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
842	if (error)
843		return (error);
844
845	/*
846	 * Disallow modifications in really-really secure mode, but still allow
847	 * the logging counters to be reset.
848	 */
849	if (sopt->sopt_name == IP_FW_ADD ||
850	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
851		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
852		if (error)
853			return (error);
854	}
855
856	error = 0;
857
858	switch (sopt->sopt_name) {
859	case IP_FW_GET:
860		/*
861		 * pass up a copy of the current rules. Static rules
862		 * come first (the last of which has number IPFW_DEFAULT_RULE),
863		 * followed by a possibly empty list of dynamic rule.
864		 * The last dynamic rule has NULL in the "next" field.
865		 *
866		 * Note that the calculated size is used to bound the
867		 * amount of data returned to the user.  The rule set may
868		 * change between calculating the size and returning the
869		 * data in which case we'll just return what fits.
870		 */
871		size = V_static_len;	/* size of static rules */
872		size += ipfw_dyn_len();
873
874		if (size >= sopt->sopt_valsize)
875			break;
876		/*
877		 * XXX todo: if the user passes a short length just to know
878		 * how much room is needed, do not bother filling up the
879		 * buffer, just jump to the sooptcopyout.
880		 */
881		buf = malloc(size, M_TEMP, M_WAITOK);
882		error = sooptcopyout(sopt, buf,
883				ipfw_getrules(&V_layer3_chain, buf, size));
884		free(buf, M_TEMP);
885		break;
886
887	case IP_FW_FLUSH:
888		/*
889		 * Normally we cannot release the lock on each iteration.
890		 * We could do it here only because we start from the head all
891		 * the times so there is no risk of missing some entries.
892		 * On the other hand, the risk is that we end up with
893		 * a very inconsistent ruleset, so better keep the lock
894		 * around the whole cycle.
895		 *
896		 * XXX this code can be improved by resetting the head of
897		 * the list to point to the default rule, and then freeing
898		 * the old list without the need for a lock.
899		 */
900
901		IPFW_WLOCK(&V_layer3_chain);
902		ipfw_free_chain(&V_layer3_chain, 0 /* keep default rule */);
903		rule = V_layer3_chain.reap;
904		IPFW_WUNLOCK(&V_layer3_chain);
905		ipfw_reap_rules(rule);
906		break;
907
908	case IP_FW_ADD:
909		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
910		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
911			sizeof(struct ip_fw) );
912		if (error == 0)
913			error = check_ipfw_struct(rule, sopt->sopt_valsize);
914		if (error == 0) {
915			error = ipfw_add_rule(&V_layer3_chain, rule);
916			size = RULESIZE(rule);
917			if (!error && sopt->sopt_dir == SOPT_GET)
918				error = sooptcopyout(sopt, rule, size);
919		}
920		free(rule, M_TEMP);
921		break;
922
923	case IP_FW_DEL:
924		/*
925		 * IP_FW_DEL is used for deleting single rules or sets,
926		 * and (ab)used to atomically manipulate sets. Argument size
927		 * is used to distinguish between the two:
928		 *    sizeof(u_int32_t)
929		 *	delete single rule or set of rules,
930		 *	or reassign rules (or sets) to a different set.
931		 *    2*sizeof(u_int32_t)
932		 *	atomic disable/enable sets.
933		 *	first u_int32_t contains sets to be disabled,
934		 *	second u_int32_t contains sets to be enabled.
935		 */
936		error = sooptcopyin(sopt, rulenum,
937			2*sizeof(u_int32_t), sizeof(u_int32_t));
938		if (error)
939			break;
940		size = sopt->sopt_valsize;
941		if (size == sizeof(u_int32_t))	/* delete or reassign */
942			error = del_entry(&V_layer3_chain, rulenum[0]);
943		else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */
944			V_set_disable =
945			    (V_set_disable | rulenum[0]) & ~rulenum[1] &
946			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
947		else
948			error = EINVAL;
949		break;
950
951	case IP_FW_ZERO:
952	case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
953		rulenum[0] = 0;
954		if (sopt->sopt_val != 0) {
955		    error = sooptcopyin(sopt, rulenum,
956			    sizeof(u_int32_t), sizeof(u_int32_t));
957		    if (error)
958			break;
959		}
960		error = zero_entry(&V_layer3_chain, rulenum[0],
961			sopt->sopt_name == IP_FW_RESETLOG);
962		break;
963
964	case IP_FW_TABLE_ADD:
965		{
966			ipfw_table_entry ent;
967
968			error = sooptcopyin(sopt, &ent,
969			    sizeof(ent), sizeof(ent));
970			if (error)
971				break;
972			error = ipfw_add_table_entry(&V_layer3_chain, ent.tbl,
973			    ent.addr, ent.masklen, ent.value);
974		}
975		break;
976
977	case IP_FW_TABLE_DEL:
978		{
979			ipfw_table_entry ent;
980
981			error = sooptcopyin(sopt, &ent,
982			    sizeof(ent), sizeof(ent));
983			if (error)
984				break;
985			error = ipfw_del_table_entry(&V_layer3_chain, ent.tbl,
986			    ent.addr, ent.masklen);
987		}
988		break;
989
990	case IP_FW_TABLE_FLUSH:
991		{
992			u_int16_t tbl;
993
994			error = sooptcopyin(sopt, &tbl,
995			    sizeof(tbl), sizeof(tbl));
996			if (error)
997				break;
998			IPFW_WLOCK(&V_layer3_chain);
999			error = ipfw_flush_table(&V_layer3_chain, tbl);
1000			IPFW_WUNLOCK(&V_layer3_chain);
1001		}
1002		break;
1003
1004	case IP_FW_TABLE_GETSIZE:
1005		{
1006			u_int32_t tbl, cnt;
1007
1008			if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
1009			    sizeof(tbl))))
1010				break;
1011			IPFW_RLOCK(&V_layer3_chain);
1012			error = ipfw_count_table(&V_layer3_chain, tbl, &cnt);
1013			IPFW_RUNLOCK(&V_layer3_chain);
1014			if (error)
1015				break;
1016			error = sooptcopyout(sopt, &cnt, sizeof(cnt));
1017		}
1018		break;
1019
1020	case IP_FW_TABLE_LIST:
1021		{
1022			ipfw_table *tbl;
1023
1024			if (sopt->sopt_valsize < sizeof(*tbl)) {
1025				error = EINVAL;
1026				break;
1027			}
1028			size = sopt->sopt_valsize;
1029			tbl = malloc(size, M_TEMP, M_WAITOK);
1030			error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
1031			if (error) {
1032				free(tbl, M_TEMP);
1033				break;
1034			}
1035			tbl->size = (size - sizeof(*tbl)) /
1036			    sizeof(ipfw_table_entry);
1037			IPFW_RLOCK(&V_layer3_chain);
1038			error = ipfw_dump_table(&V_layer3_chain, tbl);
1039			IPFW_RUNLOCK(&V_layer3_chain);
1040			if (error) {
1041				free(tbl, M_TEMP);
1042				break;
1043			}
1044			error = sooptcopyout(sopt, tbl, size);
1045			free(tbl, M_TEMP);
1046		}
1047		break;
1048
1049	case IP_FW_NAT_CFG:
1050		if (IPFW_NAT_LOADED)
1051			error = ipfw_nat_cfg_ptr(sopt);
1052		else {
1053			printf("IP_FW_NAT_CFG: %s\n",
1054			    "ipfw_nat not present, please load it");
1055			error = EINVAL;
1056		}
1057		break;
1058
1059	case IP_FW_NAT_DEL:
1060		if (IPFW_NAT_LOADED)
1061			error = ipfw_nat_del_ptr(sopt);
1062		else {
1063			printf("IP_FW_NAT_DEL: %s\n",
1064			    "ipfw_nat not present, please load it");
1065			error = EINVAL;
1066		}
1067		break;
1068
1069	case IP_FW_NAT_GET_CONFIG:
1070		if (IPFW_NAT_LOADED)
1071			error = ipfw_nat_get_cfg_ptr(sopt);
1072		else {
1073			printf("IP_FW_NAT_GET_CFG: %s\n",
1074			    "ipfw_nat not present, please load it");
1075			error = EINVAL;
1076		}
1077		break;
1078
1079	case IP_FW_NAT_GET_LOG:
1080		if (IPFW_NAT_LOADED)
1081			error = ipfw_nat_get_log_ptr(sopt);
1082		else {
1083			printf("IP_FW_NAT_GET_LOG: %s\n",
1084			    "ipfw_nat not present, please load it");
1085			error = EINVAL;
1086		}
1087		break;
1088
1089	default:
1090		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
1091		error = EINVAL;
1092	}
1093
1094	return (error);
1095#undef RULE_MAXSIZE
1096}
1097