ip_fw_sockopt.c revision 201527
1/*-
2 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
3 *
4 * Supported by: Valeria Paoli
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_sockopt.c 201527 2010-01-04 19:01:22Z luigi $");
30
31/*
32 * Sockopt support for ipfw. The routines here implement
33 * the upper half of the ipfw code.
34 */
35
36#if !defined(KLD_MODULE)
37#include "opt_ipfw.h"
38#include "opt_ipdivert.h"
39#include "opt_ipdn.h"
40#include "opt_inet.h"
41#ifndef INET
42#error IPFIREWALL requires INET.
43#endif /* INET */
44#endif
45#include "opt_inet6.h"
46#include "opt_ipsec.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>	/* struct m_tag used by nested headers */
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/priv.h>
55#include <sys/proc.h>
56#include <sys/rwlock.h>
57#include <sys/socket.h>
58#include <sys/socketvar.h>
59#include <sys/sysctl.h>
60#include <sys/syslog.h>
61#include <net/if.h>
62#include <net/route.h>
63#include <net/vnet.h>
64
65#include <netinet/in.h>
66#include <netinet/ip_fw.h>
67#include <netinet/ipfw/ip_fw_private.h>
68
69#ifdef MAC
70#include <security/mac/mac_framework.h>
71#endif
72
73MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
74
75/*
76 * static variables followed by global ones (none in this file)
77 */
78
79/*
80 * Find the smallest rule >= key, id.
81 * We could use bsearch but it is so simple that we code it directly
82 */
83int
84ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
85{
86	int i, lo, hi;
87	struct ip_fw *r;
88
89  	for (lo = 0, hi = chain->n_rules - 1; lo < hi;) {
90		i = (lo + hi) / 2;
91		r = chain->map[i];
92		if (r->rulenum < key)
93			lo = i + 1;	/* continue from the next one */
94		else if (r->rulenum > key)
95			hi = i;		/* this might be good */
96		else if (r->id < id)
97			lo = i + 1;	/* continue from the next one */
98		else /* r->id >= id */
99			hi = i;		/* this might be good */
100	};
101	return hi;
102}
103
104/*
105 * allocate a new map, returns the chain locked. extra is the number
106 * of entries to add or delete.
107 */
108static struct ip_fw **
109get_map(struct ip_fw_chain *chain, int extra, int locked)
110{
111
112	for (;;) {
113		struct ip_fw **map;
114		int i;
115
116		i = chain->n_rules + extra;
117		map = malloc(i * sizeof(struct ip_fw *), M_IPFW, M_WAITOK);
118		if (map == NULL) {
119			printf("%s: cannot allocate map\n", __FUNCTION__);
120			return NULL;
121		}
122		if (!locked)
123			IPFW_UH_WLOCK(chain);
124		if (i >= chain->n_rules + extra) /* good */
125			return map;
126		/* otherwise we lost the race, free and retry */
127		if (!locked)
128			IPFW_UH_WUNLOCK(chain);
129		free(map, M_IPFW);
130	}
131}
132
133/*
134 * swap the maps. It is supposed to be called with IPFW_UH_WLOCK
135 */
136static struct ip_fw **
137swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
138{
139	struct ip_fw **old_map;
140
141	IPFW_WLOCK(chain);
142	chain->id++;
143	chain->n_rules = new_len;
144	old_map = chain->map;
145	chain->map = new_map;
146	IPFW_WUNLOCK(chain);
147	return old_map;
148}
149
150/*
151 * Add a new rule to the list. Copy the rule into a malloc'ed area, then
152 * possibly create a rule number and add the rule to the list.
153 * Update the rule_number in the input struct so the caller knows it as well.
154 * XXX DO NOT USE FOR THE DEFAULT RULE.
155 * Must be called without IPFW_UH held
156 */
157int
158ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
159{
160	struct ip_fw *rule;
161	int i, l, insert_before;
162	struct ip_fw **map;	/* the new array of pointers */
163
164	if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
165		return (EINVAL);
166
167	l = RULESIZE(input_rule);
168	rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
169	if (rule == NULL)
170		return (ENOSPC);
171	/* get_map returns with IPFW_UH_WLOCK if successful */
172	map = get_map(chain, 1, 0 /* not locked */);
173	if (map == NULL) {
174		free(rule, M_IPFW);
175		return ENOSPC;
176	}
177
178	bcopy(input_rule, rule, l);
179	/* clear fields not settable from userland */
180	rule->x_next = NULL;
181	rule->next_rule = NULL;
182	rule->pcnt = 0;
183	rule->bcnt = 0;
184	rule->timestamp = 0;
185
186	if (V_autoinc_step < 1)
187		V_autoinc_step = 1;
188	else if (V_autoinc_step > 1000)
189		V_autoinc_step = 1000;
190	/* find the insertion point, we will insert before */
191	insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
192	i = ipfw_find_rule(chain, insert_before, 0);
193	/* duplicate first part */
194	if (i > 0)
195		bcopy(chain->map, map, i * sizeof(struct ip_fw *));
196	map[i] = rule;
197	/* duplicate remaining part, we always have the default rule */
198	bcopy(chain->map + i, map + i + 1,
199		sizeof(struct ip_fw *) *(chain->n_rules - i));
200	if (rule->rulenum == 0) {
201		/* write back the number */
202		rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
203		if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
204			rule->rulenum += V_autoinc_step;
205		input_rule->rulenum = rule->rulenum;
206	}
207
208	rule->id = chain->id + 1;
209	map = swap_map(chain, map, chain->n_rules + 1);
210	chain->static_len += l;
211	IPFW_UH_WUNLOCK(chain);
212	if (map)
213		free(map, M_IPFW);
214	return (0);
215}
216
217/*
218 * Reclaim storage associated with a list of rules.  This is
219 * typically the list created using remove_rule.
220 * A NULL pointer on input is handled correctly.
221 */
222void
223ipfw_reap_rules(struct ip_fw *head)
224{
225	struct ip_fw *rule;
226
227	while ((rule = head) != NULL) {
228		head = head->x_next;
229		free(rule, M_IPFW);
230	}
231}
232
233/**
234 * Remove all rules with given number, and also do set manipulation.
235 * Assumes chain != NULL && *chain != NULL.
236 *
237 * The argument is an u_int32_t. The low 16 bit are the rule or set number,
238 * the next 8 bits are the new set, the top 8 bits are the command:
239 *
240 *	0	delete rules with given number
241 *	1	delete rules with given set number
242 *	2	move rules with given number to new set
243 *	3	move rules with given set number to new set
244 *	4	swap sets with given numbers
245 *	5	delete rules with given number and with given set number
246 */
247static int
248del_entry(struct ip_fw_chain *chain, u_int32_t arg)
249{
250	struct ip_fw *rule;
251	uint32_t rulenum;	/* rule or old_set */
252	uint8_t cmd, new_set;
253	int start, end = 0, i, ofs, n;
254	struct ip_fw **map = NULL;
255	int error = 0;
256
257	rulenum = arg & 0xffff;
258	cmd = (arg >> 24) & 0xff;
259	new_set = (arg >> 16) & 0xff;
260
261	if (cmd > 5 || new_set > RESVD_SET)
262		return EINVAL;
263	if (cmd == 0 || cmd == 2 || cmd == 5) {
264		if (rulenum >= IPFW_DEFAULT_RULE)
265			return EINVAL;
266	} else {
267		if (rulenum > RESVD_SET)	/* old_set */
268			return EINVAL;
269	}
270
271	IPFW_UH_WLOCK(chain); /* prevent conflicts among the writers */
272	chain->reap = NULL;	/* prepare for deletions */
273
274	switch (cmd) {
275	case 0:	/* delete rules with given number (0 is special means all) */
276	case 1:	/* delete all rules with given set number, rule->set == rulenum */
277	case 5: /* delete rules with given number and with given set number.
278		 * rulenum - given rule number;
279		 * new_set - given set number.
280		 */
281		/* locate first rule to delete (start), the one after the
282		 * last one (end), and count how many rules to delete (n)
283		 */
284		n = 0;
285		if (cmd == 1) { /* look for a specific set, must scan all */
286			for (start = -1, i = 0; i < chain->n_rules; i++) {
287				if (chain->map[start]->set != rulenum)
288					continue;
289				if (start < 0)
290					start = i;
291				end = i;
292				n++;
293			}
294			end++;	/* first non-matching */
295		} else {
296			start = ipfw_find_rule(chain, rulenum, 0);
297			for (end = start; end < chain->n_rules; end++) {
298				rule = chain->map[end];
299				if (rulenum > 0 && rule->rulenum != rulenum)
300					break;
301				if (rule->set != RESVD_SET &&
302				    (cmd == 0 || rule->set == new_set) )
303					n++;
304			}
305		}
306		if (n == 0 && arg == 0)
307			break; /* special case, flush on empty ruleset */
308		/* allocate the map, if needed */
309		if (n > 0)
310			map = get_map(chain, -n, 1 /* locked */);
311		if (n == 0 || map == NULL) {
312			error = EINVAL;
313			break;
314		}
315		/* copy the initial part of the map */
316		if (start > 0)
317			bcopy(chain->map, map, start * sizeof(struct ip_fw *));
318		/* copy active rules between start and end */
319		for (i = ofs = start; i < end; i++) {
320			rule = chain->map[i];
321			if (!(rule->set != RESVD_SET &&
322			    (cmd == 0 || rule->set == new_set) ))
323				map[ofs++] = chain->map[i];
324		}
325		/* finally the tail */
326		bcopy(chain->map + end, map + ofs,
327			(chain->n_rules - end) * sizeof(struct ip_fw *));
328		map = swap_map(chain, map, chain->n_rules - n);
329		/* now remove the rules deleted */
330		for (i = start; i < end; i++) {
331			rule = map[i];
332			if (rule->set != RESVD_SET &&
333			    (cmd == 0 || rule->set == new_set) ) {
334				int l = RULESIZE(rule);
335
336				chain->static_len -= l;
337				ipfw_remove_dyn_children(rule);
338				rule->x_next = chain->reap;
339				chain->reap = rule;
340			}
341		}
342		break;
343
344	case 2:	/* move rules with given number to new set */
345		IPFW_UH_WLOCK(chain);
346		for (i = 0; i < chain->n_rules; i++) {
347			rule = chain->map[i];
348			if (rule->rulenum == rulenum)
349				rule->set = new_set;
350		}
351		IPFW_UH_WUNLOCK(chain);
352		break;
353
354	case 3: /* move rules with given set number to new set */
355		IPFW_UH_WLOCK(chain);
356		for (i = 0; i < chain->n_rules; i++) {
357			rule = chain->map[i];
358			if (rule->set == rulenum)
359				rule->set = new_set;
360		}
361		IPFW_UH_WUNLOCK(chain);
362		break;
363
364	case 4: /* swap two sets */
365		IPFW_UH_WLOCK(chain);
366		for (i = 0; i < chain->n_rules; i++) {
367			rule = chain->map[i];
368			if (rule->set == rulenum)
369				rule->set = new_set;
370			else if (rule->set == new_set)
371				rule->set = rulenum;
372		}
373		IPFW_UH_WUNLOCK(chain);
374		break;
375	}
376	rule = chain->reap;
377	chain->reap = NULL;
378	IPFW_UH_WUNLOCK(chain);
379	ipfw_reap_rules(rule);
380	if (map)
381		free(map, M_IPFW);
382	return error;
383}
384
385/*
386 * Clear counters for a specific rule.
387 * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
388 * so we only care that rules do not disappear.
389 */
390static void
391clear_counters(struct ip_fw *rule, int log_only)
392{
393	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
394
395	if (log_only == 0) {
396		rule->bcnt = rule->pcnt = 0;
397		rule->timestamp = 0;
398	}
399	if (l->o.opcode == O_LOG)
400		l->log_left = l->max_log;
401}
402
403/**
404 * Reset some or all counters on firewall rules.
405 * The argument `arg' is an u_int32_t. The low 16 bit are the rule number,
406 * the next 8 bits are the set number, the top 8 bits are the command:
407 *	0	work with rules from all set's;
408 *	1	work with rules only from specified set.
409 * Specified rule number is zero if we want to clear all entries.
410 * log_only is 1 if we only want to reset logs, zero otherwise.
411 */
412static int
413zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
414{
415	struct ip_fw *rule;
416	char *msg;
417	int i;
418
419	uint16_t rulenum = arg & 0xffff;
420	uint8_t set = (arg >> 16) & 0xff;
421	uint8_t cmd = (arg >> 24) & 0xff;
422
423	if (cmd > 1)
424		return (EINVAL);
425	if (cmd == 1 && set > RESVD_SET)
426		return (EINVAL);
427
428	IPFW_UH_RLOCK(chain);
429	if (rulenum == 0) {
430		V_norule_counter = 0;
431		for (i = 0; i < chain->n_rules; i++) {
432			rule = chain->map[i];
433			/* Skip rules not in our set. */
434			if (cmd == 1 && rule->set != set)
435				continue;
436			clear_counters(rule, log_only);
437		}
438		msg = log_only ? "All logging counts reset" :
439		    "Accounting cleared";
440	} else {
441		int cleared = 0;
442		for (i = 0; i < chain->n_rules; i++) {
443			rule = chain->map[i];
444			if (rule->rulenum == rulenum) {
445				if (cmd == 0 || rule->set == set)
446					clear_counters(rule, log_only);
447				cleared = 1;
448			}
449			if (rule->rulenum > rulenum)
450				break;
451		}
452		if (!cleared) {	/* we did not find any matching rules */
453			IPFW_WUNLOCK(chain);
454			return (EINVAL);
455		}
456		msg = log_only ? "logging count reset" : "cleared";
457	}
458	IPFW_UH_RUNLOCK(chain);
459
460	if (V_fw_verbose) {
461		int lev = LOG_SECURITY | LOG_NOTICE;
462
463		if (rulenum)
464			log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
465		else
466			log(lev, "ipfw: %s.\n", msg);
467	}
468	return (0);
469}
470
471/*
472 * Check validity of the structure before insert.
473 * Rules are simple, so this mostly need to check rule sizes.
474 */
475static int
476check_ipfw_struct(struct ip_fw *rule, int size)
477{
478	int l, cmdlen = 0;
479	int have_action=0;
480	ipfw_insn *cmd;
481
482	if (size < sizeof(*rule)) {
483		printf("ipfw: rule too short\n");
484		return (EINVAL);
485	}
486	/* first, check for valid size */
487	l = RULESIZE(rule);
488	if (l != size) {
489		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
490		return (EINVAL);
491	}
492	if (rule->act_ofs >= rule->cmd_len) {
493		printf("ipfw: bogus action offset (%u > %u)\n",
494		    rule->act_ofs, rule->cmd_len - 1);
495		return (EINVAL);
496	}
497	/*
498	 * Now go for the individual checks. Very simple ones, basically only
499	 * instruction sizes.
500	 */
501	for (l = rule->cmd_len, cmd = rule->cmd ;
502			l > 0 ; l -= cmdlen, cmd += cmdlen) {
503		cmdlen = F_LEN(cmd);
504		if (cmdlen > l) {
505			printf("ipfw: opcode %d size truncated\n",
506			    cmd->opcode);
507			return EINVAL;
508		}
509		switch (cmd->opcode) {
510		case O_PROBE_STATE:
511		case O_KEEP_STATE:
512		case O_PROTO:
513		case O_IP_SRC_ME:
514		case O_IP_DST_ME:
515		case O_LAYER2:
516		case O_IN:
517		case O_FRAG:
518		case O_DIVERTED:
519		case O_IPOPT:
520		case O_IPTOS:
521		case O_IPPRECEDENCE:
522		case O_IPVER:
523		case O_TCPWIN:
524		case O_TCPFLAGS:
525		case O_TCPOPTS:
526		case O_ESTAB:
527		case O_VERREVPATH:
528		case O_VERSRCREACH:
529		case O_ANTISPOOF:
530		case O_IPSEC:
531#ifdef INET6
532		case O_IP6_SRC_ME:
533		case O_IP6_DST_ME:
534		case O_EXT_HDR:
535		case O_IP6:
536#endif
537		case O_IP4:
538		case O_TAG:
539			if (cmdlen != F_INSN_SIZE(ipfw_insn))
540				goto bad_size;
541			break;
542
543		case O_FIB:
544			if (cmdlen != F_INSN_SIZE(ipfw_insn))
545				goto bad_size;
546			if (cmd->arg1 >= rt_numfibs) {
547				printf("ipfw: invalid fib number %d\n",
548					cmd->arg1);
549				return EINVAL;
550			}
551			break;
552
553		case O_SETFIB:
554			if (cmdlen != F_INSN_SIZE(ipfw_insn))
555				goto bad_size;
556			if (cmd->arg1 >= rt_numfibs) {
557				printf("ipfw: invalid fib number %d\n",
558					cmd->arg1);
559				return EINVAL;
560			}
561			goto check_action;
562
563		case O_UID:
564		case O_GID:
565		case O_JAIL:
566		case O_IP_SRC:
567		case O_IP_DST:
568		case O_TCPSEQ:
569		case O_TCPACK:
570		case O_PROB:
571		case O_ICMPTYPE:
572			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
573				goto bad_size;
574			break;
575
576		case O_LIMIT:
577			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
578				goto bad_size;
579			break;
580
581		case O_LOG:
582			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
583				goto bad_size;
584
585			((ipfw_insn_log *)cmd)->log_left =
586			    ((ipfw_insn_log *)cmd)->max_log;
587
588			break;
589
590		case O_IP_SRC_MASK:
591		case O_IP_DST_MASK:
592			/* only odd command lengths */
593			if ( !(cmdlen & 1) || cmdlen > 31)
594				goto bad_size;
595			break;
596
597		case O_IP_SRC_SET:
598		case O_IP_DST_SET:
599			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
600				printf("ipfw: invalid set size %d\n",
601					cmd->arg1);
602				return EINVAL;
603			}
604			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
605			    (cmd->arg1+31)/32 )
606				goto bad_size;
607			break;
608
609		case O_IP_SRC_LOOKUP:
610		case O_IP_DST_LOOKUP:
611			if (cmd->arg1 >= IPFW_TABLES_MAX) {
612				printf("ipfw: invalid table number %d\n",
613				    cmd->arg1);
614				return (EINVAL);
615			}
616			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
617			    cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
618			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
619				goto bad_size;
620			break;
621
622		case O_MACADDR2:
623			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
624				goto bad_size;
625			break;
626
627		case O_NOP:
628		case O_IPID:
629		case O_IPTTL:
630		case O_IPLEN:
631		case O_TCPDATALEN:
632		case O_TAGGED:
633			if (cmdlen < 1 || cmdlen > 31)
634				goto bad_size;
635			break;
636
637		case O_MAC_TYPE:
638		case O_IP_SRCPORT:
639		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
640			if (cmdlen < 2 || cmdlen > 31)
641				goto bad_size;
642			break;
643
644		case O_RECV:
645		case O_XMIT:
646		case O_VIA:
647			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
648				goto bad_size;
649			break;
650
651		case O_ALTQ:
652			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
653				goto bad_size;
654			break;
655
656		case O_PIPE:
657		case O_QUEUE:
658			if (cmdlen != F_INSN_SIZE(ipfw_insn))
659				goto bad_size;
660			goto check_action;
661
662		case O_FORWARD_IP:
663#ifdef	IPFIREWALL_FORWARD
664			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
665				goto bad_size;
666			goto check_action;
667#else
668			return EINVAL;
669#endif
670
671		case O_DIVERT:
672		case O_TEE:
673			if (ip_divert_ptr == NULL)
674				return EINVAL;
675			else
676				goto check_size;
677		case O_NETGRAPH:
678		case O_NGTEE:
679			if (!NG_IPFW_LOADED)
680				return EINVAL;
681			else
682				goto check_size;
683		case O_NAT:
684			if (!IPFW_NAT_LOADED)
685				return EINVAL;
686			if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
687 				goto bad_size;
688 			goto check_action;
689		case O_FORWARD_MAC: /* XXX not implemented yet */
690		case O_CHECK_STATE:
691		case O_COUNT:
692		case O_ACCEPT:
693		case O_DENY:
694		case O_REJECT:
695#ifdef INET6
696		case O_UNREACH6:
697#endif
698		case O_SKIPTO:
699		case O_REASS:
700check_size:
701			if (cmdlen != F_INSN_SIZE(ipfw_insn))
702				goto bad_size;
703check_action:
704			if (have_action) {
705				printf("ipfw: opcode %d, multiple actions"
706					" not allowed\n",
707					cmd->opcode);
708				return EINVAL;
709			}
710			have_action = 1;
711			if (l != cmdlen) {
712				printf("ipfw: opcode %d, action must be"
713					" last opcode\n",
714					cmd->opcode);
715				return EINVAL;
716			}
717			break;
718#ifdef INET6
719		case O_IP6_SRC:
720		case O_IP6_DST:
721			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
722			    F_INSN_SIZE(ipfw_insn))
723				goto bad_size;
724			break;
725
726		case O_FLOW6ID:
727			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
728			    ((ipfw_insn_u32 *)cmd)->o.arg1)
729				goto bad_size;
730			break;
731
732		case O_IP6_SRC_MASK:
733		case O_IP6_DST_MASK:
734			if ( !(cmdlen & 1) || cmdlen > 127)
735				goto bad_size;
736			break;
737		case O_ICMP6TYPE:
738			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
739				goto bad_size;
740			break;
741#endif
742
743		default:
744			switch (cmd->opcode) {
745#ifndef INET6
746			case O_IP6_SRC_ME:
747			case O_IP6_DST_ME:
748			case O_EXT_HDR:
749			case O_IP6:
750			case O_UNREACH6:
751			case O_IP6_SRC:
752			case O_IP6_DST:
753			case O_FLOW6ID:
754			case O_IP6_SRC_MASK:
755			case O_IP6_DST_MASK:
756			case O_ICMP6TYPE:
757				printf("ipfw: no IPv6 support in kernel\n");
758				return EPROTONOSUPPORT;
759#endif
760			default:
761				printf("ipfw: opcode %d, unknown opcode\n",
762					cmd->opcode);
763				return EINVAL;
764			}
765		}
766	}
767	if (have_action == 0) {
768		printf("ipfw: missing action\n");
769		return EINVAL;
770	}
771	return 0;
772
773bad_size:
774	printf("ipfw: opcode %d size %d wrong\n",
775		cmd->opcode, cmdlen);
776	return EINVAL;
777}
778
779/*
780 * Copy the static and dynamic rules to the supplied buffer
781 * and return the amount of space actually used.
782 * Must be run under IPFW_UH_RLOCK
783 */
784static size_t
785ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
786{
787	char *bp = buf;
788	char *ep = bp + space;
789	struct ip_fw *rule, *dst;
790	int l, i;
791	time_t	boot_seconds;
792
793        boot_seconds = boottime.tv_sec;
794	for (i = 0; i < chain->n_rules; i++) {
795		rule = chain->map[i];
796		l = RULESIZE(rule);
797		if (bp + l > ep) { /* should not happen */
798			printf("overflow dumping static rules\n");
799			break;
800		}
801		dst = (struct ip_fw *)bp;
802		bcopy(rule, dst, l);
803		/*
804		 * XXX HACK. Store the disable mask in the "next"
805		 * pointer in a wild attempt to keep the ABI the same.
806		 * Why do we do this on EVERY rule?
807		 */
808		bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
809		if (dst->timestamp)
810			dst->timestamp += boot_seconds;
811		bp += l;
812	}
813	ipfw_get_dynamic(&bp, ep); /* protected by the dynamic lock */
814	return (bp - (char *)buf);
815}
816
817
818/**
819 * {set|get}sockopt parser.
820 */
821int
822ipfw_ctl(struct sockopt *sopt)
823{
824#define	RULE_MAXSIZE	(256*sizeof(u_int32_t))
825	int error;
826	size_t size;
827	struct ip_fw *buf, *rule;
828	struct ip_fw_chain *chain;
829	u_int32_t rulenum[2];
830
831	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
832	if (error)
833		return (error);
834
835	/*
836	 * Disallow modifications in really-really secure mode, but still allow
837	 * the logging counters to be reset.
838	 */
839	if (sopt->sopt_name == IP_FW_ADD ||
840	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
841		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
842		if (error)
843			return (error);
844	}
845
846	chain = &V_layer3_chain;
847	error = 0;
848
849	switch (sopt->sopt_name) {
850	case IP_FW_GET:
851		/*
852		 * pass up a copy of the current rules. Static rules
853		 * come first (the last of which has number IPFW_DEFAULT_RULE),
854		 * followed by a possibly empty list of dynamic rule.
855		 * The last dynamic rule has NULL in the "next" field.
856		 *
857		 * Note that the calculated size is used to bound the
858		 * amount of data returned to the user.  The rule set may
859		 * change between calculating the size and returning the
860		 * data in which case we'll just return what fits.
861		 */
862		for (;;) {
863			int len = 0, want;
864
865			size = chain->static_len;
866			size += ipfw_dyn_len();
867			if (size >= sopt->sopt_valsize)
868				break;
869			buf = malloc(size, M_TEMP, M_WAITOK);
870			if (buf == NULL)
871				break;
872			IPFW_UH_RLOCK(chain);
873			/* check again how much space we need */
874			want = chain->static_len + ipfw_dyn_len();
875			if (size >= want)
876				len = ipfw_getrules(chain, buf, size);
877			IPFW_UH_RUNLOCK(chain);
878			if (size >= want)
879				error = sooptcopyout(sopt, buf, len);
880			free(buf, M_TEMP);
881			if (size >= want)
882				break;
883		}
884		break;
885
886	case IP_FW_FLUSH:
887		/* locking is done within del_entry() */
888		error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
889		break;
890
891	case IP_FW_ADD:
892		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
893		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
894			sizeof(struct ip_fw) );
895		if (error == 0)
896			error = check_ipfw_struct(rule, sopt->sopt_valsize);
897		if (error == 0) {
898			/* locking is done within ipfw_add_rule() */
899			error = ipfw_add_rule(chain, rule);
900			size = RULESIZE(rule);
901			if (!error && sopt->sopt_dir == SOPT_GET)
902				error = sooptcopyout(sopt, rule, size);
903		}
904		free(rule, M_TEMP);
905		break;
906
907	case IP_FW_DEL:
908		/*
909		 * IP_FW_DEL is used for deleting single rules or sets,
910		 * and (ab)used to atomically manipulate sets. Argument size
911		 * is used to distinguish between the two:
912		 *    sizeof(u_int32_t)
913		 *	delete single rule or set of rules,
914		 *	or reassign rules (or sets) to a different set.
915		 *    2*sizeof(u_int32_t)
916		 *	atomic disable/enable sets.
917		 *	first u_int32_t contains sets to be disabled,
918		 *	second u_int32_t contains sets to be enabled.
919		 */
920		error = sooptcopyin(sopt, rulenum,
921			2*sizeof(u_int32_t), sizeof(u_int32_t));
922		if (error)
923			break;
924		size = sopt->sopt_valsize;
925		if (size == sizeof(u_int32_t) && rulenum[0] != 0) {
926			/* delete or reassign, locking done in del_entry() */
927			error = del_entry(chain, rulenum[0]);
928		} else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
929			IPFW_UH_WLOCK(chain);
930			V_set_disable =
931			    (V_set_disable | rulenum[0]) & ~rulenum[1] &
932			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
933			IPFW_UH_WUNLOCK(chain);
934		} else
935			error = EINVAL;
936		break;
937
938	case IP_FW_ZERO:
939	case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
940		rulenum[0] = 0;
941		if (sopt->sopt_val != 0) {
942		    error = sooptcopyin(sopt, rulenum,
943			    sizeof(u_int32_t), sizeof(u_int32_t));
944		    if (error)
945			break;
946		}
947		error = zero_entry(chain, rulenum[0],
948			sopt->sopt_name == IP_FW_RESETLOG);
949		break;
950
951	/*--- TABLE manipulations are protected by the IPFW_LOCK ---*/
952	case IP_FW_TABLE_ADD:
953		{
954			ipfw_table_entry ent;
955
956			error = sooptcopyin(sopt, &ent,
957			    sizeof(ent), sizeof(ent));
958			if (error)
959				break;
960			error = ipfw_add_table_entry(chain, ent.tbl,
961			    ent.addr, ent.masklen, ent.value);
962		}
963		break;
964
965	case IP_FW_TABLE_DEL:
966		{
967			ipfw_table_entry ent;
968
969			error = sooptcopyin(sopt, &ent,
970			    sizeof(ent), sizeof(ent));
971			if (error)
972				break;
973			error = ipfw_del_table_entry(chain, ent.tbl,
974			    ent.addr, ent.masklen);
975		}
976		break;
977
978	case IP_FW_TABLE_FLUSH:
979		{
980			u_int16_t tbl;
981
982			error = sooptcopyin(sopt, &tbl,
983			    sizeof(tbl), sizeof(tbl));
984			if (error)
985				break;
986			IPFW_WLOCK(chain);
987			error = ipfw_flush_table(chain, tbl);
988			IPFW_WUNLOCK(chain);
989		}
990		break;
991
992	case IP_FW_TABLE_GETSIZE:
993		{
994			u_int32_t tbl, cnt;
995
996			if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
997			    sizeof(tbl))))
998				break;
999			IPFW_RLOCK(chain);
1000			error = ipfw_count_table(chain, tbl, &cnt);
1001			IPFW_RUNLOCK(chain);
1002			if (error)
1003				break;
1004			error = sooptcopyout(sopt, &cnt, sizeof(cnt));
1005		}
1006		break;
1007
1008	case IP_FW_TABLE_LIST:
1009		{
1010			ipfw_table *tbl;
1011
1012			if (sopt->sopt_valsize < sizeof(*tbl)) {
1013				error = EINVAL;
1014				break;
1015			}
1016			size = sopt->sopt_valsize;
1017			tbl = malloc(size, M_TEMP, M_WAITOK);
1018			error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
1019			if (error) {
1020				free(tbl, M_TEMP);
1021				break;
1022			}
1023			tbl->size = (size - sizeof(*tbl)) /
1024			    sizeof(ipfw_table_entry);
1025			IPFW_RLOCK(chain);
1026			error = ipfw_dump_table(chain, tbl);
1027			IPFW_RUNLOCK(chain);
1028			if (error) {
1029				free(tbl, M_TEMP);
1030				break;
1031			}
1032			error = sooptcopyout(sopt, tbl, size);
1033			free(tbl, M_TEMP);
1034		}
1035		break;
1036
1037	/*--- NAT operations are protected by the IPFW_LOCK ---*/
1038	case IP_FW_NAT_CFG:
1039		if (IPFW_NAT_LOADED)
1040			error = ipfw_nat_cfg_ptr(sopt);
1041		else {
1042			printf("IP_FW_NAT_CFG: %s\n",
1043			    "ipfw_nat not present, please load it");
1044			error = EINVAL;
1045		}
1046		break;
1047
1048	case IP_FW_NAT_DEL:
1049		if (IPFW_NAT_LOADED)
1050			error = ipfw_nat_del_ptr(sopt);
1051		else {
1052			printf("IP_FW_NAT_DEL: %s\n",
1053			    "ipfw_nat not present, please load it");
1054			error = EINVAL;
1055		}
1056		break;
1057
1058	case IP_FW_NAT_GET_CONFIG:
1059		if (IPFW_NAT_LOADED)
1060			error = ipfw_nat_get_cfg_ptr(sopt);
1061		else {
1062			printf("IP_FW_NAT_GET_CFG: %s\n",
1063			    "ipfw_nat not present, please load it");
1064			error = EINVAL;
1065		}
1066		break;
1067
1068	case IP_FW_NAT_GET_LOG:
1069		if (IPFW_NAT_LOADED)
1070			error = ipfw_nat_get_log_ptr(sopt);
1071		else {
1072			printf("IP_FW_NAT_GET_LOG: %s\n",
1073			    "ipfw_nat not present, please load it");
1074			error = EINVAL;
1075		}
1076		break;
1077
1078	default:
1079		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
1080		error = EINVAL;
1081	}
1082
1083	return (error);
1084#undef RULE_MAXSIZE
1085}
1086/* end of file */
1087