ip_fw.h revision 215179
1/*-
2 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/netinet/ip_fw.h 215179 2010-11-12 13:05:17Z luigi $
26 */
27
28#ifndef _IPFW2_H
29#define _IPFW2_H
30
/*
 * The default rule number.  By the design of ip_fw, the default rule
 * is the last one, so its number also serves as the highest number
 * allowed for a rule.  The ip_fw code relies on both meanings of this
 * constant, so they cannot be changed independently.
 */
#define	IPFW_DEFAULT_RULE	65535
38
/*
 * The number of ipfw tables.  The maximum allowed table number is
 * (IPFW_TABLES_MAX - 1).
 */
#define	IPFW_TABLES_MAX		128
44
/*
 * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit
 * argument between 1 and 65534 (IPFW_ARG_MIN .. IPFW_ARG_MAX).
 * The value 0 is unused; the value 65535 (IP_FW_TABLEARG) is used to
 * represent 'tablearg', i.e. the result of the most recent table()
 * lookup.
 * Note that 16 bits is only a historical limit, resulting from
 * the use of 16-bit fields for that value. In reality, we can have
 * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg.
 */
#define	IPFW_ARG_MIN		1
#define	IPFW_ARG_MAX		65534
#define IP_FW_TABLEARG		65535	/* XXX should use 0 */
58
/*
 * The kernel representation of ipfw rules is made of a list of
 * 'instructions' (for all practical purposes equivalent to BPF
 * instructions), which specify which fields of the packet
 * (or its metadata) should be analysed.
 *
 * Each instruction is stored in a structure which begins with
 * "ipfw_insn", and can contain extra fields depending on the
 * instruction type (listed below).
 * Note that the code is written so that individual instructions
 * have a size which is a multiple of 32 bits. This means that, if
 * such structures contain pointers or other 64-bit entities
 * (there is just one instance now), they may end up unaligned on
 * 64-bit architectures, so they must be handled with care.
 *
 * "enum ipfw_opcodes" lists the supported opcodes. We can have up
 * to 256 different opcodes. When adding new opcodes, append them to
 * the end of the opcode list, just before O_LAST_OPCODE; this keeps
 * the ABI from being broken, otherwise users will have to recompile
 * ipfw(8) when they update the kernel.
 */
80
/*
 * Opcode numbering is part of the kernel/userland ABI: values are
 * assigned implicitly by position, so entries must never be reordered
 * or removed, and new ones go immediately before O_LAST_OPCODE.
 * The trailing comment on each entry describes its arguments.
 */
enum ipfw_opcodes {		/* arguments (4 byte each)	*/
	O_NOP = 0,

	/* source address */
	O_IP_SRC,		/* u32 = IP */
	O_IP_SRC_MASK,		/* ip = IP/mask */
	O_IP_SRC_ME,		/* no argument */
	O_IP_SRC_SET,		/* u32 = base, arg1 = len, bitmap */

	/* destination address */
	O_IP_DST,		/* u32 = IP */
	O_IP_DST_MASK,		/* ip = IP/mask */
	O_IP_DST_ME,		/* no argument */
	O_IP_DST_SET,		/* u32 = base, arg1 = len, bitmap */

	/* ports and protocol */
	O_IP_SRCPORT,		/* (n)port list:mask, 4 bytes each */
	O_IP_DSTPORT,		/* (n)port list:mask, 4 bytes each */
	O_PROTO,		/* arg1 = protocol */

	/* layer 2 */
	O_MACADDR2,		/* 2 mac addr:mask */
	O_MAC_TYPE,		/* same format as srcport */

	O_LAYER2,		/* no argument */
	O_IN,			/* no argument */
	O_FRAG,			/* no argument */

	/* interface matching */
	O_RECV,			/* no argument */
	O_XMIT,			/* no argument */
	O_VIA,			/* no argument */

	/* IP header fields */
	O_IPOPT,		/* arg1 = 2*u8 bitmap */
	O_IPLEN,		/* arg1 = len */
	O_IPID,			/* arg1 = id */

	O_IPTOS,		/* arg1 (historically documented as "id"; presumably the TOS value -- confirm) */
	O_IPPRECEDENCE,		/* arg1 = precedence << 5 */
	O_IPTTL,		/* arg1 = TTL */

	O_IPVER,		/* arg1 = version */
	O_UID,			/* u32 = id */
	O_GID,			/* u32 = id */
	O_ESTAB,		/* no argument (tcp established) */
	O_TCPFLAGS,		/* arg1 = 2*u8 bitmap */
	O_TCPWIN,		/* arg1 = desired win */
	O_TCPSEQ,		/* u32 = desired seq. */
	O_TCPACK,		/* u32 = desired seq. */
	O_ICMPTYPE,		/* u32 = icmp bitmap */
	O_TCPOPTS,		/* arg1 = 2*u8 bitmap */

	O_VERREVPATH,		/* no argument */
	O_VERSRCREACH,		/* no argument */

	/* stateful handling */
	O_PROBE_STATE,		/* no argument */
	O_KEEP_STATE,		/* no argument */
	O_LIMIT,		/* ipfw_insn_limit */
	O_LIMIT_PARENT,		/* a dyn_type value, not an opcode */

	/*
	 * From here on the entries are really 'actions'.
	 */

	O_LOG,			/* ipfw_insn_log */
	O_PROB,			/* u32 = match probability */

	O_CHECK_STATE,		/* no argument */
	O_ACCEPT,		/* no argument */
	O_DENY,			/* no argument */
	O_REJECT,		/* arg1 = icmp arg (same as deny) */
	O_COUNT,		/* no argument */
	O_SKIPTO,		/* arg1 = next rule number */
	O_PIPE,			/* arg1 = pipe number */
	O_QUEUE,		/* arg1 = queue number */
	O_DIVERT,		/* arg1 = port number */
	O_TEE,			/* arg1 = port number */
	O_FORWARD_IP,		/* fwd sockaddr */
	O_FORWARD_MAC,		/* fwd mac */
	O_NAT,			/* arguments undocumented here (old comment read "nope") */
	O_REASS,		/* no argument */

	/*
	 * More opcodes.
	 */
	O_IPSEC,		/* has ipsec history */
	O_IP_SRC_LOOKUP,	/* arg1 = table number, u32 = value */
	O_IP_DST_LOOKUP,	/* arg1 = table number, u32 = value */
	O_ANTISPOOF,		/* no argument */
	O_JAIL,			/* u32 = id */
	O_ALTQ,			/* u32 = altq classif. qid */
	O_DIVERTED,		/* arg1 = bitmap (1:loop, 2:out) */
	O_TCPDATALEN,		/* arg1 = tcp data len */
	O_IP6_SRC,		/* address without mask */
	O_IP6_SRC_ME,		/* my addresses */
	O_IP6_SRC_MASK,		/* address with the mask */
	O_IP6_DST,
	O_IP6_DST_ME,
	O_IP6_DST_MASK,
	O_FLOW6ID,		/* for flow id tag in the ipv6 pkt */
	O_ICMP6TYPE,		/* icmp6 packet type filtering */
	O_EXT_HDR,		/* filtering for ipv6 extension header */
	O_IP6,

	/*
	 * Actions for ng_ipfw.
	 */
	O_NETGRAPH,		/* send to ng_ipfw */
	O_NGTEE,		/* copy to ng_ipfw */

	O_IP4,

	O_UNREACH6,		/* arg1 = icmpv6 code arg (deny) */

	O_TAG,			/* arg1 = tag number */
	O_TAGGED,		/* arg1 = tag number */

	O_SETFIB,		/* arg1 = FIB number */
	O_FIB,			/* arg1 = FIB desired fib number */

	O_SOCKARG,		/* socket argument */

	O_LAST_OPCODE		/* end marker, not an opcode! */
};
200
201
/*
 * Extension headers are filtered only for presence, using a bit
 * vector with one flag for each header.
 */
#define EXT_FRAGMENT	0x1
#define EXT_HOPOPTS	0x2
#define EXT_ROUTING	0x4
#define EXT_AH		0x8
#define EXT_ESP		0x10
#define EXT_DSTOPTS	0x20
#define EXT_RTHDR0		0x40
#define EXT_RTHDR2		0x80
214
/*
 * Template for instructions.
 *
 * ipfw_insn is the complete instruction for every opcode that needs
 * no operands, a single 16-bit value (arg1), or a couple of 8-bit
 * values.  Opcodes with different/larger arguments use the derived
 * structures ipfw_insn_*, all of which start with an ipfw_insn.
 *
 * "len" packs two things:
 *   - low 6 bits (F_LEN_MASK): instruction size in 32-bit words;
 *     given a type, F_INSN_SIZE(t) computes the value to store;
 *   - high 2 bits: the F_NOT and F_OR modifiers.
 *
 * F_NOT	negates the match result of the instruction.
 *
 * F_OR		builds "or" blocks.  Instructions are normally ANDed
 *		together; in a block { X or Y or Z } every instruction
 *		except the last has F_OR set.  A match makes the code
 *		skip past the last instruction of the block.
 *
 * NOTA BENE: in a couple of places we assume that
 *	sizeof(ipfw_insn) == sizeof(u_int32_t)
 * this needs to be fixed.
 */
typedef struct _ipfw_insn {
	u_int8_t	opcode;		/* see enum ipfw_opcodes	*/
	u_int8_t	len;		/* F_NOT | F_OR | size in 32-bit words */
	u_int16_t	arg1;		/* 16-bit operand, if any	*/
} ipfw_insn;

/* Bits of ipfw_insn.len, and the accessor for its size part. */
#define	F_NOT		0x80
#define	F_OR		0x40
#define	F_LEN_MASK	0x3f
#define	F_LEN(cmd)	((cmd)->len & F_LEN_MASK)
252
/*
 * F_INSN_SIZE(t) yields the size of type t expressed in 4-byte
 * (32-bit) words, i.e. sizeof(t) divided by sizeof(u_int32_t).
 * The division truncates.
 */
#define	F_INSN_SIZE(t)	(sizeof(t) / sizeof(u_int32_t))
258
/*
 * Instruction carrying an array of 16-bit entries (ports etc.).
 * Two entries are declared, which keeps the structure a multiple of
 * 32 bits; an instruction may carry more entries after them.
 */
typedef struct	_ipfw_insn_u16 {
	ipfw_insn o;
	u_int16_t ports[2];	/* there may be more */
} ipfw_insn_u16;
266
/*
 * Instruction carrying an array of 32-bit entries
 * (uid, single IPv4 addresses etc.).
 * Only the first entry is declared; more may follow it.
 */
typedef struct	_ipfw_insn_u32 {
	ipfw_insn o;
	u_int32_t d[1];	/* one or more */
} ipfw_insn_u32;
275
/*
 * Instruction carrying one IP addr-mask pair.
 */
typedef struct	_ipfw_insn_ip {
	ipfw_insn o;
	struct in_addr	addr;	/* address */
	struct in_addr	mask;	/* mask paired with addr */
} ipfw_insn_ip;
284
/*
 * Instruction used to forward to a given address (ip); the target is
 * stored as a sockaddr_in.
 */
typedef struct  _ipfw_insn_sa {
	ipfw_insn o;
	struct sockaddr_in sa;	/* forwarding destination */
} ipfw_insn_sa;
292
/*
 * Instruction carrying MAC addr-mask pairs.
 * Each 12-byte array holds the destination address (6 bytes)
 * followed by the source address (6 bytes).
 */
typedef struct	_ipfw_insn_mac {
	ipfw_insn o;
	u_char addr[12];	/* dst[6] + src[6] */
	u_char mask[12];	/* dst[6] + src[6] */
} ipfw_insn_mac;
301
/*
 * Instruction used for interface match rules (recv xx, xmit xx).
 * The interface is described by a name of up to IFNAMSIZ bytes plus
 * the union p.
 * NOTE(review): how 'ip' and 'glob' are selected is not visible in
 * this header -- presumably 'ip' is used when matching by address and
 * 'glob' flags a pattern in 'name'; confirm against the users.
 */
typedef struct	_ipfw_insn_if {
	ipfw_insn o;
	union {
		struct in_addr ip;
		int glob;
	} p;
	char name[IFNAMSIZ];	/* interface name */
} ipfw_insn_if;
313
/*
 * Instruction storing an altq queue id number (see O_ALTQ).
 */
typedef struct _ipfw_insn_altq {
	ipfw_insn	o;
	u_int32_t	qid;	/* altq queue id */
} ipfw_insn_altq;
321
/*
 * Instruction used for limit rules (see O_LIMIT).
 * limit_mask is a combination of the DYN_* bits defined inside the
 * structure; conn_limit is the 16-bit limit value.
 */
typedef struct	_ipfw_insn_limit {
	ipfw_insn o;
	u_int8_t _pad;
	u_int8_t limit_mask;	/* combination of DYN_* below	*/
#define	DYN_SRC_ADDR	0x1
#define	DYN_SRC_PORT	0x2
#define	DYN_DST_ADDR	0x4
#define	DYN_DST_PORT	0x8

	u_int16_t conn_limit;
} ipfw_insn_limit;
336
/*
 * Instruction used for log instructions (see O_LOG).
 * max_log is the configured cap (0 means log everything) and
 * log_left is the remaining budget.
 */
typedef struct  _ipfw_insn_log {
        ipfw_insn o;
	u_int32_t max_log;	/* how many do we log -- 0 = all */
	u_int32_t log_left;	/* how many left to log 	*/
} ipfw_insn_log;
345
/*
 * Data structures required by both ipfw(8) and ipfw(4) but not part of the
 * management API are protected by IPFW_INTERNAL.
 */
#ifdef IPFW_INTERNAL
/*
 * Server pool support (LSNAT): one address/port entry of a pool.
 * Entries are chained through _next; the list head is the
 * spool_chain member of struct cfg_redir.
 */
struct cfg_spool {
	LIST_ENTRY(cfg_spool)   _next;          /* chain of spool instances */
	struct in_addr          addr;
	u_short                 port;
};
#endif
358
/*
 * Redirect mode ids: the kind of redirect held in cfg_redir.mode
 * (address, port or protocol).
 */
#define REDIR_ADDR      0x01
#define REDIR_PORT      0x02
#define REDIR_PROTO     0x04
363
#ifdef IPFW_INTERNAL
/*
 * Nat redirect configuration: one redirect entry of a nat instance.
 * Entries are chained through _next (list head: cfg_nat.redir_chain)
 * and each entry owns a chain of spool_cnt cfg_spool entries.
 */
struct cfg_redir {
	LIST_ENTRY(cfg_redir)   _next;          /* chain of redir instances */
	u_int16_t               mode;           /* type of redirect mode (REDIR_*) */
	struct in_addr	        laddr;          /* local ip address */
	struct in_addr	        paddr;          /* public ip address */
	struct in_addr	        raddr;          /* remote ip address */
	u_short                 lport;          /* local port */
	u_short                 pport;          /* public port */
	u_short                 rport;          /* remote port  */
	u_short                 pport_cnt;      /* number of public ports */
	u_short                 rport_cnt;      /* number of remote ports */
	/* NOTE(review): comment says tcp/udp, but REDIR_PROTO exists, so other values may occur -- confirm */
	int                     proto;          /* protocol: tcp/udp */
	struct alias_link       **alink;
	/* number of entries in the spool chain */
	u_int16_t               spool_cnt;
	/* chain of spool instances */
	LIST_HEAD(spool_chain, cfg_spool) spool_chain;
};
#endif
385
/*
 * Length of a NAT buffer.  Not used inside this header.
 * NOTE(review): presumably the size in bytes of the buffer used to
 * exchange NAT configuration -- confirm against the users.
 */
#define NAT_BUF_LEN     1024
387
#ifdef IPFW_INTERNAL
/*
 * Nat configuration data struct: one nat instance.
 * Instances are chained through _next and each owns a chain of
 * cfg_redir entries.
 */
struct cfg_nat {
	/* chain of nat instances */
	LIST_ENTRY(cfg_nat)     _next;
	int                     id;                     /* nat id */
	struct in_addr          ip;                     /* nat ip address */
	char                    if_name[IF_NAMESIZE];   /* interface name */
	int                     mode;                   /* aliasing mode */
	struct libalias	        *lib;                   /* libalias instance */
	/*
	 * number of entries in the redir chain (the comment used to
	 * say "spool chain", presumably a copy/paste slip)
	 */
	int                     redir_cnt;
	/* chain of redir instances */
	LIST_HEAD(redir_chain, cfg_redir) redir_chain;
};
#endif
404
/*
 * Shorthands for the sizes of the nat configuration structures.
 * The structures themselves are only declared under IPFW_INTERNAL,
 * so these macros can only be expanded where IPFW_INTERNAL is defined.
 */
#define SOF_NAT         sizeof(struct cfg_nat)
#define SOF_REDIR       sizeof(struct cfg_redir)
#define SOF_SPOOL       sizeof(struct cfg_spool)
408
/*
 * Nat command: an instruction carrying a pointer to the nat instance.
 * The pointer follows a 32-bit ipfw_insn, so it may be unaligned on
 * 64-bit architectures and must be handled with care.
 */
typedef struct	_ipfw_insn_nat {
 	ipfw_insn	o;
 	struct cfg_nat *nat;
} ipfw_insn_nat;
414
/*
 * Apply an ipv6 mask to an ipv6 addr, in place: each of the four
 * 32-bit words of *addr is ANDed with the matching word of *mask.
 *
 * Both arguments are pointers to objects with a
 * __u6_addr.__u6_addr32[4] member (struct in6_addr here) and are
 * evaluated more than once, so they must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: "if (c) APPLY_MASK(a, m); else ..." works and
 * an unbraced "if" guards all four assignments, not just the first.
 * Callers must supply the terminating semicolon.
 */
#define APPLY_MASK(addr,mask)	do {					\
    (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
    (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \
    (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \
    (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; \
} while (0)
421
/*
 * Structure for ipv6: an instruction carrying one IPv6 address and
 * one IPv6 mask.
 */
typedef struct _ipfw_insn_ip6 {
       ipfw_insn o;
       struct in6_addr addr6;
       struct in6_addr mask6;
} ipfw_insn_ip6;
428
/*
 * Used to support icmp6 types: d[] holds 7 * 32 = 224 bits.
 */
typedef struct _ipfw_insn_icmp6 {
       ipfw_insn o;
       uint32_t d[7]; /* XXX This number is related to ICMP6_MAXTYPE,
                       *     defined in netinet/icmp6.h, as follows:
                       *     n = ICMP6_MAXTYPE/32 + 1
                       *     ICMP6_MAXTYPE was 203 when this was written.
                       */
} ipfw_insn_icmp6;
438
439/*
440 * Here we have the structure representing an ipfw rule.
441 *
442 * It starts with a general area (with link fields and counters)
443 * followed by an array of one or more instructions, which the code
444 * accesses as an array of 32-bit values.
445 *
446 * Given a rule pointer  r:
447 *
448 *  r->cmd		is the start of the first instruction.
449 *  ACTION_PTR(r)	is the start of the first action (things to do
450 *			once a rule matched).
451 *
452 * When assembling instruction, remember the following:
453 *
454 *  + if a rule has a "keep-state" (or "limit") option, then the
455 *	first instruction (at r->cmd) MUST BE an O_PROBE_STATE
456 *  + if a rule has a "log" option, then the first action
457 *	(at ACTION_PTR(r)) MUST be O_LOG
458 *  + if a rule has an "altq" option, it comes after "log"
459 *  + if a rule has an O_TAG option, it comes after "log" and "altq"
460 *
461 * NOTE: we use a simple linked list of rules because we never need
462 * 	to delete a rule without scanning the list. We do not use
463 *	queue(3) macros for portability and readability.
464 */
465
/*
 * One ipfw rule: link fields, bookkeeping and counters, followed by
 * the rule's instructions.  Only the first instruction slot (cmd[1])
 * is declared; a rule occupies RULESIZE(rule) bytes, i.e. room for
 * cmd_len 32-bit words starting at cmd.  The first action is found at
 * cmd + act_ofs words (see ACTION_PTR).
 */
struct ip_fw {
	struct ip_fw	*x_next;	/* linked list of rules		*/
	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
	/* 'next_rule' is used to pass up 'set_disable' status		*/

	uint16_t	act_ofs;	/* offset of action in 32-bit units */
	uint16_t	cmd_len;	/* # of 32-bit words in cmd	*/
	uint16_t	rulenum;	/* rule number			*/
	uint8_t	set;		/* rule set (0..31)		*/
#define	RESVD_SET	31	/* set for default and persistent rules */
	uint8_t		_pad;		/* padding			*/
	uint32_t	id;		/* rule id */

	/* These fields are present in all rules.			*/
	uint64_t	pcnt;		/* Packet counter		*/
	uint64_t	bcnt;		/* Byte counter			*/
	uint32_t	timestamp;	/* tv_sec of last match		*/

	/* must stay last: the remaining instructions follow in memory	*/
	ipfw_insn	cmd[1];		/* storage for commands		*/
};
486
/*
 * ACTION_PTR(rule) yields an (ipfw_insn *) pointing at the first
 * action of a rule: the instruction located act_ofs 32-bit words
 * after the start of rule->cmd.
 *
 * 'rule' is a pointer to struct ip_fw and is evaluated twice, so it
 * must not have side effects.
 *
 * The whole expansion is parenthesized.  Without the outer parentheses
 * the result is a bare cast expression, and since postfix operators
 * bind tighter than a cast, ACTION_PTR(r)->opcode would apply "->" to
 * the (u_int32_t *) operand instead of to the resulting ipfw_insn
 * pointer.
 */
#define ACTION_PTR(rule)				\
	((ipfw_insn *)((u_int32_t *)((rule)->cmd) + ((rule)->act_ofs)))
489
/*
 * RULESIZE(rule) is the total size of a rule in bytes:
 * sizeof(struct ip_fw), plus 4 bytes for each of the cmd_len 32-bit
 * instruction words, minus the 4 bytes subtracted for the cmd[1]
 * slot that sizeof(struct ip_fw) already counts.
 * 'rule' may be any pointer; it is cast to (struct ip_fw *).
 */
#define RULESIZE(rule)							\
	(sizeof(struct ip_fw) +						\
	 ((struct ip_fw *)(rule))->cmd_len * 4 - 4)
492
#if 1 // should be moved to in.h
/*
 * This structure is used both as a flow mask and as a flow id in
 * various parts of the code.
 * addr_type is used in userland and kernel to mark the address type
 * (see IS_IP6_FLOW_ID for the IPv6 test).
 * fib is used in the kernel to record the fib in use.
 * _flags is used in the kernel to store tcp flags for dynamic rules.
 */
struct ipfw_flow_id {
	uint32_t	dst_ip;
	uint32_t	src_ip;
	uint16_t	dst_port;
	uint16_t	src_port;
	uint8_t		fib;
	uint8_t		proto;
	uint8_t		_flags;	/* protocol-specific flags */
	uint8_t		addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
	struct in6_addr dst_ip6;
	struct in6_addr src_ip6;
	uint32_t	flow_id6;
	uint32_t	extra; /* queue/pipe or frag_id */
};
#endif
516
/*
 * True when the flow id pointed to by 'id' is marked as IPv6,
 * i.e. its addr_type field equals 6.
 */
#define IS_IP6_FLOW_ID(id)	((id)->addr_type == 6)
518
/*
 * Dynamic ipfw rule.
 *
 * Dynamic rules are linked through 'next'.  Each one points back to
 * the struct ip_fw it belongs to ('rule') and to a parent dynamic
 * rule ('parent'), and carries its own packet/byte counters and the
 * (masked) flow id it matches.
 */
typedef struct _ipfw_dyn_rule ipfw_dyn_rule;

struct _ipfw_dyn_rule {
	ipfw_dyn_rule	*next;		/* linked list of rules.	*/
	struct ip_fw *rule;		/* pointer to rule		*/
	/* 'rule' is used to pass up the rule number (from the parent)	*/

	ipfw_dyn_rule *parent;		/* pointer to parent rule	*/
	u_int64_t	pcnt;		/* packet match counter		*/
	u_int64_t	bcnt;		/* byte match counter		*/
	struct ipfw_flow_id id;		/* (masked) flow id		*/
	u_int32_t	expire;		/* expire time			*/
	u_int32_t	bucket;		/* which bucket in hash table	*/
	u_int32_t	state;		/* state of this rule (typically a
					 * combination of TCP flags)
					 */
	u_int32_t	ack_fwd;	/* most recent ACKs in forward	*/
	u_int32_t	ack_rev;	/* and reverse directions (used	*/
					/* to generate keepalives)	*/
	u_int16_t	dyn_type;	/* rule type			*/
	u_int16_t	count;		/* refcount			*/
};
544
/*
 * Definitions for IP option names.
 * Each option is a distinct bit, so several can be combined in one
 * bitmap.
 */
#define	IP_FW_IPOPT_LSRR	0x01
#define	IP_FW_IPOPT_SSRR	0x02
#define	IP_FW_IPOPT_RR		0x04
#define	IP_FW_IPOPT_TS		0x08
552
/*
 * Definitions for TCP option names.
 * Each option is a distinct bit, so several can be combined in one
 * bitmap.
 */
#define	IP_FW_TCPOPT_MSS	0x01
#define	IP_FW_TCPOPT_WINDOW	0x02
#define	IP_FW_TCPOPT_SACK	0x04
#define	IP_FW_TCPOPT_TS		0x08
#define	IP_FW_TCPOPT_CC		0x10
561
/*
 * Fake ICMP / ICMPv6 codes that mean "send a TCP RST" instead of an
 * ICMP message.
 */
#define	ICMP_REJECT_RST		0x100	/* fake ICMP code (send a TCP RST) */
#define	ICMP6_UNREACH_RST	0x100	/* fake ICMPv6 code (send a TCP RST) */
564
/*
 * These are used for lookup tables.
 *
 * ipfw_table_entry describes one table entry: a network address with
 * its mask length, the value associated with it, and the number of
 * the table it belongs to.
 */
typedef struct	_ipfw_table_entry {
	in_addr_t	addr;		/* network address		*/
	u_int32_t	value;		/* value			*/
	u_int16_t	tbl;		/* table number			*/
	u_int8_t	masklen;	/* mask length			*/
} ipfw_table_entry;

/*
 * ipfw_table is a header followed directly in memory by 'cnt'
 * entries.  The trailing array is a C99 flexible array member rather
 * than the GNU zero-length array "ent[0]"; sizeof(ipfw_table) and the
 * offset of 'ent' are the same as before, so the layout is unchanged.
 * Allocate sizeof(ipfw_table) + cnt * sizeof(ipfw_table_entry) bytes.
 */
typedef struct	_ipfw_table {
	u_int32_t	size;		/* size of entries in bytes	*/
	u_int32_t	cnt;		/* # of entries			*/
	u_int16_t	tbl;		/* table number			*/
	ipfw_table_entry ent[];		/* entries			*/
} ipfw_table;
581
582#endif /* _IPFW2_H */
583