Deleted Added
full compact
ip_fw2.c (222582) ip_fw2.c (222748)
1/*-
2 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw2.c 222582 2011-06-01 19:44:52Z ae $");
27__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw2.c 222748 2011-06-06 12:55:02Z rwatson $");
28
29/*
30 * The FreeBSD IP packet firewall, main file
31 */
32
33#if !defined(KLD_MODULE)
34#include "opt_ipfw.h"
35#include "opt_ipdivert.h"
36#include "opt_ipdn.h"
37#include "opt_inet.h"
38#ifndef INET
39#error IPFIREWALL requires INET.
40#endif /* INET */
41#endif
42#include "opt_inet6.h"
43#include "opt_ipsec.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/condvar.h>
48#include <sys/eventhandler.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/kernel.h>
52#include <sys/lock.h>
53#include <sys/jail.h>
54#include <sys/module.h>
55#include <sys/priv.h>
56#include <sys/proc.h>
57#include <sys/rwlock.h>
58#include <sys/socket.h>
59#include <sys/socketvar.h>
60#include <sys/sysctl.h>
61#include <sys/syslog.h>
62#include <sys/ucred.h>
63#include <net/ethernet.h> /* for ETHERTYPE_IP */
64#include <net/if.h>
65#include <net/route.h>
66#include <net/pf_mtag.h>
67#include <net/vnet.h>
68
69#include <netinet/in.h>
70#include <netinet/in_var.h>
71#include <netinet/in_pcb.h>
72#include <netinet/ip.h>
73#include <netinet/ip_var.h>
74#include <netinet/ip_icmp.h>
75#include <netinet/ip_fw.h>
76#include <netinet/ipfw/ip_fw_private.h>
77#include <netinet/ip_carp.h>
78#include <netinet/pim.h>
79#include <netinet/tcp_var.h>
80#include <netinet/udp.h>
81#include <netinet/udp_var.h>
82#include <netinet/sctp.h>
83
84#include <netinet/ip6.h>
85#include <netinet/icmp6.h>
86#ifdef INET6
87#include <netinet6/scope6_var.h>
88#include <netinet6/ip6_var.h>
89#endif
90
91#include <machine/in_cksum.h> /* XXX for in_cksum */
92
93#ifdef MAC
94#include <security/mac/mac_framework.h>
95#endif
96
97/*
98 * static variables followed by global ones.
99 * All ipfw global variables are here.
100 */
101
102/* ipfw_vnet_ready controls when we are open for business */
103static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
104#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
105
/* Exported below as the "deny_unknown_exthdrs" sysctl (IPv6 only). */
106static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
107#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
108
/*
 * default_to_accept starts at 1 only when the kernel option
 * IPFIREWALL_DEFAULT_TO_ACCEPT is defined; otherwise it is
 * zero-initialised like any other static.
 */
109#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
110static int default_to_accept = 1;
111#else
112static int default_to_accept;
113#endif
114
/* autoinc_step has no initialiser; fw_one_pass defaults to 1. */
115VNET_DEFINE(int, autoinc_step);
116VNET_DEFINE(int, fw_one_pass) = 1;
117
118/*
119 * Each rule belongs to one of 32 different sets (0..31).
120 * The variable set_disable contains one bit per set.
121 * If the bit is set, all rules in the corresponding set
122 * are disabled. Set RESVD_SET(31) is reserved for the default rule
123 * and rules that are not deleted by the flush command,
124 * and CANNOT be disabled.
125 * Rules in set RESVD_SET can only be deleted individually.
126 */
127VNET_DEFINE(u_int32_t, set_disable);
128#define V_set_disable VNET(set_disable)
129
130VNET_DEFINE(int, fw_verbose);
131/* counter for ipfw_log(NULL...) */
132VNET_DEFINE(u_int64_t, norule_counter);
133VNET_DEFINE(int, verbose_limit);
134
135/* layer3_chain contains the list of rules for layer 3 */
136VNET_DEFINE(struct ip_fw_chain, layer3_chain);
137
/*
 * NAT hook pointers.  Only ipfw_nat_ptr is explicitly set to NULL here.
 * NOTE(review): presumably these are filled in by the NAT module when
 * it loads -- confirm against the code that assigns them.
 */
138ipfw_nat_t *ipfw_nat_ptr = NULL;
139struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
140ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
141ipfw_nat_cfg_t *ipfw_nat_del_ptr;
142ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
143ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
144
/*
 * sysctl knobs.  Everything below is compiled only when SYSCTL_NODE is
 * defined.  The IPv4 knobs hang off a "fw" node created under
 * _net_inet_ip; when INET6 is defined a second "fw" node is created
 * under _net_inet6_ip6.
 */
145#ifdef SYSCTL_NODE
/* Plain variables holding compile-time constants for the read-only sysctls. */
146uint32_t dummy_def = IPFW_DEFAULT_RULE;
147uint32_t dummy_tables_max = IPFW_TABLES_MAX;
148
149SYSBEGIN(f3)
150
151SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
152SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
153 CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
154 "Only do a single pass through ipfw when using dummynet(4)");
155SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
156 CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
157 "Rule number auto-increment step");
158SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
159 CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
160 "Log matches to ipfw rules");
161SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
162 CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
163 "Set upper limit of matches of ipfw rules logged");
164SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
165 &dummy_def, 0,
166 "The default/max possible rule number.");
167SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD,
168 &dummy_tables_max, 0,
169 "The maximum number of tables.");
/* default_to_accept is declared CTLFLAG_RDTUN and also registered as a tunable. */
170SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
171 &default_to_accept, 0,
172 "Make the default rule accept all packets.");
173TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
174SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
175 CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
176 "Number of static rules");
177
178#ifdef INET6
179SYSCTL_DECL(_net_inet6_ip6);
180SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
181SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
182 CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
183 "Deny packets with unknown IPv6 Extension Headers");
184#endif /* INET6 */
185
186SYSEND
187
188#endif /* SYSCTL_NODE */
189
190
191/*
192 * Some macros used in the various matching options.
193 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
 * by stepping (ip)->ip_hl 32-bit words past ip.  The ip argument is
 * expanded twice, so it must not have side effects.
194 * Other macros just cast void * into the appropriate type
195 */
196#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
197#define TCP(p) ((struct tcphdr *)(p))
198#define SCTP(p) ((struct sctphdr *)(p))
199#define UDP(p) ((struct udphdr *)(p))
200#define ICMP(p) ((struct icmphdr *)(p))
201#define ICMP6(p) ((struct icmp6_hdr *)(p))
202
203static __inline int
204icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
205{
206 int type = icmp->icmp_type;
207
208 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
209}
210
211#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
212 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
213
214static int
215is_icmp_query(struct icmphdr *icmp)
216{
217 int type = icmp->icmp_type;
218
219 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
220}
221#undef TT
222
223/*
224 * The following checks use two arrays of 8 or 16 bits to store the
225 * bits that we want set or clear, respectively. They are in the
226 * low and high half of cmd->arg1 or cmd->d[0].
227 *
228 * We scan options and store the bits we find set. We succeed if
229 *
230 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
231 *
232 * The code is sometimes optimized not to store additional variables.
233 */
234
235static int
236flags_match(ipfw_insn *cmd, u_int8_t bits)
237{
238 u_char want_clear;
239 bits = ~bits;
240
241 if ( ((cmd->arg1 & 0xff) & bits) != 0)
242 return 0; /* some bits we want set were clear */
243 want_clear = (cmd->arg1 >> 8) & 0xff;
244 if ( (want_clear & bits) != want_clear)
245 return 0; /* some bits we want clear were set */
246 return 1;
247}
248
249static int
250ipopts_match(struct ip *ip, ipfw_insn *cmd)
251{
252 int optlen, bits = 0;
253 u_char *cp = (u_char *)(ip + 1);
254 int x = (ip->ip_hl << 2) - sizeof (struct ip);
255
256 for (; x > 0; x -= optlen, cp += optlen) {
257 int opt = cp[IPOPT_OPTVAL];
258
259 if (opt == IPOPT_EOL)
260 break;
261 if (opt == IPOPT_NOP)
262 optlen = 1;
263 else {
264 optlen = cp[IPOPT_OLEN];
265 if (optlen <= 0 || optlen > x)
266 return 0; /* invalid or truncated */
267 }
268 switch (opt) {
269
270 default:
271 break;
272
273 case IPOPT_LSRR:
274 bits |= IP_FW_IPOPT_LSRR;
275 break;
276
277 case IPOPT_SSRR:
278 bits |= IP_FW_IPOPT_SSRR;
279 break;
280
281 case IPOPT_RR:
282 bits |= IP_FW_IPOPT_RR;
283 break;
284
285 case IPOPT_TS:
286 bits |= IP_FW_IPOPT_TS;
287 break;
288 }
289 }
290 return (flags_match(cmd, bits));
291}
292
293static int
294tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
295{
296 int optlen, bits = 0;
297 u_char *cp = (u_char *)(tcp + 1);
298 int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
299
300 for (; x > 0; x -= optlen, cp += optlen) {
301 int opt = cp[0];
302 if (opt == TCPOPT_EOL)
303 break;
304 if (opt == TCPOPT_NOP)
305 optlen = 1;
306 else {
307 optlen = cp[1];
308 if (optlen <= 0)
309 break;
310 }
311
312 switch (opt) {
313
314 default:
315 break;
316
317 case TCPOPT_MAXSEG:
318 bits |= IP_FW_TCPOPT_MSS;
319 break;
320
321 case TCPOPT_WINDOW:
322 bits |= IP_FW_TCPOPT_WINDOW;
323 break;
324
325 case TCPOPT_SACK_PERMITTED:
326 case TCPOPT_SACK:
327 bits |= IP_FW_TCPOPT_SACK;
328 break;
329
330 case TCPOPT_TIMESTAMP:
331 bits |= IP_FW_TCPOPT_TS;
332 break;
333
334 }
335 }
336 return (flags_match(cmd, bits));
337}
338
339static int
340iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
341{
342 if (ifp == NULL) /* no iface with this packet, match fails */
343 return 0;
344 /* Check by name or by IP address */
345 if (cmd->name[0] != '\0') { /* match by name */
346 /* Check name */
347 if (cmd->p.glob) {
348 if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
349 return(1);
350 } else {
351 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
352 return(1);
353 }
354 } else {
355#ifdef __FreeBSD__ /* and OSX too ? */
356 struct ifaddr *ia;
357
358 if_addr_rlock(ifp);
359 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
360 if (ia->ifa_addr->sa_family != AF_INET)
361 continue;
362 if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
363 (ia->ifa_addr))->sin_addr.s_addr) {
364 if_addr_runlock(ifp);
365 return(1); /* match */
366 }
367 }
368 if_addr_runlock(ifp);
369#endif /* __FreeBSD__ */
370 }
371 return(0); /* no match, fail ... */
372}
373
374/*
375 * The verify_path function checks if a route to the src exists and
376 * if it is reachable via ifp (when provided).
377 *
378 * The 'verrevpath' option checks that the interface that an IP packet
379 * arrives on is the same interface that traffic destined for the
380 * packet's source address would be routed out of.
381 * The 'versrcreach' option just checks that the source address is
382 * reachable via any route (except default) in the routing table.
383 * These two are a measure to block forged packets. This is also
384 * commonly known as "anti-spoofing" or Unicast Reverse Path
385 * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
386 * is purposely reminiscent of the Cisco IOS command,
387 *
388 * ip verify unicast reverse-path
389 * ip verify unicast source reachable-via any
390 *
391 * which implements the same functionality. But note that the syntax
392 * is misleading, and the check may be performed on all IP packets
393 * whether unicast, multicast, or broadcast.
394 */
395static int
396verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
397{
398#ifndef __FreeBSD__
399 return 0;
400#else
401 struct route ro;
402 struct sockaddr_in *dst;
403
404 bzero(&ro, sizeof(ro));
405
406 dst = (struct sockaddr_in *)&(ro.ro_dst);
407 dst->sin_family = AF_INET;
408 dst->sin_len = sizeof(*dst);
409 dst->sin_addr = src;
410 in_rtalloc_ign(&ro, 0, fib);
411
412 if (ro.ro_rt == NULL)
413 return 0;
414
415 /*
416 * If ifp is provided, check for equality with rtentry.
417 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
418 * in order to pass packets injected back by if_simloop():
419 * if useloopback == 1 routing entry (via lo0) for our own address
420 * may exist, so we need to handle routing assymetry.
421 */
422 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
423 RTFREE(ro.ro_rt);
424 return 0;
425 }
426
427 /* if no ifp provided, check if rtentry is not default route */
428 if (ifp == NULL &&
429 satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
430 RTFREE(ro.ro_rt);
431 return 0;
432 }
433
434 /* or if this is a blackhole/reject route */
435 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
436 RTFREE(ro.ro_rt);
437 return 0;
438 }
439
440 /* found valid route */
441 RTFREE(ro.ro_rt);
442 return 1;
443#endif /* __FreeBSD__ */
444}
445
446#ifdef INET6
447/*
448 * ipv6 specific rules here...
449 */
450static __inline int
451icmp6type_match (int type, ipfw_insn_u32 *cmd)
452{
453 return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
454}
455
456static int
457flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
458{
459 int i;
460 for (i=0; i <= cmd->o.arg1; ++i )
461 if (curr_flow == cmd->d[i] )
462 return 1;
463 return 0;
464}
465
466/* support for IP6_*_ME opcodes */
467static int
468search_ip6_addr_net (struct in6_addr * ip6_addr)
469{
470 struct ifnet *mdc;
471 struct ifaddr *mdc2;
472 struct in6_ifaddr *fdm;
473 struct in6_addr copia;
474
475 TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
476 if_addr_rlock(mdc);
477 TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
478 if (mdc2->ifa_addr->sa_family == AF_INET6) {
479 fdm = (struct in6_ifaddr *)mdc2;
480 copia = fdm->ia_addr.sin6_addr;
481 /* need for leaving scope_id in the sock_addr */
482 in6_clearscope(&copia);
483 if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
484 if_addr_runlock(mdc);
485 return 1;
486 }
487 }
488 }
489 if_addr_runlock(mdc);
490 }
491 return 0;
492}
493
494static int
495verify_path6(struct in6_addr *src, struct ifnet *ifp)
496{
497 struct route_in6 ro;
498 struct sockaddr_in6 *dst;
499
500 bzero(&ro, sizeof(ro));
501
502 dst = (struct sockaddr_in6 * )&(ro.ro_dst);
503 dst->sin6_family = AF_INET6;
504 dst->sin6_len = sizeof(*dst);
505 dst->sin6_addr = *src;
506 /* XXX MRT 0 for ipv6 at this time */
507 rtalloc_ign((struct route *)&ro, 0);
508
509 if (ro.ro_rt == NULL)
510 return 0;
511
512 /*
513 * if ifp is provided, check for equality with rtentry
514 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
515 * to support the case of sending packets to an address of our own.
516 * (where the former interface is the first argument of if_simloop()
517 * (=ifp), the latter is lo0)
518 */
519 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
520 RTFREE(ro.ro_rt);
521 return 0;
522 }
523
524 /* if no ifp provided, check if rtentry is not default route */
525 if (ifp == NULL &&
526 IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
527 RTFREE(ro.ro_rt);
528 return 0;
529 }
530
531 /* or if this is a blackhole/reject route */
532 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
533 RTFREE(ro.ro_rt);
534 return 0;
535 }
536
537 /* found valid route */
538 RTFREE(ro.ro_rt);
539 return 1;
540
541}
542
543static int
544is_icmp6_query(int icmp6_type)
545{
546 if ((icmp6_type <= ICMP6_MAXTYPE) &&
547 (icmp6_type == ICMP6_ECHO_REQUEST ||
548 icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
549 icmp6_type == ICMP6_WRUREQUEST ||
550 icmp6_type == ICMP6_FQDN_QUERY ||
551 icmp6_type == ICMP6_NI_QUERY))
552 return (1);
553
554 return (0);
555}
556
557static void
558send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
559{
560 struct mbuf *m;
561
562 m = args->m;
563 if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
564 struct tcphdr *tcp;
565 tcp = (struct tcphdr *)((char *)ip6 + hlen);
566
567 if ((tcp->th_flags & TH_RST) == 0) {
568 struct mbuf *m0;
569 m0 = ipfw_send_pkt(args->m, &(args->f_id),
570 ntohl(tcp->th_seq), ntohl(tcp->th_ack),
571 tcp->th_flags | TH_RST);
572 if (m0 != NULL)
573 ip6_output(m0, NULL, NULL, 0, NULL, NULL,
574 NULL);
575 }
576 FREE_PKT(m);
577 } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
578#if 0
579 /*
580 * Unlike above, the mbufs need to line up with the ip6 hdr,
581 * as the contents are read. We need to m_adj() the
582 * needed amount.
583 * The mbuf will however be thrown away so we can adjust it.
584 * Remember we did an m_pullup on it already so we
585 * can make some assumptions about contiguousness.
586 */
587 if (args->L3offset)
588 m_adj(m, args->L3offset);
589#endif
590 icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
591 } else
592 FREE_PKT(m);
593
594 args->m = NULL;
595}
596
597#endif /* INET6 */
598
599
600/*
601 * sends a reject message, consuming the mbuf passed as an argument.
602 */
603static void
604send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
605{
606
607#if 0
608 /* XXX When ip is not guaranteed to be at mtod() we will
609 * need to account for this */
610 * The mbuf will however be thrown away so we can adjust it.
611 * Remember we did an m_pullup on it already so we
612 * can make some assumptions about contiguousness.
613 */
614 if (args->L3offset)
615 m_adj(m, args->L3offset);
616#endif
617 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
618 /* We need the IP header in host order for icmp_error(). */
619 SET_HOST_IPLEN(ip);
620 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
621 } else if (args->f_id.proto == IPPROTO_TCP) {
622 struct tcphdr *const tcp =
623 L3HDR(struct tcphdr, mtod(args->m, struct ip *));
624 if ( (tcp->th_flags & TH_RST) == 0) {
625 struct mbuf *m;
626 m = ipfw_send_pkt(args->m, &(args->f_id),
627 ntohl(tcp->th_seq), ntohl(tcp->th_ack),
628 tcp->th_flags | TH_RST);
629 if (m != NULL)
630 ip_output(m, NULL, NULL, 0, NULL, NULL);
631 }
632 FREE_PKT(args->m);
633 } else
634 FREE_PKT(args->m);
635 args->m = NULL;
636}
637
638/*
639 * Support for uid/gid/jail lookup. These tests are expensive
640 * (because we may need to look into the list of active sockets)
641 * so we cache the results. ugid_lookupp is 0 if we have not
642 * yet done a lookup, 1 if we succeeded, and -1 if we tried
643 * and failed. The function always returns the match value.
644 * We could actually spare the variable and use *uc, setting
645 * it to '(void *)check_uidgid if we have no info, NULL if
646 * we tried and failed, or any other value if successful.
647 */
648static int
649check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
650 struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
651 u_int16_t src_port, int *ugid_lookupp,
652 struct ucred **uc, struct inpcb *inp)
653{
654#ifndef __FreeBSD__
655 return cred_check(insn, proto, oif,
656 dst_ip, dst_port, src_ip, src_port,
657 (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
658#else /* FreeBSD */
659 struct inpcbinfo *pi;
660 int lookupflags;
661 struct inpcb *pcb;
662 int match;
663
664 /*
665 * Check to see if the UDP or TCP stack supplied us with
666 * the PCB. If so, rather than holding a lock and looking
667 * up the PCB, we can use the one that was supplied.
668 */
669 if (inp && *ugid_lookupp == 0) {
670 INP_LOCK_ASSERT(inp);
671 if (inp->inp_socket != NULL) {
672 *uc = crhold(inp->inp_cred);
673 *ugid_lookupp = 1;
674 } else
675 *ugid_lookupp = -1;
676 }
677 /*
678 * If we have already been here and the packet has no
679 * PCB entry associated with it, then we can safely
680 * assume that this is a no match.
681 */
682 if (*ugid_lookupp == -1)
683 return (0);
684 if (proto == IPPROTO_TCP) {
685 lookupflags = 0;
686 pi = &V_tcbinfo;
687 } else if (proto == IPPROTO_UDP) {
688 lookupflags = INPLOOKUP_WILDCARD;
689 pi = &V_udbinfo;
690 } else
691 return 0;
692 lookupflags |= INPLOOKUP_RLOCKPCB;
693 match = 0;
694 if (*ugid_lookupp == 0) {
28
29/*
30 * The FreeBSD IP packet firewall, main file
31 */
32
33#if !defined(KLD_MODULE)
34#include "opt_ipfw.h"
35#include "opt_ipdivert.h"
36#include "opt_ipdn.h"
37#include "opt_inet.h"
38#ifndef INET
39#error IPFIREWALL requires INET.
40#endif /* INET */
41#endif
42#include "opt_inet6.h"
43#include "opt_ipsec.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/condvar.h>
48#include <sys/eventhandler.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/kernel.h>
52#include <sys/lock.h>
53#include <sys/jail.h>
54#include <sys/module.h>
55#include <sys/priv.h>
56#include <sys/proc.h>
57#include <sys/rwlock.h>
58#include <sys/socket.h>
59#include <sys/socketvar.h>
60#include <sys/sysctl.h>
61#include <sys/syslog.h>
62#include <sys/ucred.h>
63#include <net/ethernet.h> /* for ETHERTYPE_IP */
64#include <net/if.h>
65#include <net/route.h>
66#include <net/pf_mtag.h>
67#include <net/vnet.h>
68
69#include <netinet/in.h>
70#include <netinet/in_var.h>
71#include <netinet/in_pcb.h>
72#include <netinet/ip.h>
73#include <netinet/ip_var.h>
74#include <netinet/ip_icmp.h>
75#include <netinet/ip_fw.h>
76#include <netinet/ipfw/ip_fw_private.h>
77#include <netinet/ip_carp.h>
78#include <netinet/pim.h>
79#include <netinet/tcp_var.h>
80#include <netinet/udp.h>
81#include <netinet/udp_var.h>
82#include <netinet/sctp.h>
83
84#include <netinet/ip6.h>
85#include <netinet/icmp6.h>
86#ifdef INET6
87#include <netinet6/scope6_var.h>
88#include <netinet6/ip6_var.h>
89#endif
90
91#include <machine/in_cksum.h> /* XXX for in_cksum */
92
93#ifdef MAC
94#include <security/mac/mac_framework.h>
95#endif
96
97/*
98 * static variables followed by global ones.
99 * All ipfw global variables are here.
100 */
101
102/* ipfw_vnet_ready controls when we are open for business */
103static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
104#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
105
/* Exported below as the "deny_unknown_exthdrs" sysctl (IPv6 only). */
106static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
107#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
108
/*
 * default_to_accept starts at 1 only when the kernel option
 * IPFIREWALL_DEFAULT_TO_ACCEPT is defined; otherwise it is
 * zero-initialised like any other static.
 */
109#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
110static int default_to_accept = 1;
111#else
112static int default_to_accept;
113#endif
114
/* autoinc_step has no initialiser; fw_one_pass defaults to 1. */
115VNET_DEFINE(int, autoinc_step);
116VNET_DEFINE(int, fw_one_pass) = 1;
117
118/*
119 * Each rule belongs to one of 32 different sets (0..31).
120 * The variable set_disable contains one bit per set.
121 * If the bit is set, all rules in the corresponding set
122 * are disabled. Set RESVD_SET(31) is reserved for the default rule
123 * and rules that are not deleted by the flush command,
124 * and CANNOT be disabled.
125 * Rules in set RESVD_SET can only be deleted individually.
126 */
127VNET_DEFINE(u_int32_t, set_disable);
128#define V_set_disable VNET(set_disable)
129
130VNET_DEFINE(int, fw_verbose);
131/* counter for ipfw_log(NULL...) */
132VNET_DEFINE(u_int64_t, norule_counter);
133VNET_DEFINE(int, verbose_limit);
134
135/* layer3_chain contains the list of rules for layer 3 */
136VNET_DEFINE(struct ip_fw_chain, layer3_chain);
137
/*
 * NAT hook pointers.  Only ipfw_nat_ptr is explicitly set to NULL here.
 * NOTE(review): presumably these are filled in by the NAT module when
 * it loads -- confirm against the code that assigns them.
 */
138ipfw_nat_t *ipfw_nat_ptr = NULL;
139struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
140ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
141ipfw_nat_cfg_t *ipfw_nat_del_ptr;
142ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
143ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
144
/*
 * sysctl knobs.  Everything below is compiled only when SYSCTL_NODE is
 * defined.  The IPv4 knobs hang off a "fw" node created under
 * _net_inet_ip; when INET6 is defined a second "fw" node is created
 * under _net_inet6_ip6.
 */
145#ifdef SYSCTL_NODE
/* Plain variables holding compile-time constants for the read-only sysctls. */
146uint32_t dummy_def = IPFW_DEFAULT_RULE;
147uint32_t dummy_tables_max = IPFW_TABLES_MAX;
148
149SYSBEGIN(f3)
150
151SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
152SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
153 CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
154 "Only do a single pass through ipfw when using dummynet(4)");
155SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
156 CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
157 "Rule number auto-increment step");
158SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
159 CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
160 "Log matches to ipfw rules");
161SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
162 CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
163 "Set upper limit of matches of ipfw rules logged");
164SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
165 &dummy_def, 0,
166 "The default/max possible rule number.");
167SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD,
168 &dummy_tables_max, 0,
169 "The maximum number of tables.");
/* default_to_accept is declared CTLFLAG_RDTUN and also registered as a tunable. */
170SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
171 &default_to_accept, 0,
172 "Make the default rule accept all packets.");
173TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
174SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
175 CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
176 "Number of static rules");
177
178#ifdef INET6
179SYSCTL_DECL(_net_inet6_ip6);
180SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
181SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
182 CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
183 "Deny packets with unknown IPv6 Extension Headers");
184#endif /* INET6 */
185
186SYSEND
187
188#endif /* SYSCTL_NODE */
189
190
191/*
192 * Some macros used in the various matching options.
193 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
 * by stepping (ip)->ip_hl 32-bit words past ip.  The ip argument is
 * expanded twice, so it must not have side effects.
194 * Other macros just cast void * into the appropriate type
195 */
196#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
197#define TCP(p) ((struct tcphdr *)(p))
198#define SCTP(p) ((struct sctphdr *)(p))
199#define UDP(p) ((struct udphdr *)(p))
200#define ICMP(p) ((struct icmphdr *)(p))
201#define ICMP6(p) ((struct icmp6_hdr *)(p))
202
203static __inline int
204icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
205{
206 int type = icmp->icmp_type;
207
208 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
209}
210
211#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
212 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
213
214static int
215is_icmp_query(struct icmphdr *icmp)
216{
217 int type = icmp->icmp_type;
218
219 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
220}
221#undef TT
222
223/*
224 * The following checks use two arrays of 8 or 16 bits to store the
225 * bits that we want set or clear, respectively. They are in the
226 * low and high half of cmd->arg1 or cmd->d[0].
227 *
228 * We scan options and store the bits we find set. We succeed if
229 *
230 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
231 *
232 * The code is sometimes optimized not to store additional variables.
233 */
234
235static int
236flags_match(ipfw_insn *cmd, u_int8_t bits)
237{
238 u_char want_clear;
239 bits = ~bits;
240
241 if ( ((cmd->arg1 & 0xff) & bits) != 0)
242 return 0; /* some bits we want set were clear */
243 want_clear = (cmd->arg1 >> 8) & 0xff;
244 if ( (want_clear & bits) != want_clear)
245 return 0; /* some bits we want clear were set */
246 return 1;
247}
248
249static int
250ipopts_match(struct ip *ip, ipfw_insn *cmd)
251{
252 int optlen, bits = 0;
253 u_char *cp = (u_char *)(ip + 1);
254 int x = (ip->ip_hl << 2) - sizeof (struct ip);
255
256 for (; x > 0; x -= optlen, cp += optlen) {
257 int opt = cp[IPOPT_OPTVAL];
258
259 if (opt == IPOPT_EOL)
260 break;
261 if (opt == IPOPT_NOP)
262 optlen = 1;
263 else {
264 optlen = cp[IPOPT_OLEN];
265 if (optlen <= 0 || optlen > x)
266 return 0; /* invalid or truncated */
267 }
268 switch (opt) {
269
270 default:
271 break;
272
273 case IPOPT_LSRR:
274 bits |= IP_FW_IPOPT_LSRR;
275 break;
276
277 case IPOPT_SSRR:
278 bits |= IP_FW_IPOPT_SSRR;
279 break;
280
281 case IPOPT_RR:
282 bits |= IP_FW_IPOPT_RR;
283 break;
284
285 case IPOPT_TS:
286 bits |= IP_FW_IPOPT_TS;
287 break;
288 }
289 }
290 return (flags_match(cmd, bits));
291}
292
293static int
294tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
295{
296 int optlen, bits = 0;
297 u_char *cp = (u_char *)(tcp + 1);
298 int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
299
300 for (; x > 0; x -= optlen, cp += optlen) {
301 int opt = cp[0];
302 if (opt == TCPOPT_EOL)
303 break;
304 if (opt == TCPOPT_NOP)
305 optlen = 1;
306 else {
307 optlen = cp[1];
308 if (optlen <= 0)
309 break;
310 }
311
312 switch (opt) {
313
314 default:
315 break;
316
317 case TCPOPT_MAXSEG:
318 bits |= IP_FW_TCPOPT_MSS;
319 break;
320
321 case TCPOPT_WINDOW:
322 bits |= IP_FW_TCPOPT_WINDOW;
323 break;
324
325 case TCPOPT_SACK_PERMITTED:
326 case TCPOPT_SACK:
327 bits |= IP_FW_TCPOPT_SACK;
328 break;
329
330 case TCPOPT_TIMESTAMP:
331 bits |= IP_FW_TCPOPT_TS;
332 break;
333
334 }
335 }
336 return (flags_match(cmd, bits));
337}
338
339static int
340iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
341{
342 if (ifp == NULL) /* no iface with this packet, match fails */
343 return 0;
344 /* Check by name or by IP address */
345 if (cmd->name[0] != '\0') { /* match by name */
346 /* Check name */
347 if (cmd->p.glob) {
348 if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
349 return(1);
350 } else {
351 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
352 return(1);
353 }
354 } else {
355#ifdef __FreeBSD__ /* and OSX too ? */
356 struct ifaddr *ia;
357
358 if_addr_rlock(ifp);
359 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
360 if (ia->ifa_addr->sa_family != AF_INET)
361 continue;
362 if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
363 (ia->ifa_addr))->sin_addr.s_addr) {
364 if_addr_runlock(ifp);
365 return(1); /* match */
366 }
367 }
368 if_addr_runlock(ifp);
369#endif /* __FreeBSD__ */
370 }
371 return(0); /* no match, fail ... */
372}
373
374/*
375 * The verify_path function checks if a route to the src exists and
376 * if it is reachable via ifp (when provided).
377 *
378 * The 'verrevpath' option checks that the interface that an IP packet
379 * arrives on is the same interface that traffic destined for the
380 * packet's source address would be routed out of.
381 * The 'versrcreach' option just checks that the source address is
382 * reachable via any route (except default) in the routing table.
383 * These two are a measure to block forged packets. This is also
384 * commonly known as "anti-spoofing" or Unicast Reverse Path
385 * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
386 * is purposely reminiscent of the Cisco IOS command,
387 *
388 * ip verify unicast reverse-path
389 * ip verify unicast source reachable-via any
390 *
391 * which implements the same functionality. But note that the syntax
392 * is misleading, and the check may be performed on all IP packets
393 * whether unicast, multicast, or broadcast.
394 */
395static int
396verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
397{
398#ifndef __FreeBSD__
399 return 0;
400#else
401 struct route ro;
402 struct sockaddr_in *dst;
403
404 bzero(&ro, sizeof(ro));
405
406 dst = (struct sockaddr_in *)&(ro.ro_dst);
407 dst->sin_family = AF_INET;
408 dst->sin_len = sizeof(*dst);
409 dst->sin_addr = src;
410 in_rtalloc_ign(&ro, 0, fib);
411
412 if (ro.ro_rt == NULL)
413 return 0;
414
415 /*
416 * If ifp is provided, check for equality with rtentry.
417 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
418 * in order to pass packets injected back by if_simloop():
419 * if useloopback == 1 routing entry (via lo0) for our own address
420 * may exist, so we need to handle routing assymetry.
421 */
422 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
423 RTFREE(ro.ro_rt);
424 return 0;
425 }
426
427 /* if no ifp provided, check if rtentry is not default route */
428 if (ifp == NULL &&
429 satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
430 RTFREE(ro.ro_rt);
431 return 0;
432 }
433
434 /* or if this is a blackhole/reject route */
435 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
436 RTFREE(ro.ro_rt);
437 return 0;
438 }
439
440 /* found valid route */
441 RTFREE(ro.ro_rt);
442 return 1;
443#endif /* __FreeBSD__ */
444}
445
446#ifdef INET6
447/*
448 * ipv6 specific rules here...
449 */
450static __inline int
451icmp6type_match (int type, ipfw_insn_u32 *cmd)
452{
453 return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
454}
455
456static int
457flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
458{
459 int i;
460 for (i=0; i <= cmd->o.arg1; ++i )
461 if (curr_flow == cmd->d[i] )
462 return 1;
463 return 0;
464}
465
466/* support for IP6_*_ME opcodes */
467static int
468search_ip6_addr_net (struct in6_addr * ip6_addr)
469{
470 struct ifnet *mdc;
471 struct ifaddr *mdc2;
472 struct in6_ifaddr *fdm;
473 struct in6_addr copia;
474
475 TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
476 if_addr_rlock(mdc);
477 TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
478 if (mdc2->ifa_addr->sa_family == AF_INET6) {
479 fdm = (struct in6_ifaddr *)mdc2;
480 copia = fdm->ia_addr.sin6_addr;
481 /* need for leaving scope_id in the sock_addr */
482 in6_clearscope(&copia);
483 if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
484 if_addr_runlock(mdc);
485 return 1;
486 }
487 }
488 }
489 if_addr_runlock(mdc);
490 }
491 return 0;
492}
493
494static int
495verify_path6(struct in6_addr *src, struct ifnet *ifp)
496{
497 struct route_in6 ro;
498 struct sockaddr_in6 *dst;
499
500 bzero(&ro, sizeof(ro));
501
502 dst = (struct sockaddr_in6 * )&(ro.ro_dst);
503 dst->sin6_family = AF_INET6;
504 dst->sin6_len = sizeof(*dst);
505 dst->sin6_addr = *src;
506 /* XXX MRT 0 for ipv6 at this time */
507 rtalloc_ign((struct route *)&ro, 0);
508
509 if (ro.ro_rt == NULL)
510 return 0;
511
512 /*
513 * if ifp is provided, check for equality with rtentry
514 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
515 * to support the case of sending packets to an address of our own.
516 * (where the former interface is the first argument of if_simloop()
517 * (=ifp), the latter is lo0)
518 */
519 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
520 RTFREE(ro.ro_rt);
521 return 0;
522 }
523
524 /* if no ifp provided, check if rtentry is not default route */
525 if (ifp == NULL &&
526 IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
527 RTFREE(ro.ro_rt);
528 return 0;
529 }
530
531 /* or if this is a blackhole/reject route */
532 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
533 RTFREE(ro.ro_rt);
534 return 0;
535 }
536
537 /* found valid route */
538 RTFREE(ro.ro_rt);
539 return 1;
540
541}
542
543static int
544is_icmp6_query(int icmp6_type)
545{
546 if ((icmp6_type <= ICMP6_MAXTYPE) &&
547 (icmp6_type == ICMP6_ECHO_REQUEST ||
548 icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
549 icmp6_type == ICMP6_WRUREQUEST ||
550 icmp6_type == ICMP6_FQDN_QUERY ||
551 icmp6_type == ICMP6_NI_QUERY))
552 return (1);
553
554 return (0);
555}
556
557static void
558send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
559{
560 struct mbuf *m;
561
562 m = args->m;
563 if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
564 struct tcphdr *tcp;
565 tcp = (struct tcphdr *)((char *)ip6 + hlen);
566
567 if ((tcp->th_flags & TH_RST) == 0) {
568 struct mbuf *m0;
569 m0 = ipfw_send_pkt(args->m, &(args->f_id),
570 ntohl(tcp->th_seq), ntohl(tcp->th_ack),
571 tcp->th_flags | TH_RST);
572 if (m0 != NULL)
573 ip6_output(m0, NULL, NULL, 0, NULL, NULL,
574 NULL);
575 }
576 FREE_PKT(m);
577 } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
578#if 0
579 /*
580 * Unlike above, the mbufs need to line up with the ip6 hdr,
581 * as the contents are read. We need to m_adj() the
582 * needed amount.
583 * The mbuf will however be thrown away so we can adjust it.
584 * Remember we did an m_pullup on it already so we
585 * can make some assumptions about contiguousness.
586 */
587 if (args->L3offset)
588 m_adj(m, args->L3offset);
589#endif
590 icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
591 } else
592 FREE_PKT(m);
593
594 args->m = NULL;
595}
596
597#endif /* INET6 */
598
599
600/*
601 * sends a reject message, consuming the mbuf passed as an argument.
602 */
603static void
604send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
605{
606
607#if 0
608 /* XXX When ip is not guaranteed to be at mtod() we will
609 * need to account for this */
610 * The mbuf will however be thrown away so we can adjust it.
611 * Remember we did an m_pullup on it already so we
612 * can make some assumptions about contiguousness.
613 */
614 if (args->L3offset)
615 m_adj(m, args->L3offset);
616#endif
617 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
618 /* We need the IP header in host order for icmp_error(). */
619 SET_HOST_IPLEN(ip);
620 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
621 } else if (args->f_id.proto == IPPROTO_TCP) {
622 struct tcphdr *const tcp =
623 L3HDR(struct tcphdr, mtod(args->m, struct ip *));
624 if ( (tcp->th_flags & TH_RST) == 0) {
625 struct mbuf *m;
626 m = ipfw_send_pkt(args->m, &(args->f_id),
627 ntohl(tcp->th_seq), ntohl(tcp->th_ack),
628 tcp->th_flags | TH_RST);
629 if (m != NULL)
630 ip_output(m, NULL, NULL, 0, NULL, NULL);
631 }
632 FREE_PKT(args->m);
633 } else
634 FREE_PKT(args->m);
635 args->m = NULL;
636}
637
638/*
639 * Support for uid/gid/jail lookup. These tests are expensive
640 * (because we may need to look into the list of active sockets)
641 * so we cache the results. ugid_lookupp is 0 if we have not
642 * yet done a lookup, 1 if we succeeded, and -1 if we tried
643 * and failed. The function always returns the match value.
644 * We could actually spare the variable and use *uc, setting
645 * it to '(void *)check_uidgid if we have no info, NULL if
646 * we tried and failed, or any other value if successful.
647 */
648static int
649check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
650 struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
651 u_int16_t src_port, int *ugid_lookupp,
652 struct ucred **uc, struct inpcb *inp)
653{
654#ifndef __FreeBSD__
655 return cred_check(insn, proto, oif,
656 dst_ip, dst_port, src_ip, src_port,
657 (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
658#else /* FreeBSD */
659 struct inpcbinfo *pi;
660 int lookupflags;
661 struct inpcb *pcb;
662 int match;
663
664 /*
665 * Check to see if the UDP or TCP stack supplied us with
666 * the PCB. If so, rather then holding a lock and looking
667 * up the PCB, we can use the one that was supplied.
668 */
669 if (inp && *ugid_lookupp == 0) {
670 INP_LOCK_ASSERT(inp);
671 if (inp->inp_socket != NULL) {
672 *uc = crhold(inp->inp_cred);
673 *ugid_lookupp = 1;
674 } else
675 *ugid_lookupp = -1;
676 }
677 /*
678 * If we have already been here and the packet has no
679 * PCB entry associated with it, then we can safely
680 * assume that this is a no match.
681 */
682 if (*ugid_lookupp == -1)
683 return (0);
684 if (proto == IPPROTO_TCP) {
685 lookupflags = 0;
686 pi = &V_tcbinfo;
687 } else if (proto == IPPROTO_UDP) {
688 lookupflags = INPLOOKUP_WILDCARD;
689 pi = &V_udbinfo;
690 } else
691 return 0;
692 lookupflags |= INPLOOKUP_RLOCKPCB;
693 match = 0;
694 if (*ugid_lookupp == 0) {
695 /*
696 * XXXRW: If we had the mbuf here, could use
697 * in_pcblookup_mbuf().
698 */
695 pcb = (oif) ?
696 in_pcblookup(pi,
697 dst_ip, htons(dst_port),
698 src_ip, htons(src_port),
699 lookupflags, oif) :
700 in_pcblookup(pi,
701 src_ip, htons(src_port),
702 dst_ip, htons(dst_port),
703 lookupflags, NULL);
704 if (pcb != NULL) {
705 INP_RLOCK_ASSERT(pcb);
706 *uc = crhold(pcb->inp_cred);
707 *ugid_lookupp = 1;
708 INP_RUNLOCK(pcb);
709 }
710 if (*ugid_lookupp == 0) {
711 /*
712 * We tried and failed, set the variable to -1
713 * so we will not try again on this packet.
714 */
715 *ugid_lookupp = -1;
716 return (0);
717 }
718 }
719 if (insn->o.opcode == O_UID)
720 match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
721 else if (insn->o.opcode == O_GID)
722 match = groupmember((gid_t)insn->d[0], *uc);
723 else if (insn->o.opcode == O_JAIL)
724 match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
725 return match;
726#endif /* __FreeBSD__ */
727}
728
729/*
730 * Helper function to set args with info on the rule after the matching
731 * one. slot is precise, whereas we guess rule_id as they are
732 * assigned sequentially.
733 */
734static inline void
735set_match(struct ip_fw_args *args, int slot,
736 struct ip_fw_chain *chain)
737{
738 args->rule.chain_id = chain->id;
739 args->rule.slot = slot + 1; /* we use 0 as a marker */
740 args->rule.rule_id = 1 + chain->map[slot]->id;
741 args->rule.rulenum = chain->map[slot]->rulenum;
742}
743
744/*
745 * The main check routine for the firewall.
746 *
747 * All arguments are in args so we can modify them and return them
748 * back to the caller.
749 *
750 * Parameters:
751 *
752 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
753 * Starts with the IP header.
754 * args->eh (in) Mac header if present, NULL for layer3 packet.
755 * args->L3offset Number of bytes bypassed if we came from L2.
756 * e.g. often sizeof(eh) ** NOTYET **
757 * args->oif Outgoing interface, NULL if packet is incoming.
758 * The incoming interface is in the mbuf. (in)
759 * args->divert_rule (in/out)
760 * Skip up to the first rule past this rule number;
761 * upon return, non-zero port number for divert or tee.
762 *
763 * args->rule Pointer to the last matching rule (in/out)
764 * args->next_hop Socket we are forwarding to (out).
765 * args->f_id Addresses grabbed from the packet (out)
766 * args->rule.info a cookie depending on rule action
767 *
768 * Return value:
769 *
770 * IP_FW_PASS the packet must be accepted
771 * IP_FW_DENY the packet must be dropped
772 * IP_FW_DIVERT divert packet, port in m_tag
773 * IP_FW_TEE tee packet, port in m_tag
774 * IP_FW_DUMMYNET to dummynet, pipe in args->cookie
775 * IP_FW_NETGRAPH into netgraph, cookie args->cookie
776 * args->rule contains the matching rule,
777 * args->rule.info has additional information.
778 *
779 */
780int
781ipfw_chk(struct ip_fw_args *args)
782{
783
784 /*
785 * Local variables holding state while processing a packet:
786 *
787 * IMPORTANT NOTE: to speed up the processing of rules, there
788 * are some assumption on the values of the variables, which
789 * are documented here. Should you change them, please check
790 * the implementation of the various instructions to make sure
791 * that they still work.
792 *
793 * args->eh The MAC header. It is non-null for a layer2
794 * packet, it is NULL for a layer-3 packet.
795 * **notyet**
796 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
797 *
798 * m | args->m Pointer to the mbuf, as received from the caller.
799 * It may change if ipfw_chk() does an m_pullup, or if it
800 * consumes the packet because it calls send_reject().
801 * XXX This has to change, so that ipfw_chk() never modifies
802 * or consumes the buffer.
803 * ip is the beginning of the ip(4 or 6) header.
804 * Calculated by adding the L3offset to the start of data.
805 * (Until we start using L3offset, the packet is
806 * supposed to start with the ip header).
807 */
808 struct mbuf *m = args->m;
809 struct ip *ip = mtod(m, struct ip *);
810
811 /*
812 * For rules which contain uid/gid or jail constraints, cache
813 * a copy of the users credentials after the pcb lookup has been
814 * executed. This will speed up the processing of rules with
815 * these types of constraints, as well as decrease contention
816 * on pcb related locks.
817 */
818#ifndef __FreeBSD__
819 struct bsd_ucred ucred_cache;
820#else
821 struct ucred *ucred_cache = NULL;
822#endif
823 int ucred_lookup = 0;
824
825 /*
826 * oif | args->oif If NULL, ipfw_chk has been called on the
827 * inbound path (ether_input, ip_input).
828 * If non-NULL, ipfw_chk has been called on the outbound path
829 * (ether_output, ip_output).
830 */
831 struct ifnet *oif = args->oif;
832
833 int f_pos = 0; /* index of current rule in the array */
834 int retval = 0;
835
836 /*
837 * hlen The length of the IP header.
838 */
839 u_int hlen = 0; /* hlen >0 means we have an IP pkt */
840
841 /*
842 * offset The offset of a fragment. offset != 0 means that
843 * we have a fragment at this offset of an IPv4 packet.
844 * offset == 0 means that (if this is an IPv4 packet)
845 * this is the first or only fragment.
846 * For IPv6 offset == 0 means there is no Fragment Header.
847 * If offset != 0 for IPv6 always use correct mask to
848 * get the correct offset because we add IP6F_MORE_FRAG
849 * to be able to dectect the first fragment which would
850 * otherwise have offset = 0.
851 */
852 u_short offset = 0;
853
854 /*
855 * Local copies of addresses. They are only valid if we have
856 * an IP packet.
857 *
858 * proto The protocol. Set to 0 for non-ip packets,
859 * or to the protocol read from the packet otherwise.
860 * proto != 0 means that we have an IPv4 packet.
861 *
862 * src_port, dst_port port numbers, in HOST format. Only
863 * valid for TCP and UDP packets.
864 *
865 * src_ip, dst_ip ip addresses, in NETWORK format.
866 * Only valid for IPv4 packets.
867 */
868 uint8_t proto;
869 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
870 struct in_addr src_ip, dst_ip; /* NOTE: network format */
871 uint16_t iplen=0;
872 int pktlen;
873 uint16_t etype = 0; /* Host order stored ether type */
874
875 /*
876 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
877 * MATCH_NONE when checked and not matched (q = NULL),
878 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
879 */
880 int dyn_dir = MATCH_UNKNOWN;
881 ipfw_dyn_rule *q = NULL;
882 struct ip_fw_chain *chain = &V_layer3_chain;
883
884 /*
885 * We store in ulp a pointer to the upper layer protocol header.
886 * In the ipv4 case this is easy to determine from the header,
887 * but for ipv6 we might have some additional headers in the middle.
888 * ulp is NULL if not found.
889 */
890 void *ulp = NULL; /* upper layer protocol pointer. */
891
892 /* XXX ipv6 variables */
893 int is_ipv6 = 0;
894 uint8_t icmp6_type = 0;
895 uint16_t ext_hd = 0; /* bits vector for extension header filtering */
896 /* end of ipv6 variables */
897
898 int is_ipv4 = 0;
899
900 int done = 0; /* flag to exit the outer loop */
901
902 if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
903 return (IP_FW_PASS); /* accept */
904
905 dst_ip.s_addr = 0; /* make sure it is initialized */
906 src_ip.s_addr = 0; /* make sure it is initialized */
907 pktlen = m->m_pkthdr.len;
908 args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */
909 proto = args->f_id.proto = 0; /* mark f_id invalid */
910 /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
911
912/*
913 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
914 * then it sets p to point at the offset "len" in the mbuf. WARNING: the
915 * pointer might become stale after other pullups (but we never use it
916 * this way).
917 */
918#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T))
919#define PULLUP_LEN(_len, p, T) \
920do { \
921 int x = (_len) + T; \
922 if ((m)->m_len < x) { \
923 args->m = m = m_pullup(m, x); \
924 if (m == NULL) \
925 goto pullup_failed; \
926 } \
927 p = (mtod(m, char *) + (_len)); \
928} while (0)
929
930 /*
931 * if we have an ether header,
932 */
933 if (args->eh)
934 etype = ntohs(args->eh->ether_type);
935
936 /* Identify IP packets and fill up variables. */
937 if (pktlen >= sizeof(struct ip6_hdr) &&
938 (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
939 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
940 is_ipv6 = 1;
941 args->f_id.addr_type = 6;
942 hlen = sizeof(struct ip6_hdr);
943 proto = ip6->ip6_nxt;
944
945 /* Search extension headers to find upper layer protocols */
946 while (ulp == NULL) {
947 switch (proto) {
948 case IPPROTO_ICMPV6:
949 PULLUP_TO(hlen, ulp, struct icmp6_hdr);
950 icmp6_type = ICMP6(ulp)->icmp6_type;
951 break;
952
953 case IPPROTO_TCP:
954 PULLUP_TO(hlen, ulp, struct tcphdr);
955 dst_port = TCP(ulp)->th_dport;
956 src_port = TCP(ulp)->th_sport;
957 /* save flags for dynamic rules */
958 args->f_id._flags = TCP(ulp)->th_flags;
959 break;
960
961 case IPPROTO_SCTP:
962 PULLUP_TO(hlen, ulp, struct sctphdr);
963 src_port = SCTP(ulp)->src_port;
964 dst_port = SCTP(ulp)->dest_port;
965 break;
966
967 case IPPROTO_UDP:
968 PULLUP_TO(hlen, ulp, struct udphdr);
969 dst_port = UDP(ulp)->uh_dport;
970 src_port = UDP(ulp)->uh_sport;
971 break;
972
973 case IPPROTO_HOPOPTS: /* RFC 2460 */
974 PULLUP_TO(hlen, ulp, struct ip6_hbh);
975 ext_hd |= EXT_HOPOPTS;
976 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
977 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
978 ulp = NULL;
979 break;
980
981 case IPPROTO_ROUTING: /* RFC 2460 */
982 PULLUP_TO(hlen, ulp, struct ip6_rthdr);
983 switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
984 case 0:
985 ext_hd |= EXT_RTHDR0;
986 break;
987 case 2:
988 ext_hd |= EXT_RTHDR2;
989 break;
990 default:
991 printf("IPFW2: IPV6 - Unknown Routing "
992 "Header type(%d)\n",
993 ((struct ip6_rthdr *)ulp)->ip6r_type);
994 if (V_fw_deny_unknown_exthdrs)
995 return (IP_FW_DENY);
996 break;
997 }
998 ext_hd |= EXT_ROUTING;
999 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
1000 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
1001 ulp = NULL;
1002 break;
1003
1004 case IPPROTO_FRAGMENT: /* RFC 2460 */
1005 PULLUP_TO(hlen, ulp, struct ip6_frag);
1006 ext_hd |= EXT_FRAGMENT;
1007 hlen += sizeof (struct ip6_frag);
1008 proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
1009 offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
1010 IP6F_OFF_MASK;
1011 /* Add IP6F_MORE_FRAG for offset of first
1012 * fragment to be != 0. */
1013 offset |= ((struct ip6_frag *)ulp)->ip6f_offlg &
1014 IP6F_MORE_FRAG;
1015 if (offset == 0) {
1016 printf("IPFW2: IPV6 - Invalid Fragment "
1017 "Header\n");
1018 if (V_fw_deny_unknown_exthdrs)
1019 return (IP_FW_DENY);
1020 break;
1021 }
1022 args->f_id.extra =
1023 ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
1024 ulp = NULL;
1025 break;
1026
1027 case IPPROTO_DSTOPTS: /* RFC 2460 */
1028 PULLUP_TO(hlen, ulp, struct ip6_hbh);
1029 ext_hd |= EXT_DSTOPTS;
1030 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
1031 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
1032 ulp = NULL;
1033 break;
1034
1035 case IPPROTO_AH: /* RFC 2402 */
1036 PULLUP_TO(hlen, ulp, struct ip6_ext);
1037 ext_hd |= EXT_AH;
1038 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
1039 proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
1040 ulp = NULL;
1041 break;
1042
1043 case IPPROTO_ESP: /* RFC 2406 */
1044 PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */
1045 /* Anything past Seq# is variable length and
1046 * data past this ext. header is encrypted. */
1047 ext_hd |= EXT_ESP;
1048 break;
1049
1050 case IPPROTO_NONE: /* RFC 2460 */
1051 /*
1052 * Packet ends here, and IPv6 header has
1053 * already been pulled up. If ip6e_len!=0
1054 * then octets must be ignored.
1055 */
1056 ulp = ip; /* non-NULL to get out of loop. */
1057 break;
1058
1059 case IPPROTO_OSPFIGP:
1060 /* XXX OSPF header check? */
1061 PULLUP_TO(hlen, ulp, struct ip6_ext);
1062 break;
1063
1064 case IPPROTO_PIM:
1065 /* XXX PIM header check? */
1066 PULLUP_TO(hlen, ulp, struct pim);
1067 break;
1068
1069 case IPPROTO_CARP:
1070 PULLUP_TO(hlen, ulp, struct carp_header);
1071 if (((struct carp_header *)ulp)->carp_version !=
1072 CARP_VERSION)
1073 return (IP_FW_DENY);
1074 if (((struct carp_header *)ulp)->carp_type !=
1075 CARP_ADVERTISEMENT)
1076 return (IP_FW_DENY);
1077 break;
1078
1079 case IPPROTO_IPV6: /* RFC 2893 */
1080 PULLUP_TO(hlen, ulp, struct ip6_hdr);
1081 break;
1082
1083 case IPPROTO_IPV4: /* RFC 2893 */
1084 PULLUP_TO(hlen, ulp, struct ip);
1085 break;
1086
1087 default:
1088 printf("IPFW2: IPV6 - Unknown Extension "
1089 "Header(%d), ext_hd=%x\n", proto, ext_hd);
1090 if (V_fw_deny_unknown_exthdrs)
1091 return (IP_FW_DENY);
1092 PULLUP_TO(hlen, ulp, struct ip6_ext);
1093 break;
1094 } /*switch */
1095 }
1096 ip = mtod(m, struct ip *);
1097 ip6 = (struct ip6_hdr *)ip;
1098 args->f_id.src_ip6 = ip6->ip6_src;
1099 args->f_id.dst_ip6 = ip6->ip6_dst;
1100 args->f_id.src_ip = 0;
1101 args->f_id.dst_ip = 0;
1102 args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
1103 } else if (pktlen >= sizeof(struct ip) &&
1104 (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
1105 is_ipv4 = 1;
1106 hlen = ip->ip_hl << 2;
1107 args->f_id.addr_type = 4;
1108
1109 /*
1110 * Collect parameters into local variables for faster matching.
1111 */
1112 proto = ip->ip_p;
1113 src_ip = ip->ip_src;
1114 dst_ip = ip->ip_dst;
1115 offset = ntohs(ip->ip_off) & IP_OFFMASK;
1116 iplen = ntohs(ip->ip_len);
1117 pktlen = iplen < pktlen ? iplen : pktlen;
1118
1119 if (offset == 0) {
1120 switch (proto) {
1121 case IPPROTO_TCP:
1122 PULLUP_TO(hlen, ulp, struct tcphdr);
1123 dst_port = TCP(ulp)->th_dport;
1124 src_port = TCP(ulp)->th_sport;
1125 /* save flags for dynamic rules */
1126 args->f_id._flags = TCP(ulp)->th_flags;
1127 break;
1128
1129 case IPPROTO_SCTP:
1130 PULLUP_TO(hlen, ulp, struct sctphdr);
1131 src_port = SCTP(ulp)->src_port;
1132 dst_port = SCTP(ulp)->dest_port;
1133 break;
1134
1135 case IPPROTO_UDP:
1136 PULLUP_TO(hlen, ulp, struct udphdr);
1137 dst_port = UDP(ulp)->uh_dport;
1138 src_port = UDP(ulp)->uh_sport;
1139 break;
1140
1141 case IPPROTO_ICMP:
1142 PULLUP_TO(hlen, ulp, struct icmphdr);
1143 //args->f_id.flags = ICMP(ulp)->icmp_type;
1144 break;
1145
1146 default:
1147 break;
1148 }
1149 }
1150
1151 ip = mtod(m, struct ip *);
1152 args->f_id.src_ip = ntohl(src_ip.s_addr);
1153 args->f_id.dst_ip = ntohl(dst_ip.s_addr);
1154 }
1155#undef PULLUP_TO
1156 if (proto) { /* we may have port numbers, store them */
1157 args->f_id.proto = proto;
1158 args->f_id.src_port = src_port = ntohs(src_port);
1159 args->f_id.dst_port = dst_port = ntohs(dst_port);
1160 }
1161
1162 IPFW_RLOCK(chain);
1163 if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
1164 IPFW_RUNLOCK(chain);
1165 return (IP_FW_PASS); /* accept */
1166 }
1167 if (args->rule.slot) {
1168 /*
1169 * Packet has already been tagged as a result of a previous
1170 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
1171 * REASS, NETGRAPH, DIVERT/TEE...)
1172 * Validate the slot and continue from the next one
1173 * if still present, otherwise do a lookup.
1174 */
1175 f_pos = (args->rule.chain_id == chain->id) ?
1176 args->rule.slot :
1177 ipfw_find_rule(chain, args->rule.rulenum,
1178 args->rule.rule_id);
1179 } else {
1180 f_pos = 0;
1181 }
1182
1183 /*
1184 * Now scan the rules, and parse microinstructions for each rule.
1185 * We have two nested loops and an inner switch. Sometimes we
1186 * need to break out of one or both loops, or re-enter one of
1187 * the loops with updated variables. Loop variables are:
1188 *
1189 * f_pos (outer loop) points to the current rule.
1190 * On output it points to the matching rule.
1191 * done (outer loop) is used as a flag to break the loop.
1192 * l (inner loop) residual length of current rule.
1193 * cmd points to the current microinstruction.
1194 *
1195 * We break the inner loop by setting l=0 and possibly
1196 * cmdlen=0 if we don't want to advance cmd.
1197 * We break the outer loop by setting done=1
1198 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
1199 * as needed.
1200 */
1201 for (; f_pos < chain->n_rules; f_pos++) {
1202 ipfw_insn *cmd;
1203 uint32_t tablearg = 0;
1204 int l, cmdlen, skip_or; /* skip rest of OR block */
1205 struct ip_fw *f;
1206
1207 f = chain->map[f_pos];
1208 if (V_set_disable & (1 << f->set) )
1209 continue;
1210
1211 skip_or = 0;
1212 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
1213 l -= cmdlen, cmd += cmdlen) {
1214 int match;
1215
1216 /*
1217 * check_body is a jump target used when we find a
1218 * CHECK_STATE, and need to jump to the body of
1219 * the target rule.
1220 */
1221
1222/* check_body: */
1223 cmdlen = F_LEN(cmd);
1224 /*
1225 * An OR block (insn_1 || .. || insn_n) has the
1226 * F_OR bit set in all but the last instruction.
1227 * The first match will set "skip_or", and cause
1228 * the following instructions to be skipped until
1229 * past the one with the F_OR bit clear.
1230 */
1231 if (skip_or) { /* skip this instruction */
1232 if ((cmd->len & F_OR) == 0)
1233 skip_or = 0; /* next one is good */
1234 continue;
1235 }
1236 match = 0; /* set to 1 if we succeed */
1237
1238 switch (cmd->opcode) {
1239 /*
1240 * The first set of opcodes compares the packet's
1241 * fields with some pattern, setting 'match' if a
1242 * match is found. At the end of the loop there is
1243 * logic to deal with F_NOT and F_OR flags associated
1244 * with the opcode.
1245 */
1246 case O_NOP:
1247 match = 1;
1248 break;
1249
1250 case O_FORWARD_MAC:
1251 printf("ipfw: opcode %d unimplemented\n",
1252 cmd->opcode);
1253 break;
1254
1255 case O_GID:
1256 case O_UID:
1257 case O_JAIL:
1258 /*
1259 * We only check offset == 0 && proto != 0,
1260 * as this ensures that we have a
1261 * packet with the ports info.
1262 */
1263 if (offset!=0)
1264 break;
1265 if (is_ipv6) /* XXX to be fixed later */
1266 break;
1267 if (proto == IPPROTO_TCP ||
1268 proto == IPPROTO_UDP)
1269 match = check_uidgid(
1270 (ipfw_insn_u32 *)cmd,
1271 proto, oif,
1272 dst_ip, dst_port,
1273 src_ip, src_port, &ucred_lookup,
1274#ifdef __FreeBSD__
1275 &ucred_cache, args->inp);
1276#else
1277 (void *)&ucred_cache,
1278 (struct inpcb *)args->m);
1279#endif
1280 break;
1281
1282 case O_RECV:
1283 match = iface_match(m->m_pkthdr.rcvif,
1284 (ipfw_insn_if *)cmd);
1285 break;
1286
1287 case O_XMIT:
1288 match = iface_match(oif, (ipfw_insn_if *)cmd);
1289 break;
1290
1291 case O_VIA:
1292 match = iface_match(oif ? oif :
1293 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
1294 break;
1295
1296 case O_MACADDR2:
1297 if (args->eh != NULL) { /* have MAC header */
1298 u_int32_t *want = (u_int32_t *)
1299 ((ipfw_insn_mac *)cmd)->addr;
1300 u_int32_t *mask = (u_int32_t *)
1301 ((ipfw_insn_mac *)cmd)->mask;
1302 u_int32_t *hdr = (u_int32_t *)args->eh;
1303
1304 match =
1305 ( want[0] == (hdr[0] & mask[0]) &&
1306 want[1] == (hdr[1] & mask[1]) &&
1307 want[2] == (hdr[2] & mask[2]) );
1308 }
1309 break;
1310
1311 case O_MAC_TYPE:
1312 if (args->eh != NULL) {
1313 u_int16_t *p =
1314 ((ipfw_insn_u16 *)cmd)->ports;
1315 int i;
1316
1317 for (i = cmdlen - 1; !match && i>0;
1318 i--, p += 2)
1319 match = (etype >= p[0] &&
1320 etype <= p[1]);
1321 }
1322 break;
1323
1324 case O_FRAG:
1325 match = (offset != 0);
1326 break;
1327
1328 case O_IN: /* "out" is "not in" */
1329 match = (oif == NULL);
1330 break;
1331
1332 case O_LAYER2:
1333 match = (args->eh != NULL);
1334 break;
1335
1336 case O_DIVERTED:
1337 {
1338 /* For diverted packets, args->rule.info
1339 * contains the divert port (in host format)
1340 * reason and direction.
1341 */
1342 uint32_t i = args->rule.info;
1343 match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
1344 cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
1345 }
1346 break;
1347
1348 case O_PROTO:
1349 /*
1350 * We do not allow an arg of 0 so the
1351 * check of "proto" only suffices.
1352 */
1353 match = (proto == cmd->arg1);
1354 break;
1355
1356 case O_IP_SRC:
1357 match = is_ipv4 &&
1358 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
1359 src_ip.s_addr);
1360 break;
1361
1362 case O_IP_SRC_LOOKUP:
1363 case O_IP_DST_LOOKUP:
1364 if (is_ipv4) {
1365 uint32_t key =
1366 (cmd->opcode == O_IP_DST_LOOKUP) ?
1367 dst_ip.s_addr : src_ip.s_addr;
1368 uint32_t v = 0;
1369
1370 if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
1371 /* generic lookup. The key must be
1372 * in 32bit big-endian format.
1373 */
1374 v = ((ipfw_insn_u32 *)cmd)->d[1];
1375 if (v == 0)
1376 key = dst_ip.s_addr;
1377 else if (v == 1)
1378 key = src_ip.s_addr;
1379 else if (v == 6) /* dscp */
1380 key = (ip->ip_tos >> 2) & 0x3f;
1381 else if (offset != 0)
1382 break;
1383 else if (proto != IPPROTO_TCP &&
1384 proto != IPPROTO_UDP)
1385 break;
1386 else if (v == 2)
1387 key = htonl(dst_port);
1388 else if (v == 3)
1389 key = htonl(src_port);
1390 else if (v == 4 || v == 5) {
1391 check_uidgid(
1392 (ipfw_insn_u32 *)cmd,
1393 proto, oif,
1394 dst_ip, dst_port,
1395 src_ip, src_port, &ucred_lookup,
1396#ifdef __FreeBSD__
1397 &ucred_cache, args->inp);
1398 if (v == 4 /* O_UID */)
1399 key = ucred_cache->cr_uid;
1400 else if (v == 5 /* O_JAIL */)
1401 key = ucred_cache->cr_prison->pr_id;
1402#else /* !__FreeBSD__ */
1403 (void *)&ucred_cache,
1404 (struct inpcb *)args->m);
1405 if (v ==4 /* O_UID */)
1406 key = ucred_cache.uid;
1407 else if (v == 5 /* O_JAIL */)
1408 key = ucred_cache.xid;
1409#endif /* !__FreeBSD__ */
1410 key = htonl(key);
1411 } else
1412 break;
1413 }
1414 match = ipfw_lookup_table(chain,
1415 cmd->arg1, key, &v);
1416 if (!match)
1417 break;
1418 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
1419 match =
1420 ((ipfw_insn_u32 *)cmd)->d[0] == v;
1421 else
1422 tablearg = v;
1423 }
1424 break;
1425
1426 case O_IP_SRC_MASK:
1427 case O_IP_DST_MASK:
1428 if (is_ipv4) {
1429 uint32_t a =
1430 (cmd->opcode == O_IP_DST_MASK) ?
1431 dst_ip.s_addr : src_ip.s_addr;
1432 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
1433 int i = cmdlen-1;
1434
1435 for (; !match && i>0; i-= 2, p+= 2)
1436 match = (p[0] == (a & p[1]));
1437 }
1438 break;
1439
1440 case O_IP_SRC_ME:
1441 if (is_ipv4) {
1442 struct ifnet *tif;
1443
1444 INADDR_TO_IFP(src_ip, tif);
1445 match = (tif != NULL);
1446 break;
1447 }
1448#ifdef INET6
1449 /* FALLTHROUGH */
1450 case O_IP6_SRC_ME:
1451 match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
1452#endif
1453 break;
1454
1455 case O_IP_DST_SET:
1456 case O_IP_SRC_SET:
1457 if (is_ipv4) {
1458 u_int32_t *d = (u_int32_t *)(cmd+1);
1459 u_int32_t addr =
1460 cmd->opcode == O_IP_DST_SET ?
1461 args->f_id.dst_ip :
1462 args->f_id.src_ip;
1463
1464 if (addr < d[0])
1465 break;
1466 addr -= d[0]; /* subtract base */
1467 match = (addr < cmd->arg1) &&
1468 ( d[ 1 + (addr>>5)] &
1469 (1<<(addr & 0x1f)) );
1470 }
1471 break;
1472
1473 case O_IP_DST:
1474 match = is_ipv4 &&
1475 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
1476 dst_ip.s_addr);
1477 break;
1478
1479 case O_IP_DST_ME:
1480 if (is_ipv4) {
1481 struct ifnet *tif;
1482
1483 INADDR_TO_IFP(dst_ip, tif);
1484 match = (tif != NULL);
1485 break;
1486 }
1487#ifdef INET6
1488 /* FALLTHROUGH */
1489 case O_IP6_DST_ME:
1490 match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
1491#endif
1492 break;
1493
1494
1495 case O_IP_SRCPORT:
1496 case O_IP_DSTPORT:
1497 /*
1498 * offset == 0 && proto != 0 is enough
1499 * to guarantee that we have a
1500 * packet with port info.
1501 */
1502 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
1503 && offset == 0) {
1504 u_int16_t x =
1505 (cmd->opcode == O_IP_SRCPORT) ?
1506 src_port : dst_port ;
1507 u_int16_t *p =
1508 ((ipfw_insn_u16 *)cmd)->ports;
1509 int i;
1510
1511 for (i = cmdlen - 1; !match && i>0;
1512 i--, p += 2)
1513 match = (x>=p[0] && x<=p[1]);
1514 }
1515 break;
1516
1517 case O_ICMPTYPE:
1518 match = (offset == 0 && proto==IPPROTO_ICMP &&
1519 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
1520 break;
1521
1522#ifdef INET6
1523 case O_ICMP6TYPE:
1524 match = is_ipv6 && offset == 0 &&
1525 proto==IPPROTO_ICMPV6 &&
1526 icmp6type_match(
1527 ICMP6(ulp)->icmp6_type,
1528 (ipfw_insn_u32 *)cmd);
1529 break;
1530#endif /* INET6 */
1531
1532 case O_IPOPT:
1533 match = (is_ipv4 &&
1534 ipopts_match(ip, cmd) );
1535 break;
1536
1537 case O_IPVER:
1538 match = (is_ipv4 &&
1539 cmd->arg1 == ip->ip_v);
1540 break;
1541
1542 case O_IPID:
1543 case O_IPLEN:
1544 case O_IPTTL:
1545 if (is_ipv4) { /* only for IP packets */
1546 uint16_t x;
1547 uint16_t *p;
1548 int i;
1549
1550 if (cmd->opcode == O_IPLEN)
1551 x = iplen;
1552 else if (cmd->opcode == O_IPTTL)
1553 x = ip->ip_ttl;
1554 else /* must be IPID */
1555 x = ntohs(ip->ip_id);
1556 if (cmdlen == 1) {
1557 match = (cmd->arg1 == x);
1558 break;
1559 }
1560 /* otherwise we have ranges */
1561 p = ((ipfw_insn_u16 *)cmd)->ports;
1562 i = cmdlen - 1;
1563 for (; !match && i>0; i--, p += 2)
1564 match = (x >= p[0] && x <= p[1]);
1565 }
1566 break;
1567
1568 case O_IPPRECEDENCE:
1569 match = (is_ipv4 &&
1570 (cmd->arg1 == (ip->ip_tos & 0xe0)) );
1571 break;
1572
1573 case O_IPTOS:
1574 match = (is_ipv4 &&
1575 flags_match(cmd, ip->ip_tos));
1576 break;
1577
1578 case O_TCPDATALEN:
1579 if (proto == IPPROTO_TCP && offset == 0) {
1580 struct tcphdr *tcp;
1581 uint16_t x;
1582 uint16_t *p;
1583 int i;
1584
1585 tcp = TCP(ulp);
1586 x = iplen -
1587 ((ip->ip_hl + tcp->th_off) << 2);
1588 if (cmdlen == 1) {
1589 match = (cmd->arg1 == x);
1590 break;
1591 }
1592 /* otherwise we have ranges */
1593 p = ((ipfw_insn_u16 *)cmd)->ports;
1594 i = cmdlen - 1;
1595 for (; !match && i>0; i--, p += 2)
1596 match = (x >= p[0] && x <= p[1]);
1597 }
1598 break;
1599
1600 case O_TCPFLAGS:
1601 match = (proto == IPPROTO_TCP && offset == 0 &&
1602 flags_match(cmd, TCP(ulp)->th_flags));
1603 break;
1604
1605 case O_TCPOPTS:
1606 PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
1607 match = (proto == IPPROTO_TCP && offset == 0 &&
1608 tcpopts_match(TCP(ulp), cmd));
1609 break;
1610
1611 case O_TCPSEQ:
1612 match = (proto == IPPROTO_TCP && offset == 0 &&
1613 ((ipfw_insn_u32 *)cmd)->d[0] ==
1614 TCP(ulp)->th_seq);
1615 break;
1616
1617 case O_TCPACK:
1618 match = (proto == IPPROTO_TCP && offset == 0 &&
1619 ((ipfw_insn_u32 *)cmd)->d[0] ==
1620 TCP(ulp)->th_ack);
1621 break;
1622
1623 case O_TCPWIN:
1624 match = (proto == IPPROTO_TCP && offset == 0 &&
1625 cmd->arg1 == TCP(ulp)->th_win);
1626 break;
1627
1628 case O_ESTAB:
1629 /* reject packets which have SYN only */
1630 /* XXX should i also check for TH_ACK ? */
1631 match = (proto == IPPROTO_TCP && offset == 0 &&
1632 (TCP(ulp)->th_flags &
1633 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
1634 break;
1635
1636 case O_ALTQ: {
1637 struct pf_mtag *at;
1638 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
1639
1640 match = 1;
1641 at = pf_find_mtag(m);
1642 if (at != NULL && at->qid != 0)
1643 break;
1644 at = pf_get_mtag(m);
1645 if (at == NULL) {
1646 /*
1647 * Let the packet fall back to the
1648 * default ALTQ.
1649 */
1650 break;
1651 }
1652 at->qid = altq->qid;
1653 if (is_ipv4)
1654 at->af = AF_INET;
1655 else
1656 at->af = AF_LINK;
1657 at->hdr = ip;
1658 break;
1659 }
1660
1661 case O_LOG:
1662 ipfw_log(f, hlen, args, m,
1663 oif, offset, tablearg, ip);
1664 match = 1;
1665 break;
1666
1667 case O_PROB:
1668 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
1669 break;
1670
1671 case O_VERREVPATH:
1672 /* Outgoing packets automatically pass/match */
1673 match = ((oif != NULL) ||
1674 (m->m_pkthdr.rcvif == NULL) ||
1675 (
1676#ifdef INET6
1677 is_ipv6 ?
1678 verify_path6(&(args->f_id.src_ip6),
1679 m->m_pkthdr.rcvif) :
1680#endif
1681 verify_path(src_ip, m->m_pkthdr.rcvif,
1682 args->f_id.fib)));
1683 break;
1684
1685 case O_VERSRCREACH:
1686 /* Outgoing packets automatically pass/match */
1687 match = (hlen > 0 && ((oif != NULL) ||
1688#ifdef INET6
1689 is_ipv6 ?
1690 verify_path6(&(args->f_id.src_ip6),
1691 NULL) :
1692#endif
1693 verify_path(src_ip, NULL, args->f_id.fib)));
1694 break;
1695
1696 case O_ANTISPOOF:
1697 /* Outgoing packets automatically pass/match */
1698 if (oif == NULL && hlen > 0 &&
1699 ( (is_ipv4 && in_localaddr(src_ip))
1700#ifdef INET6
1701 || (is_ipv6 &&
1702 in6_localaddr(&(args->f_id.src_ip6)))
1703#endif
1704 ))
1705 match =
1706#ifdef INET6
1707 is_ipv6 ? verify_path6(
1708 &(args->f_id.src_ip6),
1709 m->m_pkthdr.rcvif) :
1710#endif
1711 verify_path(src_ip,
1712 m->m_pkthdr.rcvif,
1713 args->f_id.fib);
1714 else
1715 match = 1;
1716 break;
1717
1718 case O_IPSEC:
1719#ifdef IPSEC
1720 match = (m_tag_find(m,
1721 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
1722#endif
1723 /* otherwise no match */
1724 break;
1725
1726#ifdef INET6
1727 case O_IP6_SRC:
1728 match = is_ipv6 &&
1729 IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
1730 &((ipfw_insn_ip6 *)cmd)->addr6);
1731 break;
1732
1733 case O_IP6_DST:
1734 match = is_ipv6 &&
1735 IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
1736 &((ipfw_insn_ip6 *)cmd)->addr6);
1737 break;
1738 case O_IP6_SRC_MASK:
1739 case O_IP6_DST_MASK:
1740 if (is_ipv6) {
1741 int i = cmdlen - 1;
1742 struct in6_addr p;
1743 struct in6_addr *d =
1744 &((ipfw_insn_ip6 *)cmd)->addr6;
1745
1746 for (; !match && i > 0; d += 2,
1747 i -= F_INSN_SIZE(struct in6_addr)
1748 * 2) {
1749 p = (cmd->opcode ==
1750 O_IP6_SRC_MASK) ?
1751 args->f_id.src_ip6:
1752 args->f_id.dst_ip6;
1753 APPLY_MASK(&p, &d[1]);
1754 match =
1755 IN6_ARE_ADDR_EQUAL(&d[0],
1756 &p);
1757 }
1758 }
1759 break;
1760
1761 case O_FLOW6ID:
1762 match = is_ipv6 &&
1763 flow6id_match(args->f_id.flow_id6,
1764 (ipfw_insn_u32 *) cmd);
1765 break;
1766
1767 case O_EXT_HDR:
1768 match = is_ipv6 &&
1769 (ext_hd & ((ipfw_insn *) cmd)->arg1);
1770 break;
1771
1772 case O_IP6:
1773 match = is_ipv6;
1774 break;
1775#endif
1776
1777 case O_IP4:
1778 match = is_ipv4;
1779 break;
1780
1781 case O_TAG: {
1782 struct m_tag *mtag;
1783 uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
1784 tablearg : cmd->arg1;
1785
1786 /* Packet is already tagged with this tag? */
1787 mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
1788
1789 /* We have `untag' action when F_NOT flag is
1790 * present. And we must remove this mtag from
1791 * mbuf and reset `match' to zero (`match' will
1792 * be inverted later).
1793 * Otherwise we should allocate new mtag and
1794 * push it into mbuf.
1795 */
1796 if (cmd->len & F_NOT) { /* `untag' action */
1797 if (mtag != NULL)
1798 m_tag_delete(m, mtag);
1799 match = 0;
1800 } else {
1801 if (mtag == NULL) {
1802 mtag = m_tag_alloc( MTAG_IPFW,
1803 tag, 0, M_NOWAIT);
1804 if (mtag != NULL)
1805 m_tag_prepend(m, mtag);
1806 }
1807 match = 1;
1808 }
1809 break;
1810 }
1811
1812 case O_FIB: /* try match the specified fib */
1813 if (args->f_id.fib == cmd->arg1)
1814 match = 1;
1815 break;
1816
1817 case O_SOCKARG: {
1818 struct inpcb *inp = args->inp;
1819 struct inpcbinfo *pi;
1820
1821 if (is_ipv6) /* XXX can we remove this ? */
1822 break;
1823
1824 if (proto == IPPROTO_TCP)
1825 pi = &V_tcbinfo;
1826 else if (proto == IPPROTO_UDP)
1827 pi = &V_udbinfo;
1828 else
1829 break;
1830
1831 /*
1832 * XXXRW: so_user_cookie should almost
1833 * certainly be inp_user_cookie?
1834 */
1835
1836 /* For incoming packets, look up the
1837 inpcb using the src/dest ip/port tuple */
1838 if (inp == NULL) {
1839 inp = in_pcblookup(pi,
1840 src_ip, htons(src_port),
1841 dst_ip, htons(dst_port),
1842 INPLOOKUP_RLOCKPCB, NULL);
1843 if (inp != NULL) {
1844 tablearg =
1845 inp->inp_socket->so_user_cookie;
1846 if (tablearg)
1847 match = 1;
1848 INP_RUNLOCK(inp);
1849 }
1850 } else {
1851 if (inp->inp_socket) {
1852 tablearg =
1853 inp->inp_socket->so_user_cookie;
1854 if (tablearg)
1855 match = 1;
1856 }
1857 }
1858 break;
1859 }
1860
1861 case O_TAGGED: {
1862 struct m_tag *mtag;
1863 uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
1864 tablearg : cmd->arg1;
1865
1866 if (cmdlen == 1) {
1867 match = m_tag_locate(m, MTAG_IPFW,
1868 tag, NULL) != NULL;
1869 break;
1870 }
1871
1872 /* we have ranges */
1873 for (mtag = m_tag_first(m);
1874 mtag != NULL && !match;
1875 mtag = m_tag_next(m, mtag)) {
1876 uint16_t *p;
1877 int i;
1878
1879 if (mtag->m_tag_cookie != MTAG_IPFW)
1880 continue;
1881
1882 p = ((ipfw_insn_u16 *)cmd)->ports;
1883 i = cmdlen - 1;
1884 for(; !match && i > 0; i--, p += 2)
1885 match =
1886 mtag->m_tag_id >= p[0] &&
1887 mtag->m_tag_id <= p[1];
1888 }
1889 break;
1890 }
1891
1892 /*
1893 * The second set of opcodes represents 'actions',
1894 * i.e. the terminal part of a rule once the packet
1895 * matches all previous patterns.
1896 * Typically there is only one action for each rule,
1897 * and the opcode is stored at the end of the rule
1898 * (but there are exceptions -- see below).
1899 *
1900 * In general, here we set retval and terminate the
1901 * outer loop (would be a 'break 3' in some language,
1902 * but we need to set l=0, done=1)
1903 *
1904 * Exceptions:
1905 * O_COUNT and O_SKIPTO actions:
1906 * instead of terminating, we jump to the next rule
1907 * (setting l=0), or to the SKIPTO target (setting
1908 * f/f_len, cmd and l as needed), respectively.
1909 *
1910 * O_TAG, O_LOG and O_ALTQ action parameters:
1911 * perform some action and set match = 1;
1912 *
1913 * O_LIMIT and O_KEEP_STATE: these opcodes are
1914 * not real 'actions', and are stored right
1915 * before the 'action' part of the rule.
1916 * These opcodes try to install an entry in the
1917 * state tables; if successful, we continue with
1918 * the next opcode (match=1; break;), otherwise
1919 * the packet must be dropped (set retval,
1920 * break loops with l=0, done=1)
1921 *
1922 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
1923 * cause a lookup of the state table, and a jump
1924 * to the 'action' part of the parent rule
1925 * if an entry is found, or
1926 * (CHECK_STATE only) a jump to the next rule if
1927 * the entry is not found.
1928 * The result of the lookup is cached so that
1929 * further instances of these opcodes become NOPs.
1930 * The jump to the next rule is done by setting
1931 * l=0, cmdlen=0.
1932 */
1933 case O_LIMIT:
1934 case O_KEEP_STATE:
1935 if (ipfw_install_state(f,
1936 (ipfw_insn_limit *)cmd, args, tablearg)) {
1937 /* error or limit violation */
1938 retval = IP_FW_DENY;
1939 l = 0; /* exit inner loop */
1940 done = 1; /* exit outer loop */
1941 }
1942 match = 1;
1943 break;
1944
1945 case O_PROBE_STATE:
1946 case O_CHECK_STATE:
1947 /*
1948 * dynamic rules are checked at the first
1949 * keep-state or check-state occurrence,
1950 * with the result being stored in dyn_dir.
1951 * The compiler introduces a PROBE_STATE
1952 * instruction for us when we have a
1953 * KEEP_STATE (because PROBE_STATE needs
1954 * to be run first).
1955 */
1956 if (dyn_dir == MATCH_UNKNOWN &&
1957 (q = ipfw_lookup_dyn_rule(&args->f_id,
1958 &dyn_dir, proto == IPPROTO_TCP ?
1959 TCP(ulp) : NULL))
1960 != NULL) {
1961 /*
1962 * Found dynamic entry, update stats
1963 * and jump to the 'action' part of
1964 * the parent rule by setting
1965 * f, cmd, l and clearing cmdlen.
1966 */
1967 q->pcnt++;
1968 q->bcnt += pktlen;
1969 /* XXX we would like to have f_pos
1970 * readily accessible in the dynamic
1971 * rule, instead of having to
1972 * lookup q->rule.
1973 */
1974 f = q->rule;
1975 f_pos = ipfw_find_rule(chain,
1976 f->rulenum, f->id);
1977 cmd = ACTION_PTR(f);
1978 l = f->cmd_len - f->act_ofs;
1979 ipfw_dyn_unlock();
1980 cmdlen = 0;
1981 match = 1;
1982 break;
1983 }
1984 /*
1985 * Dynamic entry not found. If CHECK_STATE,
1986 * skip to next rule, if PROBE_STATE just
1987 * ignore and continue with next opcode.
1988 */
1989 if (cmd->opcode == O_CHECK_STATE)
1990 l = 0; /* exit inner loop */
1991 match = 1;
1992 break;
1993
1994 case O_ACCEPT:
1995 retval = 0; /* accept */
1996 l = 0; /* exit inner loop */
1997 done = 1; /* exit outer loop */
1998 break;
1999
2000 case O_PIPE:
2001 case O_QUEUE:
2002 set_match(args, f_pos, chain);
2003 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
2004 tablearg : cmd->arg1;
2005 if (cmd->opcode == O_PIPE)
2006 args->rule.info |= IPFW_IS_PIPE;
2007 if (V_fw_one_pass)
2008 args->rule.info |= IPFW_ONEPASS;
2009 retval = IP_FW_DUMMYNET;
2010 l = 0; /* exit inner loop */
2011 done = 1; /* exit outer loop */
2012 break;
2013
2014 case O_DIVERT:
2015 case O_TEE:
2016 if (args->eh) /* not on layer 2 */
2017 break;
2018 /* otherwise this is terminal */
2019 l = 0; /* exit inner loop */
2020 done = 1; /* exit outer loop */
2021 retval = (cmd->opcode == O_DIVERT) ?
2022 IP_FW_DIVERT : IP_FW_TEE;
2023 set_match(args, f_pos, chain);
2024 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
2025 tablearg : cmd->arg1;
2026 break;
2027
2028 case O_COUNT:
2029 f->pcnt++; /* update stats */
2030 f->bcnt += pktlen;
2031 f->timestamp = time_uptime;
2032 l = 0; /* exit inner loop */
2033 break;
2034
2035 case O_SKIPTO:
2036 f->pcnt++; /* update stats */
2037 f->bcnt += pktlen;
2038 f->timestamp = time_uptime;
2039 /* If possible use cached f_pos (in f->next_rule),
2040 * whose version is written in f->next_rule
2041 * (horrible hacks to avoid changing the ABI).
2042 */
2043 if (cmd->arg1 != IP_FW_TABLEARG &&
2044 (uintptr_t)f->x_next == chain->id) {
2045 f_pos = (uintptr_t)f->next_rule;
2046 } else {
2047 int i = (cmd->arg1 == IP_FW_TABLEARG) ?
2048 tablearg : cmd->arg1;
2049 /* make sure we do not jump backward */
2050 if (i <= f->rulenum)
2051 i = f->rulenum + 1;
2052 f_pos = ipfw_find_rule(chain, i, 0);
2053 /* update the cache */
2054 if (cmd->arg1 != IP_FW_TABLEARG) {
2055 f->next_rule =
2056 (void *)(uintptr_t)f_pos;
2057 f->x_next =
2058 (void *)(uintptr_t)chain->id;
2059 }
2060 }
2061 /*
2062 * Skip disabled rules, and re-enter
2063 * the inner loop with the correct
2064 * f_pos, f, l and cmd.
2065 * Also clear cmdlen and skip_or
2066 */
2067 for (; f_pos < chain->n_rules - 1 &&
2068 (V_set_disable &
2069 (1 << chain->map[f_pos]->set));
2070 f_pos++)
2071 ;
2072 /* Re-enter the inner loop at the skipto rule. */
2073 f = chain->map[f_pos];
2074 l = f->cmd_len;
2075 cmd = f->cmd;
2076 match = 1;
2077 cmdlen = 0;
2078 skip_or = 0;
2079 continue;
2080 break; /* not reached */
2081
2082 case O_REJECT:
2083 /*
2084 * Drop the packet and send a reject notice
2085 * if the packet is not ICMP (or is an ICMP
2086 * query), and it is not multicast/broadcast.
2087 */
2088 if (hlen > 0 && is_ipv4 && offset == 0 &&
2089 (proto != IPPROTO_ICMP ||
2090 is_icmp_query(ICMP(ulp))) &&
2091 !(m->m_flags & (M_BCAST|M_MCAST)) &&
2092 !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
2093 send_reject(args, cmd->arg1, iplen, ip);
2094 m = args->m;
2095 }
2096 /* FALLTHROUGH */
2097#ifdef INET6
2098 case O_UNREACH6:
2099 if (hlen > 0 && is_ipv6 &&
2100 ((offset & IP6F_OFF_MASK) == 0) &&
2101 (proto != IPPROTO_ICMPV6 ||
2102 (is_icmp6_query(icmp6_type) == 1)) &&
2103 !(m->m_flags & (M_BCAST|M_MCAST)) &&
2104 !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
2105 send_reject6(
2106 args, cmd->arg1, hlen,
2107 (struct ip6_hdr *)ip);
2108 m = args->m;
2109 }
2110 /* FALLTHROUGH */
2111#endif
2112 case O_DENY:
2113 retval = IP_FW_DENY;
2114 l = 0; /* exit inner loop */
2115 done = 1; /* exit outer loop */
2116 break;
2117
2118 case O_FORWARD_IP:
2119 if (args->eh) /* not valid on layer2 pkts */
2120 break;
2121 if (q == NULL || q->rule != f ||
2122 dyn_dir == MATCH_FORWARD) {
2123 struct sockaddr_in *sa;
2124 sa = &(((ipfw_insn_sa *)cmd)->sa);
2125 if (sa->sin_addr.s_addr == INADDR_ANY) {
2126 bcopy(sa, &args->hopstore,
2127 sizeof(*sa));
2128 args->hopstore.sin_addr.s_addr =
2129 htonl(tablearg);
2130 args->next_hop = &args->hopstore;
2131 } else {
2132 args->next_hop = sa;
2133 }
2134 }
2135 retval = IP_FW_PASS;
2136 l = 0; /* exit inner loop */
2137 done = 1; /* exit outer loop */
2138 break;
2139
2140 case O_NETGRAPH:
2141 case O_NGTEE:
2142 set_match(args, f_pos, chain);
2143 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
2144 tablearg : cmd->arg1;
2145 if (V_fw_one_pass)
2146 args->rule.info |= IPFW_ONEPASS;
2147 retval = (cmd->opcode == O_NETGRAPH) ?
2148 IP_FW_NETGRAPH : IP_FW_NGTEE;
2149 l = 0; /* exit inner loop */
2150 done = 1; /* exit outer loop */
2151 break;
2152
2153 case O_SETFIB: {
2154 uint32_t fib;
2155
2156 f->pcnt++; /* update stats */
2157 f->bcnt += pktlen;
2158 f->timestamp = time_uptime;
2159 fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg:
2160 cmd->arg1;
2161 if (fib >= rt_numfibs)
2162 fib = 0;
2163 M_SETFIB(m, fib);
2164 args->f_id.fib = fib;
2165 l = 0; /* exit inner loop */
2166 break;
2167 }
2168
2169 case O_NAT:
2170 if (!IPFW_NAT_LOADED) {
2171 retval = IP_FW_DENY;
2172 } else {
2173 struct cfg_nat *t;
2174 int nat_id;
2175
2176 set_match(args, f_pos, chain);
2177 t = ((ipfw_insn_nat *)cmd)->nat;
2178 if (t == NULL) {
2179 nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
2180 tablearg : cmd->arg1;
2181 t = (*lookup_nat_ptr)(&chain->nat, nat_id);
2182
2183 if (t == NULL) {
2184 retval = IP_FW_DENY;
2185 l = 0; /* exit inner loop */
2186 done = 1; /* exit outer loop */
2187 break;
2188 }
2189 if (cmd->arg1 != IP_FW_TABLEARG)
2190 ((ipfw_insn_nat *)cmd)->nat = t;
2191 }
2192 retval = ipfw_nat_ptr(args, t, m);
2193 }
2194 l = 0; /* exit inner loop */
2195 done = 1; /* exit outer loop */
2196 break;
2197
2198 case O_REASS: {
2199 int ip_off;
2200
2201 f->pcnt++;
2202 f->bcnt += pktlen;
2203 l = 0; /* in any case exit inner loop */
2204 ip_off = ntohs(ip->ip_off);
2205
2206 /* if not fragmented, go to next rule */
2207 if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
2208 break;
2209 /*
2210 * ip_reass() expects len & off in host
2211 * byte order.
2212 */
2213 SET_HOST_IPLEN(ip);
2214
2215 args->m = m = ip_reass(m);
2216
2217 /*
2218 * do IP header checksum fixup.
2219 */
2220 if (m == NULL) { /* fragment got swallowed */
2221 retval = IP_FW_DENY;
2222 } else { /* good, packet complete */
2223 int hlen;
2224
2225 ip = mtod(m, struct ip *);
2226 hlen = ip->ip_hl << 2;
2227 SET_NET_IPLEN(ip);
2228 ip->ip_sum = 0;
2229 if (hlen == sizeof(struct ip))
2230 ip->ip_sum = in_cksum_hdr(ip);
2231 else
2232 ip->ip_sum = in_cksum(m, hlen);
2233 retval = IP_FW_REASS;
2234 set_match(args, f_pos, chain);
2235 }
2236 done = 1; /* exit outer loop */
2237 break;
2238 }
2239
2240 default:
2241 panic("-- unknown opcode %d\n", cmd->opcode);
2242 } /* end of switch() on opcodes */
2243 /*
2244 * if we get here with l=0, then match is irrelevant.
2245 */
2246
2247 if (cmd->len & F_NOT)
2248 match = !match;
2249
2250 if (match) {
2251 if (cmd->len & F_OR)
2252 skip_or = 1;
2253 } else {
2254 if (!(cmd->len & F_OR)) /* not an OR block, */
2255 break; /* try next rule */
2256 }
2257
2258 } /* end of inner loop, scan opcodes */
2259#undef PULLUP_LEN
2260
2261 if (done)
2262 break;
2263
2264/* next_rule:; */ /* try next rule */
2265
2266 } /* end of outer for, scan rules */
2267
2268 if (done) {
2269 struct ip_fw *rule = chain->map[f_pos];
2270 /* Update statistics */
2271 rule->pcnt++;
2272 rule->bcnt += pktlen;
2273 rule->timestamp = time_uptime;
2274 } else {
2275 retval = IP_FW_DENY;
2276 printf("ipfw: ouch!, skip past end of rules, denying packet\n");
2277 }
2278 IPFW_RUNLOCK(chain);
2279#ifdef __FreeBSD__
2280 if (ucred_cache != NULL)
2281 crfree(ucred_cache);
2282#endif
2283 return (retval);
2284
2285pullup_failed:
2286 if (V_fw_verbose)
2287 printf("ipfw: pullup failed\n");
2288 return (IP_FW_DENY);
2289}
2290
2291/*
2292 * Module and VNET glue
2293 */
2294
2295/*
2296 * Stuff that must be initialised only on boot or module load
2297 */
2298static int
2299ipfw_init(void)
2300{
2301 int error = 0;
2302
2303 ipfw_dyn_attach();
2304 /*
2305 * Only print out this stuff the first time around,
2306 * when called from the sysinit code.
2307 */
2308 printf("ipfw2 "
2309#ifdef INET6
2310 "(+ipv6) "
2311#endif
2312 "initialized, divert %s, nat %s, "
2313 "rule-based forwarding "
2314#ifdef IPFIREWALL_FORWARD
2315 "enabled, "
2316#else
2317 "disabled, "
2318#endif
2319 "default to %s, logging ",
2320#ifdef IPDIVERT
2321 "enabled",
2322#else
2323 "loadable",
2324#endif
2325#ifdef IPFIREWALL_NAT
2326 "enabled",
2327#else
2328 "loadable",
2329#endif
2330 default_to_accept ? "accept" : "deny");
2331
2332 /*
2333 * Note: V_xxx variables can be accessed here but the vnet specific
2334 * initializer may not have been called yet for the VIMAGE case.
2335 * Tuneables will have been processed. We will print out values for
2336 * the default vnet.
2337 * XXX This should all be rationalized AFTER 8.0
2338 */
2339 if (V_fw_verbose == 0)
2340 printf("disabled\n");
2341 else if (V_verbose_limit == 0)
2342 printf("unlimited\n");
2343 else
2344 printf("limited to %d packets/entry by default\n",
2345 V_verbose_limit);
2346
2347 ipfw_log_bpf(1); /* init */
2348 return (error);
2349}
2350
/*
 * Global teardown, the counterpart of ipfw_init().  Runs once, on
 * module unload, for the removal of the last instance: calls
 * ipfw_log_bpf(0) (uninit), then ipfw_dyn_detach(), and finally
 * announces the unload on the console.
 */
static void
ipfw_destroy(void)
{

	ipfw_log_bpf(0);	/* uninit */
	ipfw_dyn_detach();
	printf("IP firewall unloaded\n");
}
2362
2363/*
2364 * Stuff that must be initialized for every instance
2365 * (including the first of course).
2366 */
2367static int
2368vnet_ipfw_init(const void *unused)
2369{
2370 int error;
2371 struct ip_fw *rule = NULL;
2372 struct ip_fw_chain *chain;
2373
2374 chain = &V_layer3_chain;
2375
2376 /* First set up some values that are compile time options */
2377 V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
2378 V_fw_deny_unknown_exthdrs = 1;
2379#ifdef IPFIREWALL_VERBOSE
2380 V_fw_verbose = 1;
2381#endif
2382#ifdef IPFIREWALL_VERBOSE_LIMIT
2383 V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
2384#endif
2385#ifdef IPFIREWALL_NAT
2386 LIST_INIT(&chain->nat);
2387#endif
2388
2389 /* insert the default rule and create the initial map */
2390 chain->n_rules = 1;
2391 chain->static_len = sizeof(struct ip_fw);
2392 chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
2393 if (chain->map)
2394 rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
2395 if (rule == NULL) {
2396 if (chain->map)
2397 free(chain->map, M_IPFW);
2398 printf("ipfw2: ENOSPC initializing default rule "
2399 "(support disabled)\n");
2400 return (ENOSPC);
2401 }
2402 error = ipfw_init_tables(chain);
2403 if (error) {
2404 panic("init_tables"); /* XXX Marko fix this ! */
2405 }
2406
2407 /* fill and insert the default rule */
2408 rule->act_ofs = 0;
2409 rule->rulenum = IPFW_DEFAULT_RULE;
2410 rule->cmd_len = 1;
2411 rule->set = RESVD_SET;
2412 rule->cmd[0].len = 1;
2413 rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
2414 chain->rules = chain->default_rule = chain->map[0] = rule;
2415 chain->id = rule->id = 1;
2416
2417 IPFW_LOCK_INIT(chain);
2418 ipfw_dyn_init();
2419
2420 /* First set up some values that are compile time options */
2421 V_ipfw_vnet_ready = 1; /* Open for business */
2422
2423 /*
2424 * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
2425 * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
2426 * we still keep the module alive because the sockopt and
2427 * layer2 paths are still useful.
2428 * ipfw[6]_hook return 0 on success, ENOENT on failure,
2429 * so we can ignore the exact return value and just set a flag.
2430 *
2431 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
2432 * changes in the underlying (per-vnet) variables trigger
2433 * immediate hook()/unhook() calls.
2434 * In layer2 we have the same behaviour, except that V_ether_ipfw
2435 * is checked on each packet because there are no pfil hooks.
2436 */
2437 V_ip_fw_ctl_ptr = ipfw_ctl;
2438 V_ip_fw_chk_ptr = ipfw_chk;
2439 error = ipfw_attach_hooks(1);
2440 return (error);
2441}
2442
2443/*
2444 * Called for the removal of each instance.
2445 */
2446static int
2447vnet_ipfw_uninit(const void *unused)
2448{
2449 struct ip_fw *reap, *rule;
2450 struct ip_fw_chain *chain = &V_layer3_chain;
2451 int i;
2452
2453 V_ipfw_vnet_ready = 0; /* tell new callers to go away */
2454 /*
2455 * disconnect from ipv4, ipv6, layer2 and sockopt.
2456 * Then grab, release and grab again the WLOCK so we make
2457 * sure the update is propagated and nobody will be in.
2458 */
2459 (void)ipfw_attach_hooks(0 /* detach */);
2460 V_ip_fw_chk_ptr = NULL;
2461 V_ip_fw_ctl_ptr = NULL;
2462 IPFW_UH_WLOCK(chain);
2463 IPFW_UH_WUNLOCK(chain);
2464 IPFW_UH_WLOCK(chain);
2465
2466 IPFW_WLOCK(chain);
2467 IPFW_WUNLOCK(chain);
2468 IPFW_WLOCK(chain);
2469
2470 ipfw_dyn_uninit(0); /* run the callout_drain */
2471 ipfw_destroy_tables(chain);
2472 reap = NULL;
2473 for (i = 0; i < chain->n_rules; i++) {
2474 rule = chain->map[i];
2475 rule->x_next = reap;
2476 reap = rule;
2477 }
2478 if (chain->map)
2479 free(chain->map, M_IPFW);
2480 IPFW_WUNLOCK(chain);
2481 IPFW_UH_WUNLOCK(chain);
2482 if (reap != NULL)
2483 ipfw_reap_rules(reap);
2484 IPFW_LOCK_DESTROY(chain);
2485 ipfw_dyn_uninit(1); /* free the remaining parts */
2486 return 0;
2487}
2488
2489/*
2490 * Module event handler.
2491 * In general we have the choice of handling most of these events by the
2492 * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
2493 * use the SYSINIT handlers as they are more capable of expressing the
2494 * flow of control during module and vnet operations, so this is just
2495 * a skeleton. Note there is no SYSINIT equivalent of the module
2496 * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
2497 */
2498static int
2499ipfw_modevent(module_t mod, int type, void *unused)
2500{
2501 int err = 0;
2502
2503 switch (type) {
2504 case MOD_LOAD:
2505 /* Called once at module load or
2506 * system boot if compiled in. */
2507 break;
2508 case MOD_QUIESCE:
2509 /* Called before unload. May veto unloading. */
2510 break;
2511 case MOD_UNLOAD:
2512 /* Called during unload. */
2513 break;
2514 case MOD_SHUTDOWN:
2515 /* Called during system shutdown. */
2516 break;
2517 default:
2518 err = EOPNOTSUPP;
2519 break;
2520 }
2521 return err;
2522}
2523
2524static moduledata_t ipfwmod = {
2525 "ipfw",
2526 ipfw_modevent,
2527 0
2528};
2529
2530/* Define startup order. */
2531#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
2532#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
2533#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
2534#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
2535
2536DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
2537MODULE_VERSION(ipfw, 2);
2538/* should declare some dependencies here */
2539
2540/*
2541 * Starting up. Done in order after ipfwmod() has been called.
2542 * VNET_SYSINIT is also called for each existing vnet and each new vnet.
2543 */
2544SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
2545 ipfw_init, NULL);
2546VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
2547 vnet_ipfw_init, NULL);
2548
2549/*
2550 * Closing up shop. These are done in REVERSE ORDER, but still
2551 * after ipfwmod() has been called. Not called on reboot.
2552 * VNET_SYSUNINIT is also called for each exiting vnet as it exits.
2553 * or when the module is unloaded.
2554 */
2555SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
2556 ipfw_destroy, NULL);
2557VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
2558 vnet_ipfw_uninit, NULL);
2559/* end of file */
699 pcb = (oif) ?
700 in_pcblookup(pi,
701 dst_ip, htons(dst_port),
702 src_ip, htons(src_port),
703 lookupflags, oif) :
704 in_pcblookup(pi,
705 src_ip, htons(src_port),
706 dst_ip, htons(dst_port),
707 lookupflags, NULL);
708 if (pcb != NULL) {
709 INP_RLOCK_ASSERT(pcb);
710 *uc = crhold(pcb->inp_cred);
711 *ugid_lookupp = 1;
712 INP_RUNLOCK(pcb);
713 }
714 if (*ugid_lookupp == 0) {
715 /*
716 * We tried and failed, set the variable to -1
717 * so we will not try again on this packet.
718 */
719 *ugid_lookupp = -1;
720 return (0);
721 }
722 }
723 if (insn->o.opcode == O_UID)
724 match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
725 else if (insn->o.opcode == O_GID)
726 match = groupmember((gid_t)insn->d[0], *uc);
727 else if (insn->o.opcode == O_JAIL)
728 match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
729 return match;
730#endif /* __FreeBSD__ */
731}
732
733/*
734 * Helper function to set args with info on the rule after the matching
735 * one. slot is precise, whereas we guess rule_id as they are
736 * assigned sequentially.
737 */
738static inline void
739set_match(struct ip_fw_args *args, int slot,
740 struct ip_fw_chain *chain)
741{
742 args->rule.chain_id = chain->id;
743 args->rule.slot = slot + 1; /* we use 0 as a marker */
744 args->rule.rule_id = 1 + chain->map[slot]->id;
745 args->rule.rulenum = chain->map[slot]->rulenum;
746}
747
748/*
749 * The main check routine for the firewall.
750 *
751 * All arguments are in args so we can modify them and return them
752 * back to the caller.
753 *
754 * Parameters:
755 *
756 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
757 * Starts with the IP header.
758 * args->eh (in) Mac header if present, NULL for layer3 packet.
759 * args->L3offset Number of bytes bypassed if we came from L2.
760 * e.g. often sizeof(eh) ** NOTYET **
761 * args->oif Outgoing interface, NULL if packet is incoming.
762 * The incoming interface is in the mbuf. (in)
763 * args->divert_rule (in/out)
764 * Skip up to the first rule past this rule number;
765 * upon return, non-zero port number for divert or tee.
766 *
767 * args->rule Pointer to the last matching rule (in/out)
768 * args->next_hop Socket we are forwarding to (out).
769 * args->f_id Addresses grabbed from the packet (out)
770 * args->rule.info a cookie depending on rule action
771 *
772 * Return value:
773 *
774 * IP_FW_PASS the packet must be accepted
775 * IP_FW_DENY the packet must be dropped
776 * IP_FW_DIVERT divert packet, port in m_tag
777 * IP_FW_TEE tee packet, port in m_tag
778 * IP_FW_DUMMYNET to dummynet, pipe/queue number in args->rule.info
779 * IP_FW_NETGRAPH into netgraph, cookie in args->rule.info
780 * args->rule contains the matching rule,
781 * args->rule.info has additional information.
782 *
783 */
784int
785ipfw_chk(struct ip_fw_args *args)
786{
787
788 /*
789 * Local variables holding state while processing a packet:
790 *
791 * IMPORTANT NOTE: to speed up the processing of rules, there
792 * are some assumption on the values of the variables, which
793 * are documented here. Should you change them, please check
794 * the implementation of the various instructions to make sure
795 * that they still work.
796 *
797 * args->eh The MAC header. It is non-null for a layer2
798 * packet, it is NULL for a layer-3 packet.
799 * **notyet**
800 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
801 *
802 * m | args->m Pointer to the mbuf, as received from the caller.
803 * It may change if ipfw_chk() does an m_pullup, or if it
804 * consumes the packet because it calls send_reject().
805 * XXX This has to change, so that ipfw_chk() never modifies
806 * or consumes the buffer.
807 * ip is the beginning of the ip(4 or 6) header.
808 * Calculated by adding the L3offset to the start of data.
809 * (Until we start using L3offset, the packet is
810 * supposed to start with the ip header).
811 */
812 struct mbuf *m = args->m;
813 struct ip *ip = mtod(m, struct ip *);
814
815 /*
816 * For rules which contain uid/gid or jail constraints, cache
817 * a copy of the users credentials after the pcb lookup has been
818 * executed. This will speed up the processing of rules with
819 * these types of constraints, as well as decrease contention
820 * on pcb related locks.
821 */
822#ifndef __FreeBSD__
823 struct bsd_ucred ucred_cache;
824#else
825 struct ucred *ucred_cache = NULL;
826#endif
827 int ucred_lookup = 0;
828
829 /*
830 * oif | args->oif If NULL, ipfw_chk has been called on the
831 * inbound path (ether_input, ip_input).
832 * If non-NULL, ipfw_chk has been called on the outbound path
833 * (ether_output, ip_output).
834 */
835 struct ifnet *oif = args->oif;
836
837 int f_pos = 0; /* index of current rule in the array */
838 int retval = 0;
839
840 /*
841 * hlen The length of the IP header.
842 */
843 u_int hlen = 0; /* hlen >0 means we have an IP pkt */
844
845 /*
846 * offset The offset of a fragment. offset != 0 means that
847 * we have a fragment at this offset of an IPv4 packet.
848 * offset == 0 means that (if this is an IPv4 packet)
849 * this is the first or only fragment.
850 * For IPv6 offset == 0 means there is no Fragment Header.
851 * If offset != 0 for IPv6 always use correct mask to
852 * get the correct offset because we add IP6F_MORE_FRAG
853 * to be able to detect the first fragment which would
854 * otherwise have offset = 0.
855 */
856 u_short offset = 0;
857
858 /*
859 * Local copies of addresses. They are only valid if we have
860 * an IP packet.
861 *
862 * proto The protocol. Set to 0 for non-ip packets,
863 * or to the protocol read from the packet otherwise.
864 * proto != 0 means that we have an IPv4 packet.
865 *
866 * src_port, dst_port port numbers, in HOST format. Only
867 * valid for TCP and UDP packets.
868 *
869 * src_ip, dst_ip ip addresses, in NETWORK format.
870 * Only valid for IPv4 packets.
871 */
872 uint8_t proto;
873 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
874 struct in_addr src_ip, dst_ip; /* NOTE: network format */
875 uint16_t iplen=0;
876 int pktlen;
877 uint16_t etype = 0; /* Host order stored ether type */
878
879 /*
880 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
881 * MATCH_NONE when checked and not matched (q = NULL),
882 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
883 */
884 int dyn_dir = MATCH_UNKNOWN;
885 ipfw_dyn_rule *q = NULL;
886 struct ip_fw_chain *chain = &V_layer3_chain;
887
888 /*
889 * We store in ulp a pointer to the upper layer protocol header.
890 * In the ipv4 case this is easy to determine from the header,
891 * but for ipv6 we might have some additional headers in the middle.
892 * ulp is NULL if not found.
893 */
894 void *ulp = NULL; /* upper layer protocol pointer. */
895
896 /* XXX ipv6 variables */
897 int is_ipv6 = 0;
898 uint8_t icmp6_type = 0;
899 uint16_t ext_hd = 0; /* bits vector for extension header filtering */
900 /* end of ipv6 variables */
901
902 int is_ipv4 = 0;
903
904 int done = 0; /* flag to exit the outer loop */
905
906 if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
907 return (IP_FW_PASS); /* accept */
908
909 dst_ip.s_addr = 0; /* make sure it is initialized */
910 src_ip.s_addr = 0; /* make sure it is initialized */
911 pktlen = m->m_pkthdr.len;
912 args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */
913 proto = args->f_id.proto = 0; /* mark f_id invalid */
914 /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
915
916/*
917 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
918 * then it sets p to point at the offset "len" in the mbuf. WARNING: the
919 * pointer might become stale after other pullups (but we never use it
920 * this way).
921 */
922#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T))
923#define PULLUP_LEN(_len, p, T) \
924do { \
925 int x = (_len) + T; \
926 if ((m)->m_len < x) { \
927 args->m = m = m_pullup(m, x); \
928 if (m == NULL) \
929 goto pullup_failed; \
930 } \
931 p = (mtod(m, char *) + (_len)); \
932} while (0)
933
934 /*
935 * if we have an ether header,
936 */
937 if (args->eh)
938 etype = ntohs(args->eh->ether_type);
939
940 /* Identify IP packets and fill up variables. */
941 if (pktlen >= sizeof(struct ip6_hdr) &&
942 (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
943 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
944 is_ipv6 = 1;
945 args->f_id.addr_type = 6;
946 hlen = sizeof(struct ip6_hdr);
947 proto = ip6->ip6_nxt;
948
949 /* Search extension headers to find upper layer protocols */
950 while (ulp == NULL) {
951 switch (proto) {
952 case IPPROTO_ICMPV6:
953 PULLUP_TO(hlen, ulp, struct icmp6_hdr);
954 icmp6_type = ICMP6(ulp)->icmp6_type;
955 break;
956
957 case IPPROTO_TCP:
958 PULLUP_TO(hlen, ulp, struct tcphdr);
959 dst_port = TCP(ulp)->th_dport;
960 src_port = TCP(ulp)->th_sport;
961 /* save flags for dynamic rules */
962 args->f_id._flags = TCP(ulp)->th_flags;
963 break;
964
965 case IPPROTO_SCTP:
966 PULLUP_TO(hlen, ulp, struct sctphdr);
967 src_port = SCTP(ulp)->src_port;
968 dst_port = SCTP(ulp)->dest_port;
969 break;
970
971 case IPPROTO_UDP:
972 PULLUP_TO(hlen, ulp, struct udphdr);
973 dst_port = UDP(ulp)->uh_dport;
974 src_port = UDP(ulp)->uh_sport;
975 break;
976
977 case IPPROTO_HOPOPTS: /* RFC 2460 */
978 PULLUP_TO(hlen, ulp, struct ip6_hbh);
979 ext_hd |= EXT_HOPOPTS;
980 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
981 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
982 ulp = NULL;
983 break;
984
985 case IPPROTO_ROUTING: /* RFC 2460 */
986 PULLUP_TO(hlen, ulp, struct ip6_rthdr);
987 switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
988 case 0:
989 ext_hd |= EXT_RTHDR0;
990 break;
991 case 2:
992 ext_hd |= EXT_RTHDR2;
993 break;
994 default:
995 printf("IPFW2: IPV6 - Unknown Routing "
996 "Header type(%d)\n",
997 ((struct ip6_rthdr *)ulp)->ip6r_type);
998 if (V_fw_deny_unknown_exthdrs)
999 return (IP_FW_DENY);
1000 break;
1001 }
1002 ext_hd |= EXT_ROUTING;
1003 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
1004 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
1005 ulp = NULL;
1006 break;
1007
1008 case IPPROTO_FRAGMENT: /* RFC 2460 */
1009 PULLUP_TO(hlen, ulp, struct ip6_frag);
1010 ext_hd |= EXT_FRAGMENT;
1011 hlen += sizeof (struct ip6_frag);
1012 proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
1013 offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
1014 IP6F_OFF_MASK;
1015 /* Add IP6F_MORE_FRAG for offset of first
1016 * fragment to be != 0. */
1017 offset |= ((struct ip6_frag *)ulp)->ip6f_offlg &
1018 IP6F_MORE_FRAG;
1019 if (offset == 0) {
1020 printf("IPFW2: IPV6 - Invalid Fragment "
1021 "Header\n");
1022 if (V_fw_deny_unknown_exthdrs)
1023 return (IP_FW_DENY);
1024 break;
1025 }
1026 args->f_id.extra =
1027 ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
1028 ulp = NULL;
1029 break;
1030
1031 case IPPROTO_DSTOPTS: /* RFC 2460 */
1032 PULLUP_TO(hlen, ulp, struct ip6_hbh);
1033 ext_hd |= EXT_DSTOPTS;
1034 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
1035 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
1036 ulp = NULL;
1037 break;
1038
1039 case IPPROTO_AH: /* RFC 2402 */
1040 PULLUP_TO(hlen, ulp, struct ip6_ext);
1041 ext_hd |= EXT_AH;
1042 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
1043 proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
1044 ulp = NULL;
1045 break;
1046
1047 case IPPROTO_ESP: /* RFC 2406 */
1048 PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */
1049 /* Anything past Seq# is variable length and
1050 * data past this ext. header is encrypted. */
1051 ext_hd |= EXT_ESP;
1052 break;
1053
1054 case IPPROTO_NONE: /* RFC 2460 */
1055 /*
1056 * Packet ends here, and IPv6 header has
1057 * already been pulled up. If ip6e_len!=0
1058 * then octets must be ignored.
1059 */
1060 ulp = ip; /* non-NULL to get out of loop. */
1061 break;
1062
1063 case IPPROTO_OSPFIGP:
1064 /* XXX OSPF header check? */
1065 PULLUP_TO(hlen, ulp, struct ip6_ext);
1066 break;
1067
1068 case IPPROTO_PIM:
1069 /* XXX PIM header check? */
1070 PULLUP_TO(hlen, ulp, struct pim);
1071 break;
1072
1073 case IPPROTO_CARP:
1074 PULLUP_TO(hlen, ulp, struct carp_header);
1075 if (((struct carp_header *)ulp)->carp_version !=
1076 CARP_VERSION)
1077 return (IP_FW_DENY);
1078 if (((struct carp_header *)ulp)->carp_type !=
1079 CARP_ADVERTISEMENT)
1080 return (IP_FW_DENY);
1081 break;
1082
1083 case IPPROTO_IPV6: /* RFC 2893 */
1084 PULLUP_TO(hlen, ulp, struct ip6_hdr);
1085 break;
1086
1087 case IPPROTO_IPV4: /* RFC 2893 */
1088 PULLUP_TO(hlen, ulp, struct ip);
1089 break;
1090
1091 default:
1092 printf("IPFW2: IPV6 - Unknown Extension "
1093 "Header(%d), ext_hd=%x\n", proto, ext_hd);
1094 if (V_fw_deny_unknown_exthdrs)
1095 return (IP_FW_DENY);
1096 PULLUP_TO(hlen, ulp, struct ip6_ext);
1097 break;
1098 } /*switch */
1099 }
1100 ip = mtod(m, struct ip *);
1101 ip6 = (struct ip6_hdr *)ip;
1102 args->f_id.src_ip6 = ip6->ip6_src;
1103 args->f_id.dst_ip6 = ip6->ip6_dst;
1104 args->f_id.src_ip = 0;
1105 args->f_id.dst_ip = 0;
1106 args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
1107 } else if (pktlen >= sizeof(struct ip) &&
1108 (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
1109 is_ipv4 = 1;
1110 hlen = ip->ip_hl << 2;
1111 args->f_id.addr_type = 4;
1112
1113 /*
1114 * Collect parameters into local variables for faster matching.
1115 */
1116 proto = ip->ip_p;
1117 src_ip = ip->ip_src;
1118 dst_ip = ip->ip_dst;
1119 offset = ntohs(ip->ip_off) & IP_OFFMASK;
1120 iplen = ntohs(ip->ip_len);
1121 pktlen = iplen < pktlen ? iplen : pktlen;
1122
1123 if (offset == 0) {
1124 switch (proto) {
1125 case IPPROTO_TCP:
1126 PULLUP_TO(hlen, ulp, struct tcphdr);
1127 dst_port = TCP(ulp)->th_dport;
1128 src_port = TCP(ulp)->th_sport;
1129 /* save flags for dynamic rules */
1130 args->f_id._flags = TCP(ulp)->th_flags;
1131 break;
1132
1133 case IPPROTO_SCTP:
1134 PULLUP_TO(hlen, ulp, struct sctphdr);
1135 src_port = SCTP(ulp)->src_port;
1136 dst_port = SCTP(ulp)->dest_port;
1137 break;
1138
1139 case IPPROTO_UDP:
1140 PULLUP_TO(hlen, ulp, struct udphdr);
1141 dst_port = UDP(ulp)->uh_dport;
1142 src_port = UDP(ulp)->uh_sport;
1143 break;
1144
1145 case IPPROTO_ICMP:
1146 PULLUP_TO(hlen, ulp, struct icmphdr);
1147 //args->f_id.flags = ICMP(ulp)->icmp_type;
1148 break;
1149
1150 default:
1151 break;
1152 }
1153 }
1154
1155 ip = mtod(m, struct ip *);
1156 args->f_id.src_ip = ntohl(src_ip.s_addr);
1157 args->f_id.dst_ip = ntohl(dst_ip.s_addr);
1158 }
1159#undef PULLUP_TO
1160 if (proto) { /* we may have port numbers, store them */
1161 args->f_id.proto = proto;
1162 args->f_id.src_port = src_port = ntohs(src_port);
1163 args->f_id.dst_port = dst_port = ntohs(dst_port);
1164 }
1165
1166 IPFW_RLOCK(chain);
1167 if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
1168 IPFW_RUNLOCK(chain);
1169 return (IP_FW_PASS); /* accept */
1170 }
1171 if (args->rule.slot) {
1172 /*
1173 * Packet has already been tagged as a result of a previous
1174 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
1175 * REASS, NETGRAPH, DIVERT/TEE...)
1176 * Validate the slot and continue from the next one
1177 * if still present, otherwise do a lookup.
1178 */
1179 f_pos = (args->rule.chain_id == chain->id) ?
1180 args->rule.slot :
1181 ipfw_find_rule(chain, args->rule.rulenum,
1182 args->rule.rule_id);
1183 } else {
1184 f_pos = 0;
1185 }
1186
1187 /*
1188 * Now scan the rules, and parse microinstructions for each rule.
1189 * We have two nested loops and an inner switch. Sometimes we
1190 * need to break out of one or both loops, or re-enter one of
1191 * the loops with updated variables. Loop variables are:
1192 *
1193 * f_pos (outer loop) points to the current rule.
1194 * On output it points to the matching rule.
1195 * done (outer loop) is used as a flag to break the loop.
1196 * l (inner loop) residual length of current rule.
1197 * cmd points to the current microinstruction.
1198 *
1199 * We break the inner loop by setting l=0 and possibly
1200 * cmdlen=0 if we don't want to advance cmd.
1201 * We break the outer loop by setting done=1
1202 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
1203 * as needed.
1204 */
1205 for (; f_pos < chain->n_rules; f_pos++) {
1206 ipfw_insn *cmd;
1207 uint32_t tablearg = 0;
1208 int l, cmdlen, skip_or; /* skip rest of OR block */
1209 struct ip_fw *f;
1210
1211 f = chain->map[f_pos];
1212 if (V_set_disable & (1 << f->set) )
1213 continue;
1214
1215 skip_or = 0;
1216 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
1217 l -= cmdlen, cmd += cmdlen) {
1218 int match;
1219
1220 /*
1221 * check_body is a jump target used when we find a
1222 * CHECK_STATE, and need to jump to the body of
1223 * the target rule.
1224 */
1225
1226/* check_body: */
1227 cmdlen = F_LEN(cmd);
1228 /*
1229 * An OR block (insn_1 || .. || insn_n) has the
1230 * F_OR bit set in all but the last instruction.
1231 * The first match will set "skip_or", and cause
1232 * the following instructions to be skipped until
1233 * past the one with the F_OR bit clear.
1234 */
1235 if (skip_or) { /* skip this instruction */
1236 if ((cmd->len & F_OR) == 0)
1237 skip_or = 0; /* next one is good */
1238 continue;
1239 }
1240 match = 0; /* set to 1 if we succeed */
1241
1242 switch (cmd->opcode) {
1243 /*
1244 * The first set of opcodes compares the packet's
1245 * fields with some pattern, setting 'match' if a
1246 * match is found. At the end of the loop there is
1247 * logic to deal with F_NOT and F_OR flags associated
1248 * with the opcode.
1249 */
1250 case O_NOP:
1251 match = 1;
1252 break;
1253
1254 case O_FORWARD_MAC:
1255 printf("ipfw: opcode %d unimplemented\n",
1256 cmd->opcode);
1257 break;
1258
1259 case O_GID:
1260 case O_UID:
1261 case O_JAIL:
1262 /*
1263 * We only check offset == 0 && proto != 0,
1264 * as this ensures that we have a
1265 * packet with the ports info.
1266 */
1267 if (offset!=0)
1268 break;
1269 if (is_ipv6) /* XXX to be fixed later */
1270 break;
1271 if (proto == IPPROTO_TCP ||
1272 proto == IPPROTO_UDP)
1273 match = check_uidgid(
1274 (ipfw_insn_u32 *)cmd,
1275 proto, oif,
1276 dst_ip, dst_port,
1277 src_ip, src_port, &ucred_lookup,
1278#ifdef __FreeBSD__
1279 &ucred_cache, args->inp);
1280#else
1281 (void *)&ucred_cache,
1282 (struct inpcb *)args->m);
1283#endif
1284 break;
1285
1286 case O_RECV:
1287 match = iface_match(m->m_pkthdr.rcvif,
1288 (ipfw_insn_if *)cmd);
1289 break;
1290
1291 case O_XMIT:
1292 match = iface_match(oif, (ipfw_insn_if *)cmd);
1293 break;
1294
1295 case O_VIA:
1296 match = iface_match(oif ? oif :
1297 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
1298 break;
1299
1300 case O_MACADDR2:
1301 if (args->eh != NULL) { /* have MAC header */
1302 u_int32_t *want = (u_int32_t *)
1303 ((ipfw_insn_mac *)cmd)->addr;
1304 u_int32_t *mask = (u_int32_t *)
1305 ((ipfw_insn_mac *)cmd)->mask;
1306 u_int32_t *hdr = (u_int32_t *)args->eh;
1307
1308 match =
1309 ( want[0] == (hdr[0] & mask[0]) &&
1310 want[1] == (hdr[1] & mask[1]) &&
1311 want[2] == (hdr[2] & mask[2]) );
1312 }
1313 break;
1314
1315 case O_MAC_TYPE:
1316 if (args->eh != NULL) {
1317 u_int16_t *p =
1318 ((ipfw_insn_u16 *)cmd)->ports;
1319 int i;
1320
1321 for (i = cmdlen - 1; !match && i>0;
1322 i--, p += 2)
1323 match = (etype >= p[0] &&
1324 etype <= p[1]);
1325 }
1326 break;
1327
1328 case O_FRAG:
1329 match = (offset != 0);
1330 break;
1331
1332 case O_IN: /* "out" is "not in" */
1333 match = (oif == NULL);
1334 break;
1335
1336 case O_LAYER2:
1337 match = (args->eh != NULL);
1338 break;
1339
1340 case O_DIVERTED:
1341 {
1342 /* For diverted packets, args->rule.info
1343 * contains the divert port (in host format)
1344 * reason and direction.
1345 */
1346 uint32_t i = args->rule.info;
1347 match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
1348 cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
1349 }
1350 break;
1351
1352 case O_PROTO:
1353 /*
1354 * We do not allow an arg of 0 so the
1355 * check of "proto" only suffices.
1356 */
1357 match = (proto == cmd->arg1);
1358 break;
1359
1360 case O_IP_SRC:
1361 match = is_ipv4 &&
1362 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
1363 src_ip.s_addr);
1364 break;
1365
1366 case O_IP_SRC_LOOKUP:
1367 case O_IP_DST_LOOKUP:
1368 if (is_ipv4) {
1369 uint32_t key =
1370 (cmd->opcode == O_IP_DST_LOOKUP) ?
1371 dst_ip.s_addr : src_ip.s_addr;
1372 uint32_t v = 0;
1373
1374 if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
1375 /* generic lookup. The key must be
1376 * in 32bit big-endian format.
1377 */
1378 v = ((ipfw_insn_u32 *)cmd)->d[1];
1379 if (v == 0)
1380 key = dst_ip.s_addr;
1381 else if (v == 1)
1382 key = src_ip.s_addr;
1383 else if (v == 6) /* dscp */
1384 key = (ip->ip_tos >> 2) & 0x3f;
1385 else if (offset != 0)
1386 break;
1387 else if (proto != IPPROTO_TCP &&
1388 proto != IPPROTO_UDP)
1389 break;
1390 else if (v == 2)
1391 key = htonl(dst_port);
1392 else if (v == 3)
1393 key = htonl(src_port);
1394 else if (v == 4 || v == 5) {
1395 check_uidgid(
1396 (ipfw_insn_u32 *)cmd,
1397 proto, oif,
1398 dst_ip, dst_port,
1399 src_ip, src_port, &ucred_lookup,
1400#ifdef __FreeBSD__
1401 &ucred_cache, args->inp);
1402 if (v == 4 /* O_UID */)
1403 key = ucred_cache->cr_uid;
1404 else if (v == 5 /* O_JAIL */)
1405 key = ucred_cache->cr_prison->pr_id;
1406#else /* !__FreeBSD__ */
1407 (void *)&ucred_cache,
1408 (struct inpcb *)args->m);
1409 if (v ==4 /* O_UID */)
1410 key = ucred_cache.uid;
1411 else if (v == 5 /* O_JAIL */)
1412 key = ucred_cache.xid;
1413#endif /* !__FreeBSD__ */
1414 key = htonl(key);
1415 } else
1416 break;
1417 }
1418 match = ipfw_lookup_table(chain,
1419 cmd->arg1, key, &v);
1420 if (!match)
1421 break;
1422 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
1423 match =
1424 ((ipfw_insn_u32 *)cmd)->d[0] == v;
1425 else
1426 tablearg = v;
1427 }
1428 break;
1429
1430 case O_IP_SRC_MASK:
1431 case O_IP_DST_MASK:
1432 if (is_ipv4) {
1433 uint32_t a =
1434 (cmd->opcode == O_IP_DST_MASK) ?
1435 dst_ip.s_addr : src_ip.s_addr;
1436 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
1437 int i = cmdlen-1;
1438
1439 for (; !match && i>0; i-= 2, p+= 2)
1440 match = (p[0] == (a & p[1]));
1441 }
1442 break;
1443
1444 case O_IP_SRC_ME:
1445 if (is_ipv4) {
1446 struct ifnet *tif;
1447
1448 INADDR_TO_IFP(src_ip, tif);
1449 match = (tif != NULL);
1450 break;
1451 }
1452#ifdef INET6
1453 /* FALLTHROUGH */
1454 case O_IP6_SRC_ME:
1455 match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
1456#endif
1457 break;
1458
1459 case O_IP_DST_SET:
1460 case O_IP_SRC_SET:
1461 if (is_ipv4) {
1462 u_int32_t *d = (u_int32_t *)(cmd+1);
1463 u_int32_t addr =
1464 cmd->opcode == O_IP_DST_SET ?
1465 args->f_id.dst_ip :
1466 args->f_id.src_ip;
1467
1468 if (addr < d[0])
1469 break;
1470 addr -= d[0]; /* subtract base */
1471 match = (addr < cmd->arg1) &&
1472 ( d[ 1 + (addr>>5)] &
1473 (1<<(addr & 0x1f)) );
1474 }
1475 break;
1476
1477 case O_IP_DST:
1478 match = is_ipv4 &&
1479 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
1480 dst_ip.s_addr);
1481 break;
1482
1483 case O_IP_DST_ME:
1484 if (is_ipv4) {
1485 struct ifnet *tif;
1486
1487 INADDR_TO_IFP(dst_ip, tif);
1488 match = (tif != NULL);
1489 break;
1490 }
1491#ifdef INET6
1492 /* FALLTHROUGH */
1493 case O_IP6_DST_ME:
1494 match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
1495#endif
1496 break;
1497
1498
1499 case O_IP_SRCPORT:
1500 case O_IP_DSTPORT:
1501 /*
1502 * offset == 0 && proto != 0 is enough
1503 * to guarantee that we have a
1504 * packet with port info.
1505 */
1506 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
1507 && offset == 0) {
1508 u_int16_t x =
1509 (cmd->opcode == O_IP_SRCPORT) ?
1510 src_port : dst_port ;
1511 u_int16_t *p =
1512 ((ipfw_insn_u16 *)cmd)->ports;
1513 int i;
1514
1515 for (i = cmdlen - 1; !match && i>0;
1516 i--, p += 2)
1517 match = (x>=p[0] && x<=p[1]);
1518 }
1519 break;
1520
1521 case O_ICMPTYPE:
1522 match = (offset == 0 && proto==IPPROTO_ICMP &&
1523 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
1524 break;
1525
1526#ifdef INET6
1527 case O_ICMP6TYPE:
1528 match = is_ipv6 && offset == 0 &&
1529 proto==IPPROTO_ICMPV6 &&
1530 icmp6type_match(
1531 ICMP6(ulp)->icmp6_type,
1532 (ipfw_insn_u32 *)cmd);
1533 break;
1534#endif /* INET6 */
1535
1536 case O_IPOPT:
1537 match = (is_ipv4 &&
1538 ipopts_match(ip, cmd) );
1539 break;
1540
1541 case O_IPVER:
1542 match = (is_ipv4 &&
1543 cmd->arg1 == ip->ip_v);
1544 break;
1545
1546 case O_IPID:
1547 case O_IPLEN:
1548 case O_IPTTL:
1549 if (is_ipv4) { /* only for IP packets */
1550 uint16_t x;
1551 uint16_t *p;
1552 int i;
1553
1554 if (cmd->opcode == O_IPLEN)
1555 x = iplen;
1556 else if (cmd->opcode == O_IPTTL)
1557 x = ip->ip_ttl;
1558 else /* must be IPID */
1559 x = ntohs(ip->ip_id);
1560 if (cmdlen == 1) {
1561 match = (cmd->arg1 == x);
1562 break;
1563 }
1564 /* otherwise we have ranges */
1565 p = ((ipfw_insn_u16 *)cmd)->ports;
1566 i = cmdlen - 1;
1567 for (; !match && i>0; i--, p += 2)
1568 match = (x >= p[0] && x <= p[1]);
1569 }
1570 break;
1571
1572 case O_IPPRECEDENCE:
1573 match = (is_ipv4 &&
1574 (cmd->arg1 == (ip->ip_tos & 0xe0)) );
1575 break;
1576
1577 case O_IPTOS:
1578 match = (is_ipv4 &&
1579 flags_match(cmd, ip->ip_tos));
1580 break;
1581
1582 case O_TCPDATALEN:
1583 if (proto == IPPROTO_TCP && offset == 0) {
1584 struct tcphdr *tcp;
1585 uint16_t x;
1586 uint16_t *p;
1587 int i;
1588
1589 tcp = TCP(ulp);
1590 x = iplen -
1591 ((ip->ip_hl + tcp->th_off) << 2);
1592 if (cmdlen == 1) {
1593 match = (cmd->arg1 == x);
1594 break;
1595 }
1596 /* otherwise we have ranges */
1597 p = ((ipfw_insn_u16 *)cmd)->ports;
1598 i = cmdlen - 1;
1599 for (; !match && i>0; i--, p += 2)
1600 match = (x >= p[0] && x <= p[1]);
1601 }
1602 break;
1603
1604 case O_TCPFLAGS:
1605 match = (proto == IPPROTO_TCP && offset == 0 &&
1606 flags_match(cmd, TCP(ulp)->th_flags));
1607 break;
1608
1609 case O_TCPOPTS:
1610 PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
1611 match = (proto == IPPROTO_TCP && offset == 0 &&
1612 tcpopts_match(TCP(ulp), cmd));
1613 break;
1614
1615 case O_TCPSEQ:
1616 match = (proto == IPPROTO_TCP && offset == 0 &&
1617 ((ipfw_insn_u32 *)cmd)->d[0] ==
1618 TCP(ulp)->th_seq);
1619 break;
1620
1621 case O_TCPACK:
1622 match = (proto == IPPROTO_TCP && offset == 0 &&
1623 ((ipfw_insn_u32 *)cmd)->d[0] ==
1624 TCP(ulp)->th_ack);
1625 break;
1626
1627 case O_TCPWIN:
1628 match = (proto == IPPROTO_TCP && offset == 0 &&
1629 cmd->arg1 == TCP(ulp)->th_win);
1630 break;
1631
1632 case O_ESTAB:
1633 /* reject packets which have SYN only */
1634 /* XXX should i also check for TH_ACK ? */
1635 match = (proto == IPPROTO_TCP && offset == 0 &&
1636 (TCP(ulp)->th_flags &
1637 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
1638 break;
1639
1640 case O_ALTQ: {
1641 struct pf_mtag *at;
1642 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
1643
1644 match = 1;
1645 at = pf_find_mtag(m);
1646 if (at != NULL && at->qid != 0)
1647 break;
1648 at = pf_get_mtag(m);
1649 if (at == NULL) {
1650 /*
1651 * Let the packet fall back to the
1652 * default ALTQ.
1653 */
1654 break;
1655 }
1656 at->qid = altq->qid;
1657 if (is_ipv4)
1658 at->af = AF_INET;
1659 else
1660 at->af = AF_LINK;
1661 at->hdr = ip;
1662 break;
1663 }
1664
1665 case O_LOG:
1666 ipfw_log(f, hlen, args, m,
1667 oif, offset, tablearg, ip);
1668 match = 1;
1669 break;
1670
1671 case O_PROB:
1672 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
1673 break;
1674
1675 case O_VERREVPATH:
1676 /* Outgoing packets automatically pass/match */
1677 match = ((oif != NULL) ||
1678 (m->m_pkthdr.rcvif == NULL) ||
1679 (
1680#ifdef INET6
1681 is_ipv6 ?
1682 verify_path6(&(args->f_id.src_ip6),
1683 m->m_pkthdr.rcvif) :
1684#endif
1685 verify_path(src_ip, m->m_pkthdr.rcvif,
1686 args->f_id.fib)));
1687 break;
1688
1689 case O_VERSRCREACH:
1690 /* Outgoing packets automatically pass/match */
1691 match = (hlen > 0 && ((oif != NULL) ||
1692#ifdef INET6
1693 is_ipv6 ?
1694 verify_path6(&(args->f_id.src_ip6),
1695 NULL) :
1696#endif
1697 verify_path(src_ip, NULL, args->f_id.fib)));
1698 break;
1699
1700 case O_ANTISPOOF:
1701 /* Outgoing packets automatically pass/match */
1702 if (oif == NULL && hlen > 0 &&
1703 ( (is_ipv4 && in_localaddr(src_ip))
1704#ifdef INET6
1705 || (is_ipv6 &&
1706 in6_localaddr(&(args->f_id.src_ip6)))
1707#endif
1708 ))
1709 match =
1710#ifdef INET6
1711 is_ipv6 ? verify_path6(
1712 &(args->f_id.src_ip6),
1713 m->m_pkthdr.rcvif) :
1714#endif
1715 verify_path(src_ip,
1716 m->m_pkthdr.rcvif,
1717 args->f_id.fib);
1718 else
1719 match = 1;
1720 break;
1721
1722 case O_IPSEC:
1723#ifdef IPSEC
1724 match = (m_tag_find(m,
1725 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
1726#endif
1727 /* otherwise no match */
1728 break;
1729
1730#ifdef INET6
1731 case O_IP6_SRC:
1732 match = is_ipv6 &&
1733 IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
1734 &((ipfw_insn_ip6 *)cmd)->addr6);
1735 break;
1736
1737 case O_IP6_DST:
1738 match = is_ipv6 &&
1739 IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
1740 &((ipfw_insn_ip6 *)cmd)->addr6);
1741 break;
1742 case O_IP6_SRC_MASK:
1743 case O_IP6_DST_MASK:
1744 if (is_ipv6) {
1745 int i = cmdlen - 1;
1746 struct in6_addr p;
1747 struct in6_addr *d =
1748 &((ipfw_insn_ip6 *)cmd)->addr6;
1749
1750 for (; !match && i > 0; d += 2,
1751 i -= F_INSN_SIZE(struct in6_addr)
1752 * 2) {
1753 p = (cmd->opcode ==
1754 O_IP6_SRC_MASK) ?
1755 args->f_id.src_ip6:
1756 args->f_id.dst_ip6;
1757 APPLY_MASK(&p, &d[1]);
1758 match =
1759 IN6_ARE_ADDR_EQUAL(&d[0],
1760 &p);
1761 }
1762 }
1763 break;
1764
1765 case O_FLOW6ID:
1766 match = is_ipv6 &&
1767 flow6id_match(args->f_id.flow_id6,
1768 (ipfw_insn_u32 *) cmd);
1769 break;
1770
1771 case O_EXT_HDR:
1772 match = is_ipv6 &&
1773 (ext_hd & ((ipfw_insn *) cmd)->arg1);
1774 break;
1775
1776 case O_IP6:
1777 match = is_ipv6;
1778 break;
1779#endif
1780
1781 case O_IP4:
1782 match = is_ipv4;
1783 break;
1784
1785 case O_TAG: {
1786 struct m_tag *mtag;
1787 uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
1788 tablearg : cmd->arg1;
1789
1790 /* Packet is already tagged with this tag? */
1791 mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
1792
1793 /* We have `untag' action when F_NOT flag is
1794 * present. And we must remove this mtag from
1795 * mbuf and reset `match' to zero (`match' will
1796 * be inversed later).
1797 * Otherwise we should allocate new mtag and
1798 * push it into mbuf.
1799 */
1800 if (cmd->len & F_NOT) { /* `untag' action */
1801 if (mtag != NULL)
1802 m_tag_delete(m, mtag);
1803 match = 0;
1804 } else {
1805 if (mtag == NULL) {
1806 mtag = m_tag_alloc( MTAG_IPFW,
1807 tag, 0, M_NOWAIT);
1808 if (mtag != NULL)
1809 m_tag_prepend(m, mtag);
1810 }
1811 match = 1;
1812 }
1813 break;
1814 }
1815
1816 case O_FIB: /* try match the specified fib */
1817 if (args->f_id.fib == cmd->arg1)
1818 match = 1;
1819 break;
1820
1821 case O_SOCKARG: {
1822 struct inpcb *inp = args->inp;
1823 struct inpcbinfo *pi;
1824
1825 if (is_ipv6) /* XXX can we remove this ? */
1826 break;
1827
1828 if (proto == IPPROTO_TCP)
1829 pi = &V_tcbinfo;
1830 else if (proto == IPPROTO_UDP)
1831 pi = &V_udbinfo;
1832 else
1833 break;
1834
1835 /*
1836 * XXXRW: so_user_cookie should almost
1837 * certainly be inp_user_cookie?
1838 */
1839
1840 /* For an incoming packet, look up the
1841 inpcb using the src/dest ip/port tuple */
1842 if (inp == NULL) {
1843 inp = in_pcblookup(pi,
1844 src_ip, htons(src_port),
1845 dst_ip, htons(dst_port),
1846 INPLOOKUP_RLOCKPCB, NULL);
1847 if (inp != NULL) {
1848 tablearg =
1849 inp->inp_socket->so_user_cookie;
1850 if (tablearg)
1851 match = 1;
1852 INP_RUNLOCK(inp);
1853 }
1854 } else {
1855 if (inp->inp_socket) {
1856 tablearg =
1857 inp->inp_socket->so_user_cookie;
1858 if (tablearg)
1859 match = 1;
1860 }
1861 }
1862 break;
1863 }
1864
1865 case O_TAGGED: {
1866 struct m_tag *mtag;
1867 uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
1868 tablearg : cmd->arg1;
1869
1870 if (cmdlen == 1) {
1871 match = m_tag_locate(m, MTAG_IPFW,
1872 tag, NULL) != NULL;
1873 break;
1874 }
1875
1876 /* we have ranges */
1877 for (mtag = m_tag_first(m);
1878 mtag != NULL && !match;
1879 mtag = m_tag_next(m, mtag)) {
1880 uint16_t *p;
1881 int i;
1882
1883 if (mtag->m_tag_cookie != MTAG_IPFW)
1884 continue;
1885
1886 p = ((ipfw_insn_u16 *)cmd)->ports;
1887 i = cmdlen - 1;
1888 for(; !match && i > 0; i--, p += 2)
1889 match =
1890 mtag->m_tag_id >= p[0] &&
1891 mtag->m_tag_id <= p[1];
1892 }
1893 break;
1894 }
1895
1896 /*
1897 * The second set of opcodes represents 'actions',
1898 * i.e. the terminal part of a rule once the packet
1899 * matches all previous patterns.
1900 * Typically there is only one action for each rule,
1901 * and the opcode is stored at the end of the rule
1902 * (but there are exceptions -- see below).
1903 *
1904 * In general, here we set retval and terminate the
1905 * outer loop (would be a 'break 3' in some language,
1906 * but we need to set l=0, done=1)
1907 *
1908 * Exceptions:
1909 * O_COUNT and O_SKIPTO actions:
1910 * instead of terminating, we jump to the next rule
1911 * (setting l=0), or to the SKIPTO target (setting
1912 * f/f_len, cmd and l as needed), respectively.
1913 *
1914 * O_TAG, O_LOG and O_ALTQ action parameters:
1915 * perform some action and set match = 1;
1916 *
1917 * O_LIMIT and O_KEEP_STATE: these opcodes are
1918 * not real 'actions', and are stored right
1919 * before the 'action' part of the rule.
1920 * These opcodes try to install an entry in the
1921 * state tables; if successful, we continue with
1922 * the next opcode (match=1; break;), otherwise
1923 * the packet must be dropped (set retval,
1924 * break loops with l=0, done=1)
1925 *
1926 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
1927 * cause a lookup of the state table, and a jump
1928 * to the 'action' part of the parent rule
1929 * if an entry is found, or
1930 * (CHECK_STATE only) a jump to the next rule if
1931 * the entry is not found.
1932 * The result of the lookup is cached so that
1933 * further instances of these opcodes become NOPs.
1934 * The jump to the next rule is done by setting
1935 * l=0, cmdlen=0.
1936 */
1937 case O_LIMIT:
1938 case O_KEEP_STATE:
1939 if (ipfw_install_state(f,
1940 (ipfw_insn_limit *)cmd, args, tablearg)) {
1941 /* error or limit violation */
1942 retval = IP_FW_DENY;
1943 l = 0; /* exit inner loop */
1944 done = 1; /* exit outer loop */
1945 }
1946 match = 1;
1947 break;
1948
1949 case O_PROBE_STATE:
1950 case O_CHECK_STATE:
1951 /*
1952 * dynamic rules are checked at the first
1953 * keep-state or check-state occurrence,
1954 * with the result being stored in dyn_dir.
1955 * The compiler introduces a PROBE_STATE
1956 * instruction for us when we have a
1957 * KEEP_STATE (because PROBE_STATE needs
1958 * to be run first).
1959 */
1960 if (dyn_dir == MATCH_UNKNOWN &&
1961 (q = ipfw_lookup_dyn_rule(&args->f_id,
1962 &dyn_dir, proto == IPPROTO_TCP ?
1963 TCP(ulp) : NULL))
1964 != NULL) {
1965 /*
1966 * Found dynamic entry, update stats
1967 * and jump to the 'action' part of
1968 * the parent rule by setting
1969 * f, cmd, l and clearing cmdlen.
1970 */
1971 q->pcnt++;
1972 q->bcnt += pktlen;
1973 /* XXX we would like to have f_pos
1974 * readily accessible in the dynamic
1975 * rule, instead of having to
1976 * lookup q->rule.
1977 */
1978 f = q->rule;
1979 f_pos = ipfw_find_rule(chain,
1980 f->rulenum, f->id);
1981 cmd = ACTION_PTR(f);
1982 l = f->cmd_len - f->act_ofs;
1983 ipfw_dyn_unlock();
1984 cmdlen = 0;
1985 match = 1;
1986 break;
1987 }
1988 /*
1989 * Dynamic entry not found. If CHECK_STATE,
1990 * skip to next rule, if PROBE_STATE just
1991 * ignore and continue with next opcode.
1992 */
1993 if (cmd->opcode == O_CHECK_STATE)
1994 l = 0; /* exit inner loop */
1995 match = 1;
1996 break;
1997
1998 case O_ACCEPT:
1999 retval = 0; /* accept */
2000 l = 0; /* exit inner loop */
2001 done = 1; /* exit outer loop */
2002 break;
2003
2004 case O_PIPE:
2005 case O_QUEUE:
2006 set_match(args, f_pos, chain);
2007 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
2008 tablearg : cmd->arg1;
2009 if (cmd->opcode == O_PIPE)
2010 args->rule.info |= IPFW_IS_PIPE;
2011 if (V_fw_one_pass)
2012 args->rule.info |= IPFW_ONEPASS;
2013 retval = IP_FW_DUMMYNET;
2014 l = 0; /* exit inner loop */
2015 done = 1; /* exit outer loop */
2016 break;
2017
2018 case O_DIVERT:
2019 case O_TEE:
2020 if (args->eh) /* not on layer 2 */
2021 break;
2022 /* otherwise this is terminal */
2023 l = 0; /* exit inner loop */
2024 done = 1; /* exit outer loop */
2025 retval = (cmd->opcode == O_DIVERT) ?
2026 IP_FW_DIVERT : IP_FW_TEE;
2027 set_match(args, f_pos, chain);
2028 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
2029 tablearg : cmd->arg1;
2030 break;
2031
2032 case O_COUNT:
2033 f->pcnt++; /* update stats */
2034 f->bcnt += pktlen;
2035 f->timestamp = time_uptime;
2036 l = 0; /* exit inner loop */
2037 break;
2038
2039 case O_SKIPTO:
2040 f->pcnt++; /* update stats */
2041 f->bcnt += pktlen;
2042 f->timestamp = time_uptime;
2043 /* If possible use cached f_pos (in f->next_rule),
2044 * whose version is written in f->next_rule
2045 * (horrible hacks to avoid changing the ABI).
2046 */
2047 if (cmd->arg1 != IP_FW_TABLEARG &&
2048 (uintptr_t)f->x_next == chain->id) {
2049 f_pos = (uintptr_t)f->next_rule;
2050 } else {
2051 int i = (cmd->arg1 == IP_FW_TABLEARG) ?
2052 tablearg : cmd->arg1;
2053 /* make sure we do not jump backward */
2054 if (i <= f->rulenum)
2055 i = f->rulenum + 1;
2056 f_pos = ipfw_find_rule(chain, i, 0);
2057 /* update the cache */
2058 if (cmd->arg1 != IP_FW_TABLEARG) {
2059 f->next_rule =
2060 (void *)(uintptr_t)f_pos;
2061 f->x_next =
2062 (void *)(uintptr_t)chain->id;
2063 }
2064 }
2065 /*
2066 * Skip disabled rules, and re-enter
2067 * the inner loop with the correct
2068 * f_pos, f, l and cmd.
2069 * Also clear cmdlen and skip_or
2070 */
2071 for (; f_pos < chain->n_rules - 1 &&
2072 (V_set_disable &
2073 (1 << chain->map[f_pos]->set));
2074 f_pos++)
2075 ;
2076 /* Re-enter the inner loop at the skipto rule. */
2077 f = chain->map[f_pos];
2078 l = f->cmd_len;
2079 cmd = f->cmd;
2080 match = 1;
2081 cmdlen = 0;
2082 skip_or = 0;
2083 continue;
2084 break; /* not reached */
2085
2086 case O_REJECT:
2087 /*
2088 * Drop the packet and send a reject notice
2089 * if the packet is not ICMP (or is an ICMP
2090 * query), and it is not multicast/broadcast.
2091 */
2092 if (hlen > 0 && is_ipv4 && offset == 0 &&
2093 (proto != IPPROTO_ICMP ||
2094 is_icmp_query(ICMP(ulp))) &&
2095 !(m->m_flags & (M_BCAST|M_MCAST)) &&
2096 !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
2097 send_reject(args, cmd->arg1, iplen, ip);
2098 m = args->m;
2099 }
2100 /* FALLTHROUGH */
2101#ifdef INET6
2102 case O_UNREACH6:
2103 if (hlen > 0 && is_ipv6 &&
2104 ((offset & IP6F_OFF_MASK) == 0) &&
2105 (proto != IPPROTO_ICMPV6 ||
2106 (is_icmp6_query(icmp6_type) == 1)) &&
2107 !(m->m_flags & (M_BCAST|M_MCAST)) &&
2108 !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
2109 send_reject6(
2110 args, cmd->arg1, hlen,
2111 (struct ip6_hdr *)ip);
2112 m = args->m;
2113 }
2114 /* FALLTHROUGH */
2115#endif
2116 case O_DENY:
2117 retval = IP_FW_DENY;
2118 l = 0; /* exit inner loop */
2119 done = 1; /* exit outer loop */
2120 break;
2121
2122 case O_FORWARD_IP:
2123 if (args->eh) /* not valid on layer2 pkts */
2124 break;
2125 if (q == NULL || q->rule != f ||
2126 dyn_dir == MATCH_FORWARD) {
2127 struct sockaddr_in *sa;
2128 sa = &(((ipfw_insn_sa *)cmd)->sa);
2129 if (sa->sin_addr.s_addr == INADDR_ANY) {
2130 bcopy(sa, &args->hopstore,
2131 sizeof(*sa));
2132 args->hopstore.sin_addr.s_addr =
2133 htonl(tablearg);
2134 args->next_hop = &args->hopstore;
2135 } else {
2136 args->next_hop = sa;
2137 }
2138 }
2139 retval = IP_FW_PASS;
2140 l = 0; /* exit inner loop */
2141 done = 1; /* exit outer loop */
2142 break;
2143
2144 case O_NETGRAPH:
2145 case O_NGTEE:
2146 set_match(args, f_pos, chain);
2147 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
2148 tablearg : cmd->arg1;
2149 if (V_fw_one_pass)
2150 args->rule.info |= IPFW_ONEPASS;
2151 retval = (cmd->opcode == O_NETGRAPH) ?
2152 IP_FW_NETGRAPH : IP_FW_NGTEE;
2153 l = 0; /* exit inner loop */
2154 done = 1; /* exit outer loop */
2155 break;
2156
2157 case O_SETFIB: {
2158 uint32_t fib;
2159
2160 f->pcnt++; /* update stats */
2161 f->bcnt += pktlen;
2162 f->timestamp = time_uptime;
2163 fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg:
2164 cmd->arg1;
2165 if (fib >= rt_numfibs)
2166 fib = 0;
2167 M_SETFIB(m, fib);
2168 args->f_id.fib = fib;
2169 l = 0; /* exit inner loop */
2170 break;
2171 }
2172
2173 case O_NAT:
2174 if (!IPFW_NAT_LOADED) {
2175 retval = IP_FW_DENY;
2176 } else {
2177 struct cfg_nat *t;
2178 int nat_id;
2179
2180 set_match(args, f_pos, chain);
2181 t = ((ipfw_insn_nat *)cmd)->nat;
2182 if (t == NULL) {
2183 nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
2184 tablearg : cmd->arg1;
2185 t = (*lookup_nat_ptr)(&chain->nat, nat_id);
2186
2187 if (t == NULL) {
2188 retval = IP_FW_DENY;
2189 l = 0; /* exit inner loop */
2190 done = 1; /* exit outer loop */
2191 break;
2192 }
2193 if (cmd->arg1 != IP_FW_TABLEARG)
2194 ((ipfw_insn_nat *)cmd)->nat = t;
2195 }
2196 retval = ipfw_nat_ptr(args, t, m);
2197 }
2198 l = 0; /* exit inner loop */
2199 done = 1; /* exit outer loop */
2200 break;
2201
2202 case O_REASS: {
2203 int ip_off;
2204
2205 f->pcnt++;
2206 f->bcnt += pktlen;
2207 l = 0; /* in any case exit inner loop */
2208 ip_off = ntohs(ip->ip_off);
2209
2210 /* if not fragmented, go to next rule */
2211 if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
2212 break;
2213 /*
2214 * ip_reass() expects len & off in host
2215 * byte order.
2216 */
2217 SET_HOST_IPLEN(ip);
2218
2219 args->m = m = ip_reass(m);
2220
2221 /*
2222 * do IP header checksum fixup.
2223 */
2224 if (m == NULL) { /* fragment got swallowed */
2225 retval = IP_FW_DENY;
2226 } else { /* good, packet complete */
2227 int hlen;
2228
2229 ip = mtod(m, struct ip *);
2230 hlen = ip->ip_hl << 2;
2231 SET_NET_IPLEN(ip);
2232 ip->ip_sum = 0;
2233 if (hlen == sizeof(struct ip))
2234 ip->ip_sum = in_cksum_hdr(ip);
2235 else
2236 ip->ip_sum = in_cksum(m, hlen);
2237 retval = IP_FW_REASS;
2238 set_match(args, f_pos, chain);
2239 }
2240 done = 1; /* exit outer loop */
2241 break;
2242 }
2243
2244 default:
2245 panic("-- unknown opcode %d\n", cmd->opcode);
2246 } /* end of switch() on opcodes */
2247 /*
2248 * if we get here with l=0, then match is irrelevant.
2249 */
2250
2251 if (cmd->len & F_NOT)
2252 match = !match;
2253
2254 if (match) {
2255 if (cmd->len & F_OR)
2256 skip_or = 1;
2257 } else {
2258 if (!(cmd->len & F_OR)) /* not an OR block, */
2259 break; /* try next rule */
2260 }
2261
2262 } /* end of inner loop, scan opcodes */
2263#undef PULLUP_LEN
2264
2265 if (done)
2266 break;
2267
2268/* next_rule:; */ /* try next rule */
2269
2270 } /* end of outer for, scan rules */
2271
2272 if (done) {
2273 struct ip_fw *rule = chain->map[f_pos];
2274 /* Update statistics */
2275 rule->pcnt++;
2276 rule->bcnt += pktlen;
2277 rule->timestamp = time_uptime;
2278 } else {
2279 retval = IP_FW_DENY;
2280 printf("ipfw: ouch!, skip past end of rules, denying packet\n");
2281 }
2282 IPFW_RUNLOCK(chain);
2283#ifdef __FreeBSD__
2284 if (ucred_cache != NULL)
2285 crfree(ucred_cache);
2286#endif
2287 return (retval);
2288
2289pullup_failed:
2290 if (V_fw_verbose)
2291 printf("ipfw: pullup failed\n");
2292 return (IP_FW_DENY);
2293}
2294
2295/*
2296 * Module and VNET glue
2297 */
2298
2299/*
2300 * Stuff that must be initialised only on boot or module load
2301 */
2302static int
2303ipfw_init(void)
2304{
2305 int error = 0;
2306
2307 ipfw_dyn_attach();
2308 /*
2309 * Only print out this stuff the first time around,
2310 * when called from the sysinit code.
2311 */
2312 printf("ipfw2 "
2313#ifdef INET6
2314 "(+ipv6) "
2315#endif
2316 "initialized, divert %s, nat %s, "
2317 "rule-based forwarding "
2318#ifdef IPFIREWALL_FORWARD
2319 "enabled, "
2320#else
2321 "disabled, "
2322#endif
2323 "default to %s, logging ",
2324#ifdef IPDIVERT
2325 "enabled",
2326#else
2327 "loadable",
2328#endif
2329#ifdef IPFIREWALL_NAT
2330 "enabled",
2331#else
2332 "loadable",
2333#endif
2334 default_to_accept ? "accept" : "deny");
2335
2336 /*
2337 * Note: V_xxx variables can be accessed here but the vnet specific
2338 * initializer may not have been called yet for the VIMAGE case.
2339 * Tuneables will have been processed. We will print out values for
2340 * the default vnet.
2341 * XXX This should all be rationalized AFTER 8.0
2342 */
2343 if (V_fw_verbose == 0)
2344 printf("disabled\n");
2345 else if (V_verbose_limit == 0)
2346 printf("unlimited\n");
2347 else
2348 printf("limited to %d packets/entry by default\n",
2349 V_verbose_limit);
2350
2351 ipfw_log_bpf(1); /* init */
2352 return (error);
2353}
2354
/*
 * Module-unload teardown; runs once, when the last instance is removed.
 * Undoes the work of ipfw_init() in the opposite order and logs a
 * message on the console.
 */
static void
ipfw_destroy(void)
{
	/* Reverse of ipfw_init(): logging first, then ipfw_dyn_detach(). */
	ipfw_log_bpf(0);	/* uninit */
	ipfw_dyn_detach();

	printf("IP firewall unloaded\n");
}
2366
2367/*
2368 * Stuff that must be initialized for every instance
2369 * (including the first of course).
2370 */
2371static int
2372vnet_ipfw_init(const void *unused)
2373{
2374 int error;
2375 struct ip_fw *rule = NULL;
2376 struct ip_fw_chain *chain;
2377
2378 chain = &V_layer3_chain;
2379
2380 /* First set up some values that are compile time options */
2381 V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
2382 V_fw_deny_unknown_exthdrs = 1;
2383#ifdef IPFIREWALL_VERBOSE
2384 V_fw_verbose = 1;
2385#endif
2386#ifdef IPFIREWALL_VERBOSE_LIMIT
2387 V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
2388#endif
2389#ifdef IPFIREWALL_NAT
2390 LIST_INIT(&chain->nat);
2391#endif
2392
2393 /* insert the default rule and create the initial map */
2394 chain->n_rules = 1;
2395 chain->static_len = sizeof(struct ip_fw);
2396 chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
2397 if (chain->map)
2398 rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
2399 if (rule == NULL) {
2400 if (chain->map)
2401 free(chain->map, M_IPFW);
2402 printf("ipfw2: ENOSPC initializing default rule "
2403 "(support disabled)\n");
2404 return (ENOSPC);
2405 }
2406 error = ipfw_init_tables(chain);
2407 if (error) {
2408 panic("init_tables"); /* XXX Marko fix this ! */
2409 }
2410
2411 /* fill and insert the default rule */
2412 rule->act_ofs = 0;
2413 rule->rulenum = IPFW_DEFAULT_RULE;
2414 rule->cmd_len = 1;
2415 rule->set = RESVD_SET;
2416 rule->cmd[0].len = 1;
2417 rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
2418 chain->rules = chain->default_rule = chain->map[0] = rule;
2419 chain->id = rule->id = 1;
2420
2421 IPFW_LOCK_INIT(chain);
2422 ipfw_dyn_init();
2423
2424 /* First set up some values that are compile time options */
2425 V_ipfw_vnet_ready = 1; /* Open for business */
2426
2427 /*
2428 * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
2429 * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
2430 * we still keep the module alive because the sockopt and
2431 * layer2 paths are still useful.
2432 * ipfw[6]_hook return 0 on success, ENOENT on failure,
2433 * so we can ignore the exact return value and just set a flag.
2434 *
2435 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
2436 * changes in the underlying (per-vnet) variables trigger
2437 * immediate hook()/unhook() calls.
2438 * In layer2 we have the same behaviour, except that V_ether_ipfw
2439 * is checked on each packet because there are no pfil hooks.
2440 */
2441 V_ip_fw_ctl_ptr = ipfw_ctl;
2442 V_ip_fw_chk_ptr = ipfw_chk;
2443 error = ipfw_attach_hooks(1);
2444 return (error);
2445}
2446
2447/*
2448 * Called for the removal of each instance.
2449 */
2450static int
2451vnet_ipfw_uninit(const void *unused)
2452{
2453 struct ip_fw *reap, *rule;
2454 struct ip_fw_chain *chain = &V_layer3_chain;
2455 int i;
2456
2457 V_ipfw_vnet_ready = 0; /* tell new callers to go away */
2458 /*
2459 * disconnect from ipv4, ipv6, layer2 and sockopt.
2460 * Then grab, release and grab again the WLOCK so we make
2461 * sure the update is propagated and nobody will be in.
2462 */
2463 (void)ipfw_attach_hooks(0 /* detach */);
2464 V_ip_fw_chk_ptr = NULL;
2465 V_ip_fw_ctl_ptr = NULL;
2466 IPFW_UH_WLOCK(chain);
2467 IPFW_UH_WUNLOCK(chain);
2468 IPFW_UH_WLOCK(chain);
2469
2470 IPFW_WLOCK(chain);
2471 IPFW_WUNLOCK(chain);
2472 IPFW_WLOCK(chain);
2473
2474 ipfw_dyn_uninit(0); /* run the callout_drain */
2475 ipfw_destroy_tables(chain);
2476 reap = NULL;
2477 for (i = 0; i < chain->n_rules; i++) {
2478 rule = chain->map[i];
2479 rule->x_next = reap;
2480 reap = rule;
2481 }
2482 if (chain->map)
2483 free(chain->map, M_IPFW);
2484 IPFW_WUNLOCK(chain);
2485 IPFW_UH_WUNLOCK(chain);
2486 if (reap != NULL)
2487 ipfw_reap_rules(reap);
2488 IPFW_LOCK_DESTROY(chain);
2489 ipfw_dyn_uninit(1); /* free the remaining parts */
2490 return 0;
2491}
2492
2493/*
2494 * Module event handler.
2495 * In general we have the choice of handling most of these events by the
2496 * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
2497 * use the SYSINIT handlers as they are more capable of expressing the
2498 * flow of control during module and vnet operations, so this is just
2499 * a skeleton. Note there is no SYSINIT equivalent of the module
2500 * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
2501 */
2502static int
2503ipfw_modevent(module_t mod, int type, void *unused)
2504{
2505 int err = 0;
2506
2507 switch (type) {
2508 case MOD_LOAD:
2509 /* Called once at module load or
2510 * system boot if compiled in. */
2511 break;
2512 case MOD_QUIESCE:
2513 /* Called before unload. May veto unloading. */
2514 break;
2515 case MOD_UNLOAD:
2516 /* Called during unload. */
2517 break;
2518 case MOD_SHUTDOWN:
2519 /* Called during system shutdown. */
2520 break;
2521 default:
2522 err = EOPNOTSUPP;
2523 break;
2524 }
2525 return err;
2526}
2527
2528static moduledata_t ipfwmod = {
2529 "ipfw",
2530 ipfw_modevent,
2531 0
2532};
2533
2534/* Define startup order. */
2535#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
2536#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
2537#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
2538#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
2539
2540DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
2541MODULE_VERSION(ipfw, 2);
2542/* should declare some dependencies here */
2543
/*
 * Starting up.  These run in order, after the module event handler
 * has been called.  VNET_SYSINIT is also invoked for each existing
 * vnet and for each vnet created later.
 */
SYSINIT(ipfw_init,
    IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
    ipfw_init, NULL);
VNET_SYSINIT(vnet_ipfw_init,
    IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
    vnet_ipfw_init, NULL);

/*
 * Closing up shop.  These run in REVERSE ORDER, but still after the
 * module event handler has been called.  They are not called on reboot.
 * VNET_SYSUNINIT is also invoked for each vnet as it exits, and when
 * the module is unloaded.
 */
SYSUNINIT(ipfw_destroy,
    IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
    ipfw_destroy, NULL);
VNET_SYSUNINIT(vnet_ipfw_uninit,
    IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
    vnet_ipfw_uninit, NULL);
2563/* end of file */