ip_fw_pfil.c revision 263497
/*-
 * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2621918Sjkh
2721918Sjkh#include <sys/cdefs.h>
2821918Sjkh__FBSDID("$FreeBSD: head/sys/netpfil/ipfw/ip_fw_pfil.c 263497 2014-03-21 17:07:18Z glebius $");
2979755Sdd
3021918Sjkh#include "opt_ipfw.h"
3121918Sjkh#include "opt_inet.h"
3221918Sjkh#include "opt_inet6.h"
3321918Sjkh#ifndef INET
3468965Sru#error IPFIREWALL requires INET.
3521918Sjkh#endif /* INET */
3621918Sjkh
3721918Sjkh#include <sys/param.h>
3821918Sjkh#include <sys/systm.h>
3929449Scharnier#include <sys/malloc.h>
4021918Sjkh#include <sys/mbuf.h>
4121918Sjkh#include <sys/module.h>
4229449Scharnier#include <sys/kernel.h>
43165567Sru#include <sys/lock.h>
44165567Sru#include <sys/rwlock.h>
45165567Sru#include <sys/socket.h>
4621918Sjkh#include <sys/sysctl.h>
4721918Sjkh
48165567Sru#include <net/if.h>
49165567Sru#include <net/route.h>
50165567Sru#include <net/ethernet.h>
51165567Sru#include <net/pfil.h>
52165567Sru#include <net/vnet.h>
5321918Sjkh
5421918Sjkh#include <netinet/in.h>
5521918Sjkh#include <netinet/in_systm.h>
5621918Sjkh#include <netinet/ip.h>
57131500Sru#include <netinet/ip_var.h>
58131500Sru#include <netinet/ip_fw.h>
59165567Sru#ifdef INET6
60165567Sru#include <netinet/ip6.h>
61165567Sru#include <netinet6/ip6_var.h>
62165567Sru#endif
63165567Sru
64165567Sru#include <netgraph/ng_ipfw.h>
65165567Sru
6621918Sjkh#include <netpfil/ipfw/ip_fw_private.h>
67131500Sru
68131500Sru#include <machine/in_cksum.h>
6921918Sjkh
/*
 * Per-VNET enable switches.  Each one is exported as a sysctl further
 * down in this file and is read by ipfw_attach_hooks() to decide which
 * pfil hooks to install.  IPv4 and IPv6 filtering default to on, the
 * layer-2 (Ethernet) switch defaults to off.
 */
static VNET_DEFINE(int, fw_enable) = 1;
#define V_fw_enable	VNET(fw_enable)

#ifdef INET6
static VNET_DEFINE(int, fw6_enable) = 1;
#define V_fw6_enable	VNET(fw6_enable)
#endif

static VNET_DEFINE(int, fwlink_enable) = 0;
#define V_fwlink_enable	VNET(fwlink_enable)

/* sysctl handler shared by all of the enable switches above. */
int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);

/* Forward declarations. */
static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
/* pfil hook for IP packets; installed for AF_INET and AF_INET6. */
int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int,
	struct inpcb *);
/* pfil hook for Ethernet frames; installed for AF_LINK. */
int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int,
	struct inpcb *);
8921918Sjkh
/*
 * sysctl knobs for the three enable switches.  Every knob is a
 * read-write integer whose accesses go through ipfw_chg_hook(), so that
 * a change of value also attaches or detaches the matching pfil hook.
 */
#ifdef SYSCTL_NODE

SYSBEGIN(f1)

/* IPv4 switch, "enable" under _net_inet_ip_fw. */
SYSCTL_DECL(_net_inet_ip_fw);
SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0,
    ipfw_chg_hook, "I", "Enable ipfw");
#ifdef INET6
/* IPv6 switch, "enable" under _net_inet6_ip6_fw. */
SYSCTL_DECL(_net_inet6_ip6_fw);
SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0,
    ipfw_chg_hook, "I", "Enable ipfw+6");
#endif /* INET6 */

/* Layer-2 switch, "ipfw" under _net_link_ether. */
SYSCTL_DECL(_net_link_ether);
SYSCTL_VNET_PROC(_net_link_ether, OID_AUTO, ipfw,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fwlink_enable), 0,
    ipfw_chg_hook, "I", "Pass ether pkts through firewall");

SYSEND

#endif /* SYSCTL_NODE */
113165567Sru
114165567Sru/*
115165567Sru * The pfilter hook to pass packets to ipfw_chk and then to
116165567Sru * dummynet, divert, netgraph or other modules.
117165567Sru * The packet may be consumed.
118165567Sru */
119165567Sruint
12021918Sjkhipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
121165567Sru    struct inpcb *inp)
122140442Sru{
123140442Sru	struct ip_fw_args args;
124140442Sru	struct m_tag *tag;
12521918Sjkh	int ipfw;
12681622Sru	int ret;
12781622Sru
12881622Sru	/* convert dir to IPFW values */
12981622Sru	dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
13081622Sru	bzero(&args, sizeof(args));
13129449Scharnier
13221918Sjkhagain:
133	/*
134	 * extract and remove the tag if present. If we are left
135	 * with onepass, optimize the outgoing path.
136	 */
137	tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
138	if (tag != NULL) {
139		args.rule = *((struct ipfw_rule_ref *)(tag+1));
140		m_tag_delete(*m0, tag);
141		if (args.rule.info & IPFW_ONEPASS)
142			return (0);
143	}
144
145	args.m = *m0;
146	args.oif = dir == DIR_OUT ? ifp : NULL;
147	args.inp = inp;
148
149	ipfw = ipfw_chk(&args);
150	*m0 = args.m;
151
152	KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL",
153	    __func__));
154
155	/* breaking out of the switch means drop */
156	ret = 0;	/* default return value for pass */
157	switch (ipfw) {
158	case IP_FW_PASS:
159		/* next_hop may be set by ipfw_chk */
160		if (args.next_hop == NULL && args.next_hop6 == NULL)
161			break; /* pass */
162#if (!defined(INET6) && !defined(INET))
163		ret = EACCES;
164#else
165	    {
166		struct m_tag *fwd_tag;
167		size_t len;
168
169		KASSERT(args.next_hop == NULL || args.next_hop6 == NULL,
170		    ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__,
171		     args.next_hop, args.next_hop6));
172#ifdef INET6
173		if (args.next_hop6 != NULL)
174			len = sizeof(struct sockaddr_in6);
175#endif
176#ifdef INET
177		if (args.next_hop != NULL)
178			len = sizeof(struct sockaddr_in);
179#endif
180
181		/* Incoming packets should not be tagged so we do not
182		 * m_tag_find. Outgoing packets may be tagged, so we
183		 * reuse the tag if present.
184		 */
185		fwd_tag = (dir == DIR_IN) ? NULL :
186			m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
187		if (fwd_tag != NULL) {
188			m_tag_unlink(*m0, fwd_tag);
189		} else {
190			fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len,
191			    M_NOWAIT);
192			if (fwd_tag == NULL) {
193				ret = EACCES;
194				break; /* i.e. drop */
195			}
196		}
197#ifdef INET6
198		if (args.next_hop6 != NULL) {
199			bcopy(args.next_hop6, (fwd_tag+1), len);
200			if (in6_localip(&args.next_hop6->sin6_addr))
201				(*m0)->m_flags |= M_FASTFWD_OURS;
202			(*m0)->m_flags |= M_IP6_NEXTHOP;
203		}
204#endif
205#ifdef INET
206		if (args.next_hop != NULL) {
207			bcopy(args.next_hop, (fwd_tag+1), len);
208			if (in_localip(args.next_hop->sin_addr))
209				(*m0)->m_flags |= M_FASTFWD_OURS;
210			(*m0)->m_flags |= M_IP_NEXTHOP;
211		}
212#endif
213		m_tag_prepend(*m0, fwd_tag);
214	    }
215#endif /* INET || INET6 */
216		break;
217
218	case IP_FW_DENY:
219		ret = EACCES;
220		break; /* i.e. drop */
221
222	case IP_FW_DUMMYNET:
223		ret = EACCES;
224		if (ip_dn_io_ptr == NULL)
225			break; /* i.e. drop */
226		if (mtod(*m0, struct ip *)->ip_v == 4)
227			ret = ip_dn_io_ptr(m0, dir, &args);
228		else if (mtod(*m0, struct ip *)->ip_v == 6)
229			ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args);
230		else
231			break; /* drop it */
232		/*
233		 * XXX should read the return value.
234		 * dummynet normally eats the packet and sets *m0=NULL
235		 * unless the packet can be sent immediately. In this
236		 * case args is updated and we should re-run the
237		 * check without clearing args.
238		 */
239		if (*m0 != NULL)
240			goto again;
241		break;
242
243	case IP_FW_TEE:
244	case IP_FW_DIVERT:
245		if (ip_divert_ptr == NULL) {
246			ret = EACCES;
247			break; /* i.e. drop */
248		}
249		ret = ipfw_divert(m0, dir, &args.rule,
250			(ipfw == IP_FW_TEE) ? 1 : 0);
251		/* continue processing for the original packet (tee). */
252		if (*m0)
253			goto again;
254		break;
255
256	case IP_FW_NGTEE:
257	case IP_FW_NETGRAPH:
258		if (ng_ipfw_input_p == NULL) {
259			ret = EACCES;
260			break; /* i.e. drop */
261		}
262		ret = ng_ipfw_input_p(m0, dir, &args,
263			(ipfw == IP_FW_NGTEE) ? 1 : 0);
264		if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
265			goto again;	/* continue with packet */
266		break;
267
268	case IP_FW_NAT:
269		/* honor one-pass in case of successful nat */
270		if (V_fw_one_pass)
271			break; /* ret is already 0 */
272		goto again;
273
274	case IP_FW_REASS:
275		goto again;		/* continue with packet */
276
277	default:
278		KASSERT(0, ("%s: unknown retval", __func__));
279	}
280
281	if (ret != 0) {
282		if (*m0)
283			FREE_PKT(*m0);
284		*m0 = NULL;
285	}
286
287	return ret;
288}
289
290/*
291 * ipfw processing for ethernet packets (in and out).
292 * Inteface is NULL from ether_demux, and ifp from
293 * ether_output_frame.
294 */
295int
296ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir,
297    struct inpcb *inp)
298{
299	struct ether_header *eh;
300	struct ether_header save_eh;
301	struct mbuf *m;
302	int i, ret;
303	struct ip_fw_args args;
304	struct m_tag *mtag;
305
306	/* fetch start point from rule, if any */
307	mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
308	if (mtag == NULL) {
309		args.rule.slot = 0;
310	} else {
311		/* dummynet packet, already partially processed */
312		struct ipfw_rule_ref *r;
313
314		/* XXX can we free it after use ? */
315		mtag->m_tag_id = PACKET_TAG_NONE;
316		r = (struct ipfw_rule_ref *)(mtag + 1);
317		if (r->info & IPFW_ONEPASS)
318			return (0);
319		args.rule = *r;
320	}
321
322	/* I need some amt of data to be contiguous */
323	m = *m0;
324	i = min(m->m_pkthdr.len, max_protohdr);
325	if (m->m_len < i) {
326		m = m_pullup(m, i);
327		if (m == NULL) {
328			*m0 = m;
329			return (0);
330		}
331	}
332	eh = mtod(m, struct ether_header *);
333	save_eh = *eh;			/* save copy for restore below */
334	m_adj(m, ETHER_HDR_LEN);	/* strip ethernet header */
335
336	args.m = m;		/* the packet we are looking at		*/
337	args.oif = dst;		/* destination, if any			*/
338	args.next_hop = NULL;	/* we do not support forward yet	*/
339	args.next_hop6 = NULL;	/* we do not support forward yet	*/
340	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
341	args.inp = NULL;	/* used by ipfw uid/gid/jail rules	*/
342	i = ipfw_chk(&args);
343	m = args.m;
344	if (m != NULL) {
345		/*
346		 * Restore Ethernet header, as needed, in case the
347		 * mbuf chain was replaced by ipfw.
348		 */
349		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
350		if (m == NULL) {
351			*m0 = NULL;
352			return (0);
353		}
354		if (eh != mtod(m, struct ether_header *))
355			bcopy(&save_eh, mtod(m, struct ether_header *),
356				ETHER_HDR_LEN);
357	}
358	*m0 = m;
359
360	ret = 0;
361	/* Check result of ipfw_chk() */
362	switch (i) {
363	case IP_FW_PASS:
364		break;
365
366	case IP_FW_DENY:
367		ret = EACCES;
368		break; /* i.e. drop */
369
370	case IP_FW_DUMMYNET:
371		ret = EACCES;
372		int dir;
373
374		if (ip_dn_io_ptr == NULL)
375			break; /* i.e. drop */
376
377		*m0 = NULL;
378		dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
379		ip_dn_io_ptr(&m, dir, &args);
380		return 0;
381
382	default:
383		KASSERT(0, ("%s: unknown retval", __func__));
384	}
385
386	if (ret != 0) {
387		if (*m0)
388			FREE_PKT(*m0);
389		*m0 = NULL;
390	}
391
392	return ret;
393}
394
395/* do the divert, return 1 on error 0 on success */
396static int
397ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
398	int tee)
399{
400	/*
401	 * ipfw_chk() has already tagged the packet with the divert tag.
402	 * If tee is set, copy packet and return original.
403	 * If not tee, consume packet and send it to divert socket.
404	 */
405	struct mbuf *clone;
406	struct ip *ip = mtod(*m0, struct ip *);
407	struct m_tag *tag;
408
409	/* Cloning needed for tee? */
410	if (tee == 0) {
411		clone = *m0;	/* use the original mbuf */
412		*m0 = NULL;
413	} else {
414		clone = m_dup(*m0, M_NOWAIT);
415		/* If we cannot duplicate the mbuf, we sacrifice the divert
416		 * chain and continue with the tee-ed packet.
417		 */
418		if (clone == NULL)
419			return 1;
420	}
421
422	/*
423	 * Divert listeners can normally handle non-fragmented packets,
424	 * but we can only reass in the non-tee case.
425	 * This means that listeners on a tee rule may get fragments,
426	 * and have to live with that.
427	 * Note that we now have the 'reass' ipfw option so if we care
428	 * we can do it before a 'tee'.
429	 */
430	if (!tee) switch (ip->ip_v) {
431	case IPVERSION:
432	    if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) {
433		int hlen;
434		struct mbuf *reass;
435
436		reass = ip_reass(clone); /* Reassemble packet. */
437		if (reass == NULL)
438			return 0; /* not an error */
439		/* if reass = NULL then it was consumed by ip_reass */
440		/*
441		 * IP header checksum fixup after reassembly and leave header
442		 * in network byte order.
443		 */
444		ip = mtod(reass, struct ip *);
445		hlen = ip->ip_hl << 2;
446		ip->ip_sum = 0;
447		if (hlen == sizeof(struct ip))
448			ip->ip_sum = in_cksum_hdr(ip);
449		else
450			ip->ip_sum = in_cksum(reass, hlen);
451		clone = reass;
452	    }
453	    break;
454#ifdef INET6
455	case IPV6_VERSION >> 4:
456	    {
457	    struct ip6_hdr *const ip6 = mtod(clone, struct ip6_hdr *);
458
459		if (ip6->ip6_nxt == IPPROTO_FRAGMENT) {
460			int nxt, off;
461
462			off = sizeof(struct ip6_hdr);
463			nxt = frag6_input(&clone, &off, 0);
464			if (nxt == IPPROTO_DONE)
465				return (0);
466		}
467		break;
468	    }
469#endif
470	}
471
472	/* attach a tag to the packet with the reinject info */
473	tag = m_tag_alloc(MTAG_IPFW_RULE, 0,
474		    sizeof(struct ipfw_rule_ref), M_NOWAIT);
475	if (tag == NULL) {
476		FREE_PKT(clone);
477		return 1;
478	}
479	*((struct ipfw_rule_ref *)(tag+1)) = *rule;
480	m_tag_prepend(clone, tag);
481
482	/* Do the dirty job... */
483	ip_divert_ptr(clone, incoming);
484	return 0;
485}
486
487/*
488 * attach or detach hooks for a given protocol family
489 */
490static int
491ipfw_hook(int onoff, int pf)
492{
493	struct pfil_head *pfh;
494	void *hook_func;
495
496	pfh = pfil_head_get(PFIL_TYPE_AF, pf);
497	if (pfh == NULL)
498		return ENOENT;
499
500	hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet;
501
502	(void) (onoff ? pfil_add_hook : pfil_remove_hook)
503	    (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
504
505	return 0;
506}
507
508int
509ipfw_attach_hooks(int arg)
510{
511	int error = 0;
512
513	if (arg == 0) /* detach */
514		ipfw_hook(0, AF_INET);
515	else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) {
516                error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */
517                printf("ipfw_hook() error\n");
518        }
519#ifdef INET6
520	if (arg == 0) /* detach */
521		ipfw_hook(0, AF_INET6);
522	else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) {
523                error = ENOENT;
524                printf("ipfw6_hook() error\n");
525        }
526#endif
527	if (arg == 0) /* detach */
528		ipfw_hook(0, AF_LINK);
529	else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) {
530                error = ENOENT;
531                printf("ipfw_link_hook() error\n");
532        }
533	return error;
534}
535
536int
537ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
538{
539	int newval;
540	int error;
541	int af;
542
543	if (arg1 == &V_fw_enable)
544		af = AF_INET;
545#ifdef INET6
546	else if (arg1 == &V_fw6_enable)
547		af = AF_INET6;
548#endif
549	else if (arg1 == &V_fwlink_enable)
550		af = AF_LINK;
551	else
552		return (EINVAL);
553
554	newval = *(int *)arg1;
555	/* Handle sysctl change */
556	error = sysctl_handle_int(oidp, &newval, 0, req);
557
558	if (error)
559		return (error);
560
561	/* Formalize new value */
562	newval = (newval) ? 1 : 0;
563
564	if (*(int *)arg1 == newval)
565		return (0);
566
567	error = ipfw_hook(newval, af);
568	if (error)
569		return (error);
570	*(int *)arg1 = newval;
571
572	return (0);
573}
574/* end of file */
575