ip_fastfwd.c revision 128872
1139823Simp/*
21541Srgrimes * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
31541Srgrimes * All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 3. The name of the author may not be used to endorse or promote
141541Srgrimes *    products derived from this software without specific prior written
151541Srgrimes *    permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
2985051Sru * $FreeBSD: head/sys/netinet/ip_fastfwd.c 128872 2004-05-03 13:52:47Z andre $
3050477Speter */
311541Srgrimes
321541Srgrimes/*
331541Srgrimes * ip_fastforward gets its speed from processing the forwarded packet to
341541Srgrimes * completion (if_output on the other side) without any queues or netisr's.
351541Srgrimes * The receiving interface DMAs the packet into memory, the upper half of
361541Srgrimes * driver calls ip_fastforward, we do our routing table lookup and directly
3732356Seivind * send it off to the outgoing interface which DMAs the packet to the
3832350Seivind * network card. The only part of the packet we touch with the CPU is the
3954263Sshin * IP header (unless there are complex firewall rules touching other parts
4031742Seivind * of the packet, but that is up to you). We are essentially limited by bus
4131742Seivind * bandwidth and how fast the network card/driver can set up receives and
421541Srgrimes * transmits.
431541Srgrimes *
441541Srgrimes * We handle basic errors, ip header errors, checksum errors,
451541Srgrimes * destination unreachable, fragmentation and fragmentation needed and
4671862Speter * report them via icmp to the sender.
4791648Sbrooks *
4891648Sbrooks * Else if something is not pure IPv4 unicast forwarding we fall back to
491541Srgrimes * the normal ip_input processing path. We should only be called from
5024204Sbde * interfaces connected to the outside world.
5171791Speter *
52181803Sbz * Firewalling is fully supported including divert, ipfw fwd and ipfilter
531541Srgrimes * ipnat and address rewrite.
541541Srgrimes *
55130933Sbrooks * IPSEC is not supported if this host is a tunnel broker. IPSEC is
561541Srgrimes * supported for connections to/from local host.
571541Srgrimes *
581541Srgrimes * We try to do the least expensive (in CPU ops) checks and operations
591541Srgrimes * first to catch junk with as little overhead as possible.
60185571Sbz *
611541Srgrimes * We take full advantage of hardware support for ip checksum and
621541Srgrimes * fragmentation offloading.
631541Srgrimes *
 * We don't do ICMP redirect in the fast forwarding path. I have had my own
 * cases where two core routers with Zebra routing suite would send millions
 * of ICMP redirects to connected hosts if the router to dest was not the
 * default gateway. In one case it was filling the routing table of a host
 * with close to 300'000 cloned redirect entries until it ran out of kernel
 * memory. However the networking code proved very robust and it didn't
 * crash or go ill otherwise.
7111819Sjulian */
7253541Sshin
7353541Sshin/*
7453541Sshin * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
7553541Sshin * is being followed here.
7653541Sshin */
7762587Sitojun
7853541Sshin#include "opt_ipfw.h"
7953541Sshin#include "opt_ipdn.h"
8015885Sjulian#include "opt_ipdivert.h"
8115885Sjulian#include "opt_ipfilter.h"
8215885Sjulian#include "opt_ipstealth.h"
8383268Speter#include "opt_mac.h"
8415885Sjulian#include "opt_pfil_hooks.h"
85187039Srwatson
86187039Srwatson#include <sys/param.h>
871622Sdg#include <sys/systm.h>
881541Srgrimes#include <sys/kernel.h>
8953541Sshin#include <sys/mac.h>
9053541Sshin#include <sys/malloc.h>
911622Sdg#include <sys/mbuf.h>
926876Sdg#include <sys/protosw.h>
931622Sdg#include <sys/socket.h>
941541Srgrimes#include <sys/sysctl.h>
95189873Srwatson
96189873Srwatson#include <net/pfil.h>
97189873Srwatson#include <net/if.h>
98189873Srwatson#include <net/if_types.h>
99189873Srwatson#include <net/if_var.h>
10091648Sbrooks#include <net/if_dl.h>
10191648Sbrooks#include <net/route.h>
10291648Sbrooks
103191148Skmacy#include <netinet/in.h>
104160195Ssam#include <netinet/in_systm.h>
105128209Sbrooks#include <netinet/in_var.h>
106190787Szec#include <netinet/ip.h>
107193731Szec#include <netinet/ip_var.h>
108193731Szec#include <netinet/ip_icmp.h>
109193731Szec
11091648Sbrooks#include <machine/in_cksum.h>
111195699Srwatson
11291648Sbrooks#include <netinet/ip_fw.h>
113192669Szec#include <netinet/ip_divert.h>
114195699Srwatson#include <netinet/ip_dummynet.h>
115195699Srwatson
116195727Srwatsonstatic int ipfastforward_active = 0;
117195727SrwatsonSYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
118195699Srwatson    &ipfastforward_active, 0, "Enable fast IP forwarding");
119192669Szec
120192669Szecstatic struct sockaddr_in *
121192669Szecip_findroute(struct route *ro, in_addr_t dest, struct mbuf *m)
122195699Srwatson{
123190909Szec	struct sockaddr_in *dst;
124190909Szec	struct rtentry *rt;
125192669Szec
126190909Szec	/*
127193731Szec	 * Find route to destination.
128193731Szec	 */
129195699Srwatson	bzero(ro, sizeof(*ro));
130193731Szec	dst = (struct sockaddr_in *)&ro->ro_dst;
131190909Szec	dst->sin_family = AF_INET;
132130933Sbrooks	dst->sin_len = sizeof(*dst);
13391648Sbrooks	dst->sin_addr.s_addr = dest;
134128209Sbrooks	rtalloc_ign(ro, RTF_CLONING);
135177965Srwatson
13691648Sbrooks	/*
13791648Sbrooks	 * Route there and interface still up?
138193731Szec	 */
13997289Sbrooks	rt = ro->ro_rt;
140181803Sbz	if (rt && (rt->rt_flags & RTF_UP) &&
141193731Szec	    (rt->rt_ifp->if_flags & IFF_UP) &&
14291648Sbrooks	    (rt->rt_ifp->if_flags & IFF_RUNNING)) {
14391648Sbrooks		if (rt->rt_flags & RTF_GATEWAY)
14491648Sbrooks			dst = (struct sockaddr_in *)rt->rt_gateway;
145147256Sbrooks	} else {
14691648Sbrooks		ipstat.ips_noroute++;
14791648Sbrooks		ipstat.ips_cantforward++;
148128209Sbrooks		if (rt)
149177965Srwatson			RTFREE(rt);
15071791Speter		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL);
151147256Sbrooks		return NULL;
15271791Speter	}
153180094Sed	return dst;
154180094Sed}
155147256Sbrooks
15671791Speter/*
157147256Sbrooks * Try to forward a packet based on the destination address.
158147256Sbrooks * This is a fast path optimized for the plain forwarding case.
159147256Sbrooks * If the packet is handled (and consumed) here then we return 1;
160147256Sbrooks * otherwise 0 is returned and the packet should be delivered
161147256Sbrooks * to ip_input for full processing.
162147256Sbrooks */
163189871Srwatsonint
164189873Srwatsonip_fastforward(struct mbuf *m)
165147256Sbrooks{
166147611Sdwmalone	struct ip *ip;
167181803Sbz	struct mbuf *m0 = NULL;
168181803Sbz#ifdef IPDIVERT
16992081Smux	struct ip *tip;
17092081Smux	struct mbuf *clone = NULL;
17171791Speter#endif
17271791Speter	struct route ro;
173193731Szec	struct sockaddr_in *dst = NULL;
174193731Szec	struct in_ifaddr *ia = NULL;
175190787Szec	struct ifaddr *ifa = NULL;
176190787Szec	struct ifnet *ifp;
177192669Szec	struct ip_fw_args args;
178192669Szec	in_addr_t odest, dest;
179192669Szec	u_short sum, ip_len;
180193731Szec	int error = 0;
181193731Szec	int hlen, ipfw, mtu;
182192669Szec
183193731Szec	/*
184193731Szec	 * Are we active and forwarding packets?
185192669Szec	 */
186192669Szec	if (!ipfastforward_active || !ipforwarding)
187190787Szec		return 0;
188192669Szec
189190787Szec	M_ASSERTVALID(m);
190190787Szec	M_ASSERTPKTHDR(m);
191190787Szec
192193731Szec	ro.ro_rt = NULL;
19371791Speter
194193731Szec	/*
195193731Szec	 * Step 1: check for packet drop conditions (and sanity checks)
196193731Szec	 */
197193731Szec
198193731Szec	/*
199193731Szec	 * Is entire packet big enough?
200193731Szec	 */
201193731Szec	if (m->m_pkthdr.len < sizeof(struct ip)) {
202193731Szec		ipstat.ips_tooshort++;
203193731Szec		goto drop;
204193731Szec	}
205193731Szec
206193731Szec	/*
207178883Srwatson	 * Is first mbuf large enough for ip header and is header present?
208178883Srwatson	 */
209177965Srwatson	if (m->m_len < sizeof (struct ip) &&
210178883Srwatson	   (m = m_pullup(m, sizeof (struct ip))) == 0) {
211178883Srwatson		ipstat.ips_toosmall++;
212195699Srwatson		goto drop;
213190909Szec	}
214190909Szec
215190787Szec	ip = mtod(m, struct ip *);
216190909Szec
217178883Srwatson	/*
218177965Srwatson	 * Is it IPv4?
219178883Srwatson	 */
220178883Srwatson	if (ip->ip_v != IPVERSION) {
221177965Srwatson		ipstat.ips_badvers++;
222177965Srwatson		goto drop;
223132199Sphk	}
224177965Srwatson
225178883Srwatson	/*
226177965Srwatson	 * Is IP header length correct and is it in first mbuf?
227178883Srwatson	 */
2281541Srgrimes	hlen = ip->ip_hl << 2;
229178883Srwatson	if (hlen < sizeof(struct ip)) {	/* minimum header length */
230178883Srwatson		ipstat.ips_badlen++;
231178883Srwatson		goto drop;
23271862Speter	}
233178883Srwatson	if (hlen > m->m_len) {
23471862Speter		if ((m = m_pullup(m, hlen)) == 0) {
235121596Skan			ipstat.ips_badhlen++;
23671862Speter			goto drop;
23754263Sshin		}
238177965Srwatson		ip = mtod(m, struct ip *);
239191148Skmacy	}
2401541Srgrimes
241147611Sdwmalone	/*
242191148Skmacy	 * Checksum correct?
243187039Srwatson	 */
244187039Srwatson	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
245187039Srwatson		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
246147611Sdwmalone	else {
247113255Sdes		if (hlen == sizeof(struct ip))
248113255Sdes			sum = in_cksum_hdr(ip);
249191148Skmacy		else
250191148Skmacy			sum = in_cksum(m, hlen);
251187039Srwatson	}
252187039Srwatson	if (sum) {
253187039Srwatson		ipstat.ips_badsum++;
254187039Srwatson		goto drop;
255187039Srwatson	}
256187039Srwatson	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
257187039Srwatson
258187039Srwatson	ip_len = ntohs(ip->ip_len);
25936908Sjulian
26036908Sjulian	/*
26136908Sjulian	 * Is IP length longer than packet we have got?
26236908Sjulian	 */
26336908Sjulian	if (m->m_pkthdr.len < ip_len) {
26453541Sshin		ipstat.ips_tooshort++;
26536908Sjulian		goto drop;
26636908Sjulian	}
267147611Sdwmalone
268147611Sdwmalone	/*
269147611Sdwmalone	 * Is packet longer than IP header tells us? If yes, truncate packet.
270147611Sdwmalone	 */
271147611Sdwmalone	if (m->m_pkthdr.len > ip_len) {
272147611Sdwmalone		if (m->m_len == m->m_pkthdr.len) {
273147611Sdwmalone			m->m_len = ip_len;
27436992Sjulian			m->m_pkthdr.len = ip_len;
27536992Sjulian		} else
27636992Sjulian			m_adj(m, ip_len - m->m_pkthdr.len);
277189863Srwatson	}
278189863Srwatson
279189873Srwatson	/*
280189863Srwatson	 * Is packet from or to 127/8?
281189873Srwatson	 */
28253541Sshin	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
28336992Sjulian	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
28436992Sjulian		ipstat.ips_badaddr++;
28536994Sjulian		goto drop;
28636992Sjulian	}
28765454Srwatson
28836992Sjulian	/*
28936992Sjulian	 * Step 2: fallback conditions to normal ip_input path processing
29036992Sjulian	 */
29136992Sjulian
292177965Srwatson	/*
29336908Sjulian	 * Only IP packets without options
29436908Sjulian	 */
29536908Sjulian	if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
29636908Sjulian		if (ip_doopts == 1)
29736908Sjulian			return 0;
29836908Sjulian		else if (ip_doopts == 2) {
29936908Sjulian			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
30036908Sjulian				0, NULL);
30136908Sjulian			return 1;
30236908Sjulian		}
30336908Sjulian		/* else ignore IP options and continue */
30436908Sjulian	}
30536908Sjulian
306177965Srwatson	/*
30736908Sjulian	 * Only unicast IP, not from loopback, no L2 or IP broadcast,
30869152Sjlemon	 * no multicast, no INADDR_ANY
3091541Srgrimes	 *
310113255Sdes	 * XXX: Probably some of these checks could be direct drop
311121645Ssam	 * conditions.  However it is not clear whether there are some
31236908Sjulian	 * hacks or obscure behaviours which make it neccessary to
31360889Sarchie	 * let ip_input handle it.  We play safe here and let ip_input
314187039Srwatson	 * deal with it until it is proven that we can directly drop it.
315187039Srwatson	 */
316187039Srwatson	if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
317187039Srwatson	    ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
318162539Ssuz	    ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
319162539Ssuz	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
320178883Srwatson	    IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
321162539Ssuz	    ip->ip_dst.s_addr == INADDR_ANY )
322162539Ssuz		return 0;
323162539Ssuz
324162539Ssuz	/*
325162539Ssuz	 * Is it for a local address on this host?
326162539Ssuz	 */
327162539Ssuz	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
328162539Ssuz		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
329162539Ssuz			return 0;
330123922Ssam	}
331162539Ssuz
332162539Ssuz	/*
333181803Sbz	 * Or is it for a local IP broadcast address on this host?
334181803Sbz	 */
335162539Ssuz	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
336181118Srwatson	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
337162539Ssuz			if (ifa->ifa_addr->sa_family != AF_INET)
338162539Ssuz				continue;
339162539Ssuz			ia = ifatoia(ifa);
340162539Ssuz			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr)
341181803Sbz				return 0;
342162539Ssuz			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
343162539Ssuz			    ip->ip_dst.s_addr)
3441541Srgrimes				return 0;
3451541Srgrimes		}
34636908Sjulian	}
34737600Sdfr	ipstat.ips_total++;
34860952Sgallatin
349166577Scognet	/*
350158471Sjhb	 * Step 3: incoming packet firewall processing
351158471Sjhb	 */
352158471Sjhb
353158471Sjhb	/*
35460952Sgallatin	 * Convert to host representation
35561181Smjacob	 */
356178883Srwatson	ip->ip_len = ntohs(ip->ip_len);
357178883Srwatson	ip->ip_off = ntohs(ip->ip_off);
35860952Sgallatin
35960952Sgallatin	odest = dest = ip->ip_dst.s_addr;
360132780Skan#ifdef PFIL_HOOKS
36160952Sgallatin	/*
36237600Sdfr	 * Run through list of ipfilter hooks for input packets
36337600Sdfr	 */
36436908Sjulian	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN) ||
36560889Sarchie	    m == NULL)
36660889Sarchie		return 1;
3671541Srgrimes
3681541Srgrimes	M_ASSERTVALID(m);
3691541Srgrimes	M_ASSERTPKTHDR(m);
3701541Srgrimes
3711541Srgrimes	ip = mtod(m, struct ip *);	/* m may have changed by pfil hook */
37253541Sshin	dest = ip->ip_dst.s_addr;
37353541Sshin#endif
37453541Sshin
37553541Sshin	/*
37653541Sshin	 * Run through ipfw for input packets
37753541Sshin	 */
37811819Sjulian	if (fw_enable && IPFW_LOADED) {
37911819Sjulian		bzero(&args, sizeof(args));
38011819Sjulian		args.m = m;
38111819Sjulian
38211819Sjulian		ipfw = ip_fw_chk_ptr(&args);
38315885Sjulian		m = args.m;
38415885Sjulian
385111888Sjlemon		M_ASSERTVALID(m);
38615885Sjulian		M_ASSERTPKTHDR(m);
38783268Speter
3881541Srgrimes		/*
38960889Sarchie		 * Packet denied, drop it
3901541Srgrimes		 */
3911541Srgrimes		if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL)
3921541Srgrimes			goto drop;
3931541Srgrimes		/*
3941541Srgrimes		 * Send packet to the appropriate pipe
395134391Sandre		 */
3961541Srgrimes		if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) {
3971541Srgrimes			ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_IN, &args);
3981541Srgrimes			return 1;
3991541Srgrimes		}
40012706Sphk#ifdef IPDIVERT
401177965Srwatson		/*
4021541Srgrimes		 * Divert packet
403177965Srwatson		 */
404120727Ssam		if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) {
405142352Ssam			/*
4061541Srgrimes			 * See if this is a fragment
4071541Srgrimes			 */
4081541Srgrimes			if (ip->ip_off & (IP_MF | IP_OFFMASK))
4091541Srgrimes				goto droptoours;
4101541Srgrimes			/*
4111541Srgrimes			 * Tee packet
41254263Sshin			 */
413177965Srwatson			if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
4141541Srgrimes				clone = divert_clone(m);
415177965Srwatson			else
416177965Srwatson				clone = m;
417189863Srwatson			if (clone == NULL)
4181541Srgrimes				goto passin;
4191541Srgrimes
4201541Srgrimes			/*
421148887Srwatson			 * Delayed checksums are not compatible
422148887Srwatson			 */
4231541Srgrimes			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
42413928Swollman				in_delayed_cksum(m);
4251541Srgrimes				m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
4261541Srgrimes			}
4271541Srgrimes			/*
4281541Srgrimes			 * Restore packet header fields to original values
4291541Srgrimes			 */
4301541Srgrimes			tip = mtod(m, struct ip *);
4311541Srgrimes			tip->ip_len = htons(tip->ip_len);
4321541Srgrimes			tip->ip_off = htons(tip->ip_off);
4331541Srgrimes			/*
4341541Srgrimes			 * Deliver packet to divert input routine
4351541Srgrimes			 */
4361541Srgrimes			divert_packet(m, 0);
4371541Srgrimes			/*
4381541Srgrimes			 * If this was not tee, we are done
4391541Srgrimes			 */
4401541Srgrimes			m = clone;
4411541Srgrimes			if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0)
44253541Sshin				return 1;
44353541Sshin			/* Continue if it was tee */
44453541Sshin			goto passin;
44553541Sshin		}
4461541Srgrimes#endif
4471541Srgrimes		if (ipfw == 0 && args.next_hop != NULL) {
4481541Srgrimes			dest = args.next_hop->sin_addr.s_addr;
4491541Srgrimes			goto passin;
4501541Srgrimes		}
4511541Srgrimes		/*
4521541Srgrimes		 * Let through or not?
4531944Sdg		 */
45449468Sbrian		if (ipfw != 0)
4551944Sdg			goto drop;
4561944Sdg	}
45735563Sphkpassin:
45835563Sphk	ip = mtod(m, struct ip *);	/* if m changed during fw processing */
45935563Sphk
460189863Srwatson	/*
461189863Srwatson	 * Destination address changed?
462189863Srwatson	 */
463189863Srwatson	if (odest != dest) {
464189863Srwatson		/*
465189863Srwatson		 * Is it now for a local address on this host?
466189863Srwatson		 */
467189873Srwatson		LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
468189863Srwatson			if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
469189863Srwatson				goto forwardlocal;
470189863Srwatson		}
471189863Srwatson		/*
4721541Srgrimes		 * Go on with new destination address
4731541Srgrimes		 */
4741541Srgrimes	}
4751541Srgrimes
4761541Srgrimes	/*
477	 * Step 4: decrement TTL and look up route
478	 */
479
480	/*
481	 * Check TTL
482	 */
483#ifdef IPSTEALTH
484	if (!ipstealth) {
485#endif
486	if (ip->ip_ttl <= IPTTLDEC) {
487		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL);
488		return 1;
489	}
490
491	/*
492	 * Decrement the TTL and incrementally change the checksum.
493	 * Don't bother doing this with hw checksum offloading.
494	 */
495	ip->ip_ttl -= IPTTLDEC;
496	if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
497		ip->ip_sum -= ~htons(IPTTLDEC << 8);
498	else
499		ip->ip_sum += htons(IPTTLDEC << 8);
500#ifdef IPSTEALTH
501	}
502#endif
503
504	/*
505	 * Find route to destination.
506	 */
507	if ((dst = ip_findroute(&ro, dest, m)) == NULL)
508		return 1;	/* icmp unreach already sent */
509	ifp = ro.ro_rt->rt_ifp;
510
511	/*
512	 * Step 5: outgoing firewall packet processing
513	 */
514
515#ifdef PFIL_HOOKS
516	/*
517	 * Run through list of hooks for output packets.
518	 */
519	if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT) || m == NULL) {
520		goto consumed;
521	}
522
523	M_ASSERTVALID(m);
524	M_ASSERTPKTHDR(m);
525
526	ip = mtod(m, struct ip *);
527	dest = ip->ip_dst.s_addr;
528#endif
529	if (fw_enable && IPFW_LOADED && !args.next_hop) {
530		bzero(&args, sizeof(args));
531		args.m = m;
532		args.oif = ifp;
533
534		ipfw = ip_fw_chk_ptr(&args);
535		m = args.m;
536
537		M_ASSERTVALID(m);
538		M_ASSERTPKTHDR(m);
539
540		if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL)
541			goto drop;
542
543		if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) {
544			/*
545			 * XXX note: if the ifp or rt entry are deleted
546			 * while a pkt is in dummynet, we are in trouble!
547			 */
548			args.ro = &ro;		/* dummynet does not save it */
549			args.dst = dst;
550
551			ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_OUT, &args);
552			goto consumed;
553		}
554#ifdef IPDIVERT
555		if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) {
556			/*
557			 * See if this is a fragment
558			 */
559			if (ip->ip_off & (IP_MF | IP_OFFMASK))
560				goto droptoours;
561			/*
562			 * Tee packet
563			 */
564			if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
565				clone = divert_clone(m);
566			else
567				clone = m;
568			if (clone == NULL)
569				goto passout;
570
571			/*
572			 * Delayed checksums are not compatible with divert
573			 */
574			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
575				in_delayed_cksum(m);
576				m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
577			}
578			/*
579			 * Restore packet header fields to original values
580			 */
581			tip = mtod(m, struct ip *);
582			tip->ip_len = htons(tip->ip_len);
583			tip->ip_off = htons(tip->ip_off);
584			/*
585			 * Deliver packet to divert input routine
586			 */
587			divert_packet(m, 0);
588			/*
589			 * If this was not tee, we are done
590			 */
591			m = clone;
592			if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) {
593				goto consumed;
594			}
595			/* Continue if it was tee */
596			goto passout;
597		}
598#endif
599		if (ipfw == 0 && args.next_hop != NULL) {
600			dest = args.next_hop->sin_addr.s_addr;
601			goto passout;
602		}
603		/*
604		 * Let through or not?
605		 */
606		if (ipfw != 0)
607			goto drop;
608	}
609passout:
610	ip = mtod(m, struct ip *);
611
612	/*
613	 * Destination address changed?
614	 */
615	if (odest != dest) {
616		/*
617		 * Is it now for a local address on this host?
618		 */
619		LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
620			if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) {
621forwardlocal:
622				if (args.next_hop) {
623					struct m_tag *mtag = m_tag_get(
624					    PACKET_TAG_IPFORWARD,
625					    sizeof(struct sockaddr_in *),
626					    M_NOWAIT);
627					if (mtag == NULL) {
628						goto drop;
629					}
630					*(struct sockaddr_in **)(mtag+1) =
631					    args.next_hop;
632					m_tag_prepend(m, mtag);
633				}
634#ifdef IPDIVERT
635droptoours:	/* Used for DIVERT */
636#endif
637				/* for ip_input */
638				m->m_flags |= M_FASTFWD_OURS;
639
640				/* ip still points to the real packet */
641				ip->ip_len = htons(ip->ip_len);
642				ip->ip_off = htons(ip->ip_off);
643
644				/*
645				 * Return packet for processing by ip_input
646				 */
647				if (ro.ro_rt)
648					RTFREE(ro.ro_rt);
649				return 0;
650			}
651		}
652		/*
653		 * Redo route lookup with new destination address
654		 */
655		RTFREE(ro.ro_rt);
656		if ((dst = ip_findroute(&ro, dest, m)) == NULL)
657			return 1;	/* icmp unreach already sent */
658		ifp = ro.ro_rt->rt_ifp;
659	}
660
661	/*
662	 * Step 6: send off the packet
663	 */
664
665	/*
666	 * Check if route is dampned (when ARP is unable to resolve)
667	 */
668	if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
669	    ro.ro_rt->rt_rmx.rmx_expire >= time_second) {
670		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL);
671		goto consumed;
672	}
673
674	/*
675	 * Check if there is enough space in the interface queue
676	 */
677	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
678	    ifp->if_snd.ifq_maxlen) {
679		ipstat.ips_odropped++;
680		/* would send source quench here but that is depreciated */
681		goto drop;
682	}
683
684	/*
685	 * Check if media link state of interface is not down
686	 */
687	if (ifp->if_link_state == LINK_STATE_DOWN) {
688		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL);
689		goto consumed;
690	}
691
692	/*
693	 * Check if packet fits MTU or if hardware will fragement for us
694	 */
695	if (ro.ro_rt->rt_rmx.rmx_mtu)
696		mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
697	else
698		mtu = ifp->if_mtu;
699
700	if (ip->ip_len <= mtu ||
701	    (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
702		/*
703		 * Restore packet header fields to original values
704		 */
705		ip->ip_len = htons(ip->ip_len);
706		ip->ip_off = htons(ip->ip_off);
707		/*
708		 * Send off the packet via outgoing interface
709		 */
710		error = (*ifp->if_output)(ifp, m,
711				(struct sockaddr *)dst, ro.ro_rt);
712	} else {
713		/*
714		 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
715		 */
716		if (ip->ip_off & IP_DF) {
717			ipstat.ips_cantfrag++;
718			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
719				0, ifp);
720			goto consumed;
721		} else {
722			/*
723			 * We have to fragement the packet
724			 */
725			m->m_pkthdr.csum_flags |= CSUM_IP;
726			/*
727			 * ip_fragment expects ip_len and ip_off in host byte
728			 * order but returns all packets in network byte order
729			 */
730			if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
731					(~ifp->if_hwassist & CSUM_DELAY_IP))) {
732				goto drop;
733			}
734			KASSERT(m != NULL, ("null mbuf and no error"));
735			/*
736			 * Send off the fragments via outgoing interface
737			 */
738			error = 0;
739			do {
740				m0 = m->m_nextpkt;
741				m->m_nextpkt = NULL;
742
743				error = (*ifp->if_output)(ifp, m,
744					(struct sockaddr *)dst, ro.ro_rt);
745				if (error)
746					break;
747			} while ((m = m0) != NULL);
748			if (error) {
749				/* Reclaim remaining fragments */
750				for (; m; m = m0) {
751					m0 = m->m_nextpkt;
752					m->m_nextpkt = NULL;
753					m_freem(m);
754				}
755			} else
756				ipstat.ips_fragmented++;
757		}
758	}
759
760	if (error != 0)
761		ipstat.ips_odropped++;
762	else {
763		ro.ro_rt->rt_rmx.rmx_pksent++;
764		ipstat.ips_forward++;
765		ipstat.ips_fastforward++;
766	}
767consumed:
768	RTFREE(ro.ro_rt);
769	return 1;
770drop:
771	if (m)
772		m_freem(m);
773	if (ro.ro_rt)
774		RTFREE(ro.ro_rt);
775	return 1;
776}
777