ip_output.c revision 171167
11541Srgrimes/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1990, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
291541Srgrimes *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
301541Srgrimes * $FreeBSD: head/sys/netinet/ip_output.c 171167 2007-07-03 12:13:45Z gnn $
311541Srgrimes */
321541Srgrimes
331541Srgrimes#include "opt_ipfw.h"
341541Srgrimes#include "opt_ipsec.h"
351541Srgrimes#include "opt_mac.h"
361541Srgrimes#include "opt_mbuf_stress_test.h"
371541Srgrimes
3814622Sfenner#include <sys/param.h>
391541Srgrimes#include <sys/systm.h>
401541Srgrimes#include <sys/kernel.h>
412531Swollman#include <sys/malloc.h>
422531Swollman#include <sys/mbuf.h>
432531Swollman#include <sys/priv.h>
442531Swollman#include <sys/protosw.h>
452531Swollman#include <sys/socket.h>
469209Swollman#include <sys/socketvar.h>
4714622Sfenner#include <sys/sysctl.h>
482531Swollman
4914622Sfenner#include <net/if.h>
502531Swollman#include <net/netisr.h>
511541Srgrimes#include <net/pfil.h>
521541Srgrimes#include <net/route.h>
531549Srgrimes
541541Srgrimes#include <netinet/in.h>
551541Srgrimes#include <netinet/in_systm.h>
561541Srgrimes#include <netinet/ip.h>
5712296Sphk#include <netinet/in_pcb.h>
586472Swollman#include <netinet/in_var.h>
591541Srgrimes#include <netinet/ip_var.h>
601541Srgrimes#include <netinet/ip_options.h>
611541Srgrimes
621541Srgrimes#ifdef IPSEC
631541Srgrimes#include <netinet/ip_ipsec.h>
641541Srgrimes#include <netipsec/ipsec.h>
651541Srgrimes#endif /* IPSEC*/
661541Srgrimes
671541Srgrimes#include <machine/in_cksum.h>
681541Srgrimes
691541Srgrimes#include <security/mac/mac_framework.h>
701541Srgrimes
/*
 * Debug helper: print the string x, a space, the IPv4 address held in
 * a.s_addr (converted with ntohl()) in dotted-quad form, then the string y.
 *
 * The address argument is parenthesised so that expressions such as *p or
 * cond ? a : b can be passed.  Each octet is cast to int to match the %d
 * conversions.  The expansion deliberately keeps its historical trailing
 * semicolon so that existing call sites continue to compile unchanged.
 */
#define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
				(x), (int)((ntohl((a).s_addr)>>24)&0xFF),\
				  (int)((ntohl((a).s_addr)>>16)&0xFF),\
				  (int)((ntohl((a).s_addr)>>8)&0xFF),\
				  (int)((ntohl((a).s_addr))&0xFF), (y));
762531Swollman
7712296Sphku_short ip_id;
7812296Sphk
7912296Sphk#ifdef MBUF_STRESS_TEST
809209Swollmanint mbuf_frag_size = 0;
811541SrgrimesSYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
8214622Sfenner	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
8314622Sfenner#endif
849209Swollman
851541Srgrimesstatic void	ip_mloopback
8614622Sfenner	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
871541Srgrimes
881541Srgrimes
891541Srgrimesextern	struct protosw inetsw[];
901541Srgrimes
9114622Sfenner/*
9214622Sfenner * IP output.  The packet in mbuf chain m contains a skeletal IP
931541Srgrimes * header (with len, off, ttl, proto, tos, src, dst).
941541Srgrimes * The mbuf chain containing the packet will be freed.
951541Srgrimes * The mbuf opt, if present, will not be freed.
961541Srgrimes * In the IP forwarding case, the packet will arrive with options already
9714622Sfenner * inserted, so must have a NULL opt pointer.
989209Swollman */
999209Swollmanint
1009209Swollmanip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
10114622Sfenner    struct ip_moptions *imo, struct inpcb *inp)
10214622Sfenner{
10314622Sfenner	struct ip *ip;
10414622Sfenner	struct ifnet *ifp = NULL;	/* keep compiler happy */
10514622Sfenner	struct mbuf *m0;
10614622Sfenner	int hlen = sizeof (struct ip);
10714622Sfenner	int mtu;
10814622Sfenner	int len, error = 0;
10914622Sfenner	struct sockaddr_in *dst = NULL;	/* keep compiler happy */
11014622Sfenner	struct in_ifaddr *ia = NULL;
11114622Sfenner	int isbroadcast, sw_csum;
11214622Sfenner	struct route iproute;
1132531Swollman	struct in_addr odst;
1141541Srgrimes#ifdef IPFIREWALL_FORWARD
1151541Srgrimes	struct m_tag *fwd_tag = NULL;
11612704Sphk#endif
1172531Swollman	M_ASSERTPKTHDR(m);
1182531Swollman
1192531Swollman	if (ro == NULL) {
1202531Swollman		ro = &iproute;
1212531Swollman		bzero(ro, sizeof (*ro));
1222531Swollman	}
1232531Swollman
1242531Swollman	if (inp != NULL)
1252531Swollman		INP_LOCK_ASSERT(inp);
12614622Sfenner
1272531Swollman	if (opt) {
1282531Swollman		len = 0;
1292531Swollman		m = ip_insertoptions(m, opt, &len);
1302531Swollman		if (len != 0)
1312531Swollman			hlen = len;
13214622Sfenner	}
1332531Swollman	ip = mtod(m, struct ip *);
1342531Swollman
13514622Sfenner	/*
13614622Sfenner	 * Fill in IP header.  If we are not allowing fragmentation,
13714622Sfenner	 * then the ip_id field is meaningless, but we don't set it
13814622Sfenner	 * to zero.  Doing so causes various problems when devices along
1392531Swollman	 * the path (routers, load balancers, firewalls, etc.) illegally
1402531Swollman	 * disable DF on our packet.  Note that a 16-bit counter
1412531Swollman	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
1422531Swollman	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
1432531Swollman	 * for Counting NATted Hosts", Proc. IMW'02, available at
1442531Swollman	 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
1452531Swollman	 */
1461541Srgrimes	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
1471541Srgrimes		ip->ip_v = IPVERSION;
1481541Srgrimes		ip->ip_hl = hlen >> 2;
1491541Srgrimes		ip->ip_id = ip_newid();
1501541Srgrimes		ipstat.ips_localout++;
1511541Srgrimes	} else {
1521541Srgrimes		hlen = ip->ip_hl << 2;
1531541Srgrimes	}
1541541Srgrimes
1551541Srgrimes	dst = (struct sockaddr_in *)&ro->ro_dst;
1561541Srgrimesagain:
1571541Srgrimes	/*
1581541Srgrimes	 * If there is a cached route,
1592531Swollman	 * check that it is to the same destination
1609209Swollman	 * and is still up.  If not, free it and try again.
1618546Sdg	 * The address family should also be checked in case of sharing the
1621541Srgrimes	 * cache with IPv6.
1631541Srgrimes	 */
1641541Srgrimes	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
1651541Srgrimes			  dst->sin_family != AF_INET ||
1661541Srgrimes			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
1671541Srgrimes		RTFREE(ro->ro_rt);
1681541Srgrimes		ro->ro_rt = (struct rtentry *)NULL;
1691541Srgrimes	}
1701541Srgrimes#ifdef IPFIREWALL_FORWARD
1711541Srgrimes	if (ro->ro_rt == NULL && fwd_tag == NULL) {
1721541Srgrimes#else
1731541Srgrimes	if (ro->ro_rt == NULL) {
1741541Srgrimes#endif
1751541Srgrimes		bzero(dst, sizeof(*dst));
1761541Srgrimes		dst->sin_family = AF_INET;
1771541Srgrimes		dst->sin_len = sizeof(*dst);
1781541Srgrimes		dst->sin_addr = ip->ip_dst;
1791541Srgrimes	}
1801541Srgrimes	/*
1811541Srgrimes	 * If routing to interface only, short circuit routing lookup.
1821541Srgrimes	 * The use of an all-ones broadcast address implies this; an
1831541Srgrimes	 * interface is specified by the broadcast address of an interface,
1841541Srgrimes	 * or the destination address of a ptp interface.
1851541Srgrimes	 */
1861541Srgrimes	if (flags & IP_SENDONES) {
1871541Srgrimes		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
1881541Srgrimes		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
1891541Srgrimes			ipstat.ips_noroute++;
1901541Srgrimes			error = ENETUNREACH;
1911541Srgrimes			goto bad;
1921541Srgrimes		}
1931541Srgrimes		ip->ip_dst.s_addr = INADDR_BROADCAST;
1941541Srgrimes		dst->sin_addr = ip->ip_dst;
1951541Srgrimes		ifp = ia->ia_ifp;
1962531Swollman		ip->ip_ttl = 1;
1971541Srgrimes		isbroadcast = 1;
1988546Sdg	} else if (flags & IP_ROUTETOIF) {
1992531Swollman		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
2001541Srgrimes		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
20114622Sfenner			ipstat.ips_noroute++;
20214622Sfenner			error = ENETUNREACH;
20314622Sfenner			goto bad;
20414622Sfenner		}
20514622Sfenner		ifp = ia->ia_ifp;
20614622Sfenner		ip->ip_ttl = 1;
20714622Sfenner		isbroadcast = in_broadcast(dst->sin_addr, ifp);
20814622Sfenner	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
20914622Sfenner	    imo != NULL && imo->imo_multicast_ifp != NULL) {
21014622Sfenner		/*
21114622Sfenner		 * Bypass the normal routing lookup for multicast
2121541Srgrimes		 * packets if the interface is specified.
2131541Srgrimes		 */
21414622Sfenner		ifp = imo->imo_multicast_ifp;
2151541Srgrimes		IFP_TO_IA(ifp, ia);
2161541Srgrimes		isbroadcast = 0;	/* fool gcc */
2178090Spst	} else {
2181541Srgrimes		/*
2191541Srgrimes		 * We want to do any cloning requested by the link layer,
2202531Swollman		 * as this is probably required in all cases for correct
22114622Sfenner		 * operation (as it is for ARP).
22214622Sfenner		 */
22314622Sfenner		if (ro->ro_rt == NULL)
22414622Sfenner			rtalloc_ign(ro, 0);
22514622Sfenner		if (ro->ro_rt == NULL) {
2264028Spst			ipstat.ips_noroute++;
22714622Sfenner			error = EHOSTUNREACH;
22814622Sfenner			goto bad;
2294028Spst		}
23014622Sfenner		ia = ifatoia(ro->ro_rt->rt_ifa);
2314028Spst		ifp = ro->ro_rt->rt_ifp;
23214622Sfenner		ro->ro_rt->rt_rmx.rmx_pksent++;
23314622Sfenner		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
2342531Swollman			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
2352531Swollman		if (ro->ro_rt->rt_flags & RTF_HOST)
2362531Swollman			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
2372531Swollman		else
23814622Sfenner			isbroadcast = in_broadcast(dst->sin_addr, ifp);
2392531Swollman	}
24014622Sfenner	/*
2412531Swollman	 * Calculate MTU.  If we have a route that is up, use that,
24214622Sfenner	 * otherwise use the interface's MTU.
24314622Sfenner	 */
24414622Sfenner	if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) {
24514622Sfenner		/*
24614622Sfenner		 * This case can happen if the user changed the MTU
24714622Sfenner		 * of an interface after enabling IP on it.  Because
24814622Sfenner		 * most netifs don't keep track of routes pointing to
24914622Sfenner		 * them, there is no way for one to update all its
2502531Swollman		 * routes when the MTU is changed.
25114622Sfenner		 */
25214622Sfenner		if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
25314622Sfenner			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
25414622Sfenner		mtu = ro->ro_rt->rt_rmx.rmx_mtu;
25514622Sfenner	} else {
25614622Sfenner		mtu = ifp->if_mtu;
25714622Sfenner	}
25814622Sfenner	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
25914622Sfenner		struct in_multi *inm;
26014622Sfenner
26114622Sfenner		m->m_flags |= M_MCAST;
26214622Sfenner		/*
26314622Sfenner		 * IP destination address is multicast.  Make sure "dst"
26414622Sfenner		 * still points to the address in "ro".  (It may have been
26514622Sfenner		 * changed to point to a gateway address, above.)
26614622Sfenner		 */
26714622Sfenner		dst = (struct sockaddr_in *)&ro->ro_dst;
26814622Sfenner		/*
26914622Sfenner		 * See if the caller provided any multicast options
27014622Sfenner		 */
2712531Swollman		if (imo != NULL) {
2722531Swollman			ip->ip_ttl = imo->imo_multicast_ttl;
2731541Srgrimes			if (imo->imo_multicast_vif != -1)
2741541Srgrimes				ip->ip_src.s_addr =
2751541Srgrimes				    ip_mcast_src ?
2769209Swollman				    ip_mcast_src(imo->imo_multicast_vif) :
2771541Srgrimes				    INADDR_ANY;
2781541Srgrimes		} else
27914622Sfenner			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
28014622Sfenner		/*
2819209Swollman		 * Confirm that the outgoing interface supports multicast.
28214622Sfenner		 */
28314622Sfenner		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
28414622Sfenner			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
28514622Sfenner				ipstat.ips_noroute++;
2869209Swollman				error = ENETUNREACH;
28714622Sfenner				goto bad;
28814622Sfenner			}
28914622Sfenner		}
29014622Sfenner		/*
2911541Srgrimes		 * If source address not specified yet, use address
2921541Srgrimes		 * of outgoing interface.
2938090Spst		 */
2941541Srgrimes		if (ip->ip_src.s_addr == INADDR_ANY) {
2951541Srgrimes			/* Interface may have no addresses. */
29614622Sfenner			if (ia != NULL)
2971541Srgrimes				ip->ip_src = IA_SIN(ia)->sin_addr;
2981541Srgrimes		}
2991541Srgrimes
3001541Srgrimes		IN_MULTI_LOCK();
3011541Srgrimes		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
3021541Srgrimes		if (inm != NULL &&
3031541Srgrimes		   (imo == NULL || imo->imo_multicast_loop)) {
3041541Srgrimes			IN_MULTI_UNLOCK();
3051541Srgrimes			/*
3061541Srgrimes			 * If we belong to the destination multicast group
3071541Srgrimes			 * on the outgoing interface, and the caller did not
3081541Srgrimes			 * forbid loopback, loop back a copy.
3091541Srgrimes			 */
3101541Srgrimes			ip_mloopback(ifp, m, dst, hlen);
31114622Sfenner		}
3121541Srgrimes		else {
3131541Srgrimes			IN_MULTI_UNLOCK();
3141541Srgrimes			/*
3151541Srgrimes			 * If we are acting as a multicast router, perform
3161541Srgrimes			 * multicast forwarding as if the packet had just
3171541Srgrimes			 * arrived on the interface to which we are about
3181541Srgrimes			 * to send.  The multicast forwarding function
3191541Srgrimes			 * recursively calls this function, using the
3202531Swollman			 * IP_FORWARDING flag to prevent infinite recursion.
32114622Sfenner			 *
32214622Sfenner			 * Multicasts that are looped back by ip_mloopback(),
32314622Sfenner			 * above, will be forwarded by the ip_input() routine,
32414622Sfenner			 * if necessary.
3252531Swollman			 */
32614622Sfenner			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
3271541Srgrimes				/*
3281541Srgrimes				 * If rsvp daemon is not running, do not
3291541Srgrimes				 * set ip_moptions. This ensures that the packet
3301541Srgrimes				 * is multicast and not just sent down one link
3311541Srgrimes				 * as prescribed by rsvpd.
3321541Srgrimes				 */
3331541Srgrimes				if (!rsvp_on)
3341541Srgrimes					imo = NULL;
3351541Srgrimes				if (ip_mforward &&
3361541Srgrimes				    ip_mforward(ip, ifp, m, imo) != 0) {
3371541Srgrimes					m_freem(m);
3381541Srgrimes					goto done;
3391541Srgrimes				}
3401541Srgrimes			}
3419209Swollman		}
3421541Srgrimes
34314622Sfenner		/*
34414622Sfenner		 * Multicasts with a time-to-live of zero may be looped-
3451541Srgrimes		 * back, above, but must not be transmitted on a network.
34614622Sfenner		 * Also, multicasts addressed to the loopback interface
34714622Sfenner		 * are not sent -- the above call to ip_mloopback() will
34814622Sfenner		 * loop back a copy if this host actually belongs to the
34914622Sfenner		 * destination group on the loopback interface.
3502531Swollman		 */
3512531Swollman		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
35214622Sfenner			m_freem(m);
3531541Srgrimes			goto done;
3541541Srgrimes		}
3551541Srgrimes
3561541Srgrimes		goto sendit;
3571541Srgrimes	}
3581541Srgrimes
3591541Srgrimes	/*
3601541Srgrimes	 * If the source address is not specified yet, use the address
3611541Srgrimes	 * of the outoing interface.
36214622Sfenner	 */
36314622Sfenner	if (ip->ip_src.s_addr == INADDR_ANY) {
36414622Sfenner		/* Interface may have no addresses. */
36514622Sfenner		if (ia != NULL) {
36614622Sfenner			ip->ip_src = IA_SIN(ia)->sin_addr;
3671541Srgrimes		}
3681541Srgrimes	}
3691541Srgrimes
3701541Srgrimes	/*
3711541Srgrimes	 * Verify that we have any chance at all of being able to queue the
3721541Srgrimes	 * packet or packet fragments, unless ALTQ is enabled on the given
3731541Srgrimes	 * interface in which case packetdrop should be done by queueing.
3749209Swollman	 */
3751541Srgrimes#ifdef ALTQ
3761541Srgrimes	if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
3771541Srgrimes	    ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
3781541Srgrimes	    ifp->if_snd.ifq_maxlen))
3791541Srgrimes#else
3809209Swollman	if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
3811541Srgrimes	    ifp->if_snd.ifq_maxlen)
3821541Srgrimes#endif /* ALTQ */
3831541Srgrimes	{
3841541Srgrimes		error = ENOBUFS;
3851541Srgrimes		ipstat.ips_odropped++;
3861541Srgrimes		ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
3871541Srgrimes		goto bad;
3881541Srgrimes	}
3891541Srgrimes
3901541Srgrimes	/*
39114622Sfenner	 * Look for broadcast address and
39214622Sfenner	 * verify user is allowed to send
3931541Srgrimes	 * such a packet.
3941541Srgrimes	 */
3951541Srgrimes	if (isbroadcast) {
3961541Srgrimes		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
3971541Srgrimes			error = EADDRNOTAVAIL;
3981541Srgrimes			goto bad;
3991541Srgrimes		}
4001541Srgrimes		if ((flags & IP_ALLOWBROADCAST) == 0) {
4012531Swollman			error = EACCES;
4022531Swollman			goto bad;
4032531Swollman		}
4042531Swollman		/* don't allow broadcast messages to be fragmented */
4052531Swollman		if (ip->ip_len > mtu) {
4062531Swollman			error = EMSGSIZE;
4072531Swollman			goto bad;
4082531Swollman		}
4092531Swollman		m->m_flags |= M_BCAST;
4102531Swollman	} else {
41114622Sfenner		m->m_flags &= ~M_BCAST;
41214622Sfenner	}
41314622Sfenner
41414622Sfennersendit:
4152531Swollman#ifdef IPSEC
41614622Sfenner	switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
41714622Sfenner	case 1:
4182531Swollman		goto bad;
4199209Swollman	case -1:
4202531Swollman		goto done;
4212531Swollman	case 0:
4222531Swollman	default:
4232531Swollman		break;	/* Continue with packet processing. */
4242531Swollman	}
4251541Srgrimes	/* Update variables that are affected by ipsec4_output(). */
42614622Sfenner	ip = mtod(m, struct ip *);
4272531Swollman	hlen = ip->ip_hl << 2;
4282531Swollman#endif /* IPSEC */
42914622Sfenner
4301541Srgrimes	/* Jump over all PFIL processing if hooks are not active. */
4312531Swollman	if (!PFIL_HOOKED(&inet_pfil_hook))
4322531Swollman		goto passout;
4332531Swollman
4342531Swollman	/* Run through list of hooks for output packets. */
4351541Srgrimes	odst.s_addr = ip->ip_dst.s_addr;
4362531Swollman	error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
4372531Swollman	if (error != 0 || m == NULL)
4382531Swollman		goto done;
4392531Swollman
4402531Swollman	ip = mtod(m, struct ip *);
4412531Swollman
4422531Swollman	/* See if destination IP address was changed by packet filter. */
4431541Srgrimes	if (odst.s_addr != ip->ip_dst.s_addr) {
4442531Swollman		m->m_flags |= M_SKIP_FIREWALL;
4452531Swollman		/* If destination is now ourself drop to ip_input(). */
4468090Spst		if (in_localip(ip->ip_dst)) {
4471541Srgrimes			m->m_flags |= M_FASTFWD_OURS;
4482531Swollman			if (m->m_pkthdr.rcvif == NULL)
4492531Swollman				m->m_pkthdr.rcvif = loif;
4502531Swollman			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
4512531Swollman				m->m_pkthdr.csum_flags |=
4522531Swollman				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
4532531Swollman				m->m_pkthdr.csum_data = 0xffff;
4542531Swollman			}
4552531Swollman			m->m_pkthdr.csum_flags |=
4562531Swollman			    CSUM_IP_CHECKED | CSUM_IP_VALID;
4571541Srgrimes
4582531Swollman			error = netisr_queue(NETISR_IP, m);
4592531Swollman			goto done;
4602531Swollman		} else
4612531Swollman			goto again;	/* Redo the routing table lookup. */
4622531Swollman	}
4632531Swollman
4642531Swollman#ifdef IPFIREWALL_FORWARD
4652531Swollman	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
46614622Sfenner	if (m->m_flags & M_FASTFWD_OURS) {
4671541Srgrimes		if (m->m_pkthdr.rcvif == NULL)
4682531Swollman			m->m_pkthdr.rcvif = loif;
4692531Swollman		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
4709209Swollman			m->m_pkthdr.csum_flags |=
4712531Swollman			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
4722531Swollman			m->m_pkthdr.csum_data = 0xffff;
4732531Swollman		}
4742531Swollman		m->m_pkthdr.csum_flags |=
4752531Swollman			    CSUM_IP_CHECKED | CSUM_IP_VALID;
4761541Srgrimes
47714622Sfenner		error = netisr_queue(NETISR_IP, m);
4782531Swollman		goto done;
4792531Swollman	}
4802531Swollman	/* Or forward to some other address? */
4811541Srgrimes	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
482	if (fwd_tag) {
483		dst = (struct sockaddr_in *)&ro->ro_dst;
484		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
485		m->m_flags |= M_SKIP_FIREWALL;
486		m_tag_delete(m, fwd_tag);
487		goto again;
488	}
489#endif /* IPFIREWALL_FORWARD */
490
491passout:
492	/* 127/8 must not appear on wire - RFC1122. */
493	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
494	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
495		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
496			ipstat.ips_badaddr++;
497			error = EADDRNOTAVAIL;
498			goto bad;
499		}
500	}
501
502	m->m_pkthdr.csum_flags |= CSUM_IP;
503	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
504	if (sw_csum & CSUM_DELAY_DATA) {
505		in_delayed_cksum(m);
506		sw_csum &= ~CSUM_DELAY_DATA;
507	}
508	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
509
510	/*
511	 * If small enough for interface, or the interface will take
512	 * care of the fragmentation for us, we can just send directly.
513	 */
514	if (ip->ip_len <= mtu ||
515	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
516	    ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
517		ip->ip_len = htons(ip->ip_len);
518		ip->ip_off = htons(ip->ip_off);
519		ip->ip_sum = 0;
520		if (sw_csum & CSUM_DELAY_IP)
521			ip->ip_sum = in_cksum(m, hlen);
522
523		/*
524		 * Record statistics for this interface address.
525		 * With CSUM_TSO the byte/packet count will be slightly
526		 * incorrect because we count the IP+TCP headers only
527		 * once instead of for every generated packet.
528		 */
529		if (!(flags & IP_FORWARDING) && ia) {
530			if (m->m_pkthdr.csum_flags & CSUM_TSO)
531				ia->ia_ifa.if_opackets +=
532				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
533			else
534				ia->ia_ifa.if_opackets++;
535			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
536		}
537#ifdef MBUF_STRESS_TEST
538		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
539			m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
540#endif
541		/*
542		 * Reset layer specific mbuf flags
543		 * to avoid confusing lower layers.
544		 */
545		m->m_flags &= ~(M_PROTOFLAGS);
546
547		error = (*ifp->if_output)(ifp, m,
548				(struct sockaddr *)dst, ro->ro_rt);
549		goto done;
550	}
551
552	/* Balk when DF bit is set or the interface didn't support TSO. */
553	if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
554		error = EMSGSIZE;
555		ipstat.ips_cantfrag++;
556		goto bad;
557	}
558
559	/*
560	 * Too large for interface; fragment if possible. If successful,
561	 * on return, m will point to a list of packets to be sent.
562	 */
563	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
564	if (error)
565		goto bad;
566	for (; m; m = m0) {
567		m0 = m->m_nextpkt;
568		m->m_nextpkt = 0;
569		if (error == 0) {
570			/* Record statistics for this interface address. */
571			if (ia != NULL) {
572				ia->ia_ifa.if_opackets++;
573				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
574			}
575			/*
576			 * Reset layer specific mbuf flags
577			 * to avoid confusing upper layers.
578			 */
579			m->m_flags &= ~(M_PROTOFLAGS);
580
581			error = (*ifp->if_output)(ifp, m,
582			    (struct sockaddr *)dst, ro->ro_rt);
583		} else
584			m_freem(m);
585	}
586
587	if (error == 0)
588		ipstat.ips_fragmented++;
589
590done:
591	if (ro == &iproute && ro->ro_rt) {
592		RTFREE(ro->ro_rt);
593	}
594	return (error);
595bad:
596	m_freem(m);
597	goto done;
598}
599
600/*
601 * Create a chain of fragments which fit the given mtu. m_frag points to the
602 * mbuf to be fragmented; on return it points to the chain with the fragments.
603 * Return 0 if no error. If error, m_frag may contain a partially built
604 * chain of fragments that should be freed by the caller.
605 *
606 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
607 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
608 */
609int
610ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
611    u_long if_hwassist_flags, int sw_csum)
612{
613	int error = 0;
614	int hlen = ip->ip_hl << 2;
615	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
616	int off;
617	struct mbuf *m0 = *m_frag;	/* the original packet		*/
618	int firstlen;
619	struct mbuf **mnext;
620	int nfrags;
621
622	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
623		ipstat.ips_cantfrag++;
624		return EMSGSIZE;
625	}
626
627	/*
628	 * Must be able to put at least 8 bytes per fragment.
629	 */
630	if (len < 8)
631		return EMSGSIZE;
632
633	/*
634	 * If the interface will not calculate checksums on
635	 * fragmented packets, then do it here.
636	 */
637	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
638	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
639		in_delayed_cksum(m0);
640		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
641	}
642
643	if (len > PAGE_SIZE) {
644		/*
645		 * Fragment large datagrams such that each segment
646		 * contains a multiple of PAGE_SIZE amount of data,
647		 * plus headers. This enables a receiver to perform
648		 * page-flipping zero-copy optimizations.
649		 *
650		 * XXX When does this help given that sender and receiver
651		 * could have different page sizes, and also mtu could
652		 * be less than the receiver's page size ?
653		 */
654		int newlen;
655		struct mbuf *m;
656
657		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
658			off += m->m_len;
659
660		/*
661		 * firstlen (off - hlen) must be aligned on an
662		 * 8-byte boundary
663		 */
664		if (off < hlen)
665			goto smart_frag_failure;
666		off = ((off - hlen) & ~7) + hlen;
667		newlen = (~PAGE_MASK) & mtu;
668		if ((newlen + sizeof (struct ip)) > mtu) {
669			/* we failed, go back the default */
670smart_frag_failure:
671			newlen = len;
672			off = hlen + len;
673		}
674		len = newlen;
675
676	} else {
677		off = hlen + len;
678	}
679
680	firstlen = off - hlen;
681	mnext = &m0->m_nextpkt;		/* pointer to next packet */
682
683	/*
684	 * Loop through length of segment after first fragment,
685	 * make new header and copy data of each part and link onto chain.
686	 * Here, m0 is the original packet, m is the fragment being created.
687	 * The fragments are linked off the m_nextpkt of the original
688	 * packet, which after processing serves as the first fragment.
689	 */
690	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
691		struct ip *mhip;	/* ip header on the fragment */
692		struct mbuf *m;
693		int mhlen = sizeof (struct ip);
694
695		MGETHDR(m, M_DONTWAIT, MT_DATA);
696		if (m == NULL) {
697			error = ENOBUFS;
698			ipstat.ips_odropped++;
699			goto done;
700		}
701		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
702		/*
703		 * In the first mbuf, leave room for the link header, then
704		 * copy the original IP header including options. The payload
705		 * goes into an additional mbuf chain returned by m_copy().
706		 */
707		m->m_data += max_linkhdr;
708		mhip = mtod(m, struct ip *);
709		*mhip = *ip;
710		if (hlen > sizeof (struct ip)) {
711			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
712			mhip->ip_v = IPVERSION;
713			mhip->ip_hl = mhlen >> 2;
714		}
715		m->m_len = mhlen;
716		/* XXX do we need to add ip->ip_off below ? */
717		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
718		if (off + len >= ip->ip_len) {	/* last fragment */
719			len = ip->ip_len - off;
720			m->m_flags |= M_LASTFRAG;
721		} else
722			mhip->ip_off |= IP_MF;
723		mhip->ip_len = htons((u_short)(len + mhlen));
724		m->m_next = m_copy(m0, off, len);
725		if (m->m_next == NULL) {	/* copy failed */
726			m_free(m);
727			error = ENOBUFS;	/* ??? */
728			ipstat.ips_odropped++;
729			goto done;
730		}
731		m->m_pkthdr.len = mhlen + len;
732		m->m_pkthdr.rcvif = NULL;
733#ifdef MAC
734		mac_create_fragment(m0, m);
735#endif
736		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
737		mhip->ip_off = htons(mhip->ip_off);
738		mhip->ip_sum = 0;
739		if (sw_csum & CSUM_DELAY_IP)
740			mhip->ip_sum = in_cksum(m, mhlen);
741		*mnext = m;
742		mnext = &m->m_nextpkt;
743	}
744	ipstat.ips_ofragments += nfrags;
745
746	/* set first marker for fragment chain */
747	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
748	m0->m_pkthdr.csum_data = nfrags;
749
750	/*
751	 * Update first fragment by trimming what's been copied out
752	 * and updating header.
753	 */
754	m_adj(m0, hlen + firstlen - ip->ip_len);
755	m0->m_pkthdr.len = hlen + firstlen;
756	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
757	ip->ip_off |= IP_MF;
758	ip->ip_off = htons(ip->ip_off);
759	ip->ip_sum = 0;
760	if (sw_csum & CSUM_DELAY_IP)
761		ip->ip_sum = in_cksum(m0, hlen);
762
763done:
764	*m_frag = m0;
765	return error;
766}
767
768void
769in_delayed_cksum(struct mbuf *m)
770{
771	struct ip *ip;
772	u_short csum, offset;
773
774	ip = mtod(m, struct ip *);
775	offset = ip->ip_hl << 2 ;
776	csum = in_cksum_skip(m, ip->ip_len, offset);
777	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
778		csum = 0xffff;
779	offset += m->m_pkthdr.csum_data;	/* checksum offset */
780
781	if (offset + sizeof(u_short) > m->m_len) {
782		printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
783		    m->m_len, offset, ip->ip_p);
784		/*
785		 * XXX
786		 * this shouldn't happen, but if it does, the
787		 * correct behavior may be to insert the checksum
788		 * in the appropriate next mbuf in the chain.
789		 */
790		return;
791	}
792	*(u_short *)(m->m_data + offset) = csum;
793}
794
795/*
796 * IP socket option processing.
797 */
798int
799ip_ctloutput(struct socket *so, struct sockopt *sopt)
800{
801	struct	inpcb *inp = sotoinpcb(so);
802	int	error, optval;
803
804	error = optval = 0;
805	if (sopt->sopt_level != IPPROTO_IP) {
806		return (EINVAL);
807	}
808
809	switch (sopt->sopt_dir) {
810	case SOPT_SET:
811		switch (sopt->sopt_name) {
812		case IP_OPTIONS:
813#ifdef notyet
814		case IP_RETOPTS:
815#endif
816		{
817			struct mbuf *m;
818			if (sopt->sopt_valsize > MLEN) {
819				error = EMSGSIZE;
820				break;
821			}
822			MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
823			if (m == NULL) {
824				error = ENOBUFS;
825				break;
826			}
827			m->m_len = sopt->sopt_valsize;
828			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
829					    m->m_len);
830			if (error) {
831				m_free(m);
832				break;
833			}
834			INP_LOCK(inp);
835			error = ip_pcbopts(inp, sopt->sopt_name, m);
836			INP_UNLOCK(inp);
837			return (error);
838		}
839
840		case IP_TOS:
841		case IP_TTL:
842		case IP_MINTTL:
843		case IP_RECVOPTS:
844		case IP_RECVRETOPTS:
845		case IP_RECVDSTADDR:
846		case IP_RECVTTL:
847		case IP_RECVIF:
848		case IP_FAITH:
849		case IP_ONESBCAST:
850		case IP_DONTFRAG:
851			error = sooptcopyin(sopt, &optval, sizeof optval,
852					    sizeof optval);
853			if (error)
854				break;
855
856			switch (sopt->sopt_name) {
857			case IP_TOS:
858				inp->inp_ip_tos = optval;
859				break;
860
861			case IP_TTL:
862				inp->inp_ip_ttl = optval;
863				break;
864
865			case IP_MINTTL:
866				if (optval > 0 && optval <= MAXTTL)
867					inp->inp_ip_minttl = optval;
868				else
869					error = EINVAL;
870				break;
871
872#define	OPTSET(bit) do {						\
873	INP_LOCK(inp);							\
874	if (optval)							\
875		inp->inp_flags |= bit;					\
876	else								\
877		inp->inp_flags &= ~bit;					\
878	INP_UNLOCK(inp);						\
879} while (0)
880
881			case IP_RECVOPTS:
882				OPTSET(INP_RECVOPTS);
883				break;
884
885			case IP_RECVRETOPTS:
886				OPTSET(INP_RECVRETOPTS);
887				break;
888
889			case IP_RECVDSTADDR:
890				OPTSET(INP_RECVDSTADDR);
891				break;
892
893			case IP_RECVTTL:
894				OPTSET(INP_RECVTTL);
895				break;
896
897			case IP_RECVIF:
898				OPTSET(INP_RECVIF);
899				break;
900
901			case IP_FAITH:
902				OPTSET(INP_FAITH);
903				break;
904
905			case IP_ONESBCAST:
906				OPTSET(INP_ONESBCAST);
907				break;
908			case IP_DONTFRAG:
909				OPTSET(INP_DONTFRAG);
910				break;
911			}
912			break;
913#undef OPTSET
914
915		/*
916		 * Multicast socket options are processed by the in_mcast
917		 * module.
918		 */
919		case IP_MULTICAST_IF:
920		case IP_MULTICAST_VIF:
921		case IP_MULTICAST_TTL:
922		case IP_MULTICAST_LOOP:
923		case IP_ADD_MEMBERSHIP:
924		case IP_DROP_MEMBERSHIP:
925		case IP_ADD_SOURCE_MEMBERSHIP:
926		case IP_DROP_SOURCE_MEMBERSHIP:
927		case IP_BLOCK_SOURCE:
928		case IP_UNBLOCK_SOURCE:
929		case IP_MSFILTER:
930		case MCAST_JOIN_GROUP:
931		case MCAST_LEAVE_GROUP:
932		case MCAST_JOIN_SOURCE_GROUP:
933		case MCAST_LEAVE_SOURCE_GROUP:
934		case MCAST_BLOCK_SOURCE:
935		case MCAST_UNBLOCK_SOURCE:
936			error = inp_setmoptions(inp, sopt);
937			break;
938
939		case IP_PORTRANGE:
940			error = sooptcopyin(sopt, &optval, sizeof optval,
941					    sizeof optval);
942			if (error)
943				break;
944
945			INP_LOCK(inp);
946			switch (optval) {
947			case IP_PORTRANGE_DEFAULT:
948				inp->inp_flags &= ~(INP_LOWPORT);
949				inp->inp_flags &= ~(INP_HIGHPORT);
950				break;
951
952			case IP_PORTRANGE_HIGH:
953				inp->inp_flags &= ~(INP_LOWPORT);
954				inp->inp_flags |= INP_HIGHPORT;
955				break;
956
957			case IP_PORTRANGE_LOW:
958				inp->inp_flags &= ~(INP_HIGHPORT);
959				inp->inp_flags |= INP_LOWPORT;
960				break;
961
962			default:
963				error = EINVAL;
964				break;
965			}
966			INP_UNLOCK(inp);
967			break;
968
969#ifdef IPSEC
970		case IP_IPSEC_POLICY:
971		{
972			caddr_t req;
973			size_t len = 0;
974			int priv;
975			struct mbuf *m;
976			int optname;
977
978			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
979				break;
980			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
981				break;
982			if (sopt->sopt_td != NULL) {
983				/*
984				 * XXXRW: Would be more desirable to do this
985				 * one layer down so that we only exercise
986				 * privilege if it is needed.
987				 */
988				error = priv_check(sopt->sopt_td,
989				    PRIV_NETINET_IPSEC);
990				if (error)
991					priv = 0;
992				else
993					priv = 1;
994			} else
995				priv = 1;
996			req = mtod(m, caddr_t);
997			len = m->m_len;
998			optname = sopt->sopt_name;
999			error = ipsec4_set_policy(inp, optname, req, len, priv);
1000			m_freem(m);
1001			break;
1002		}
1003#endif /* IPSEC */
1004
1005		default:
1006			error = ENOPROTOOPT;
1007			break;
1008		}
1009		break;
1010
1011	case SOPT_GET:
1012		switch (sopt->sopt_name) {
1013		case IP_OPTIONS:
1014		case IP_RETOPTS:
1015			if (inp->inp_options)
1016				error = sooptcopyout(sopt,
1017						     mtod(inp->inp_options,
1018							  char *),
1019						     inp->inp_options->m_len);
1020			else
1021				sopt->sopt_valsize = 0;
1022			break;
1023
1024		case IP_TOS:
1025		case IP_TTL:
1026		case IP_MINTTL:
1027		case IP_RECVOPTS:
1028		case IP_RECVRETOPTS:
1029		case IP_RECVDSTADDR:
1030		case IP_RECVTTL:
1031		case IP_RECVIF:
1032		case IP_PORTRANGE:
1033		case IP_FAITH:
1034		case IP_ONESBCAST:
1035		case IP_DONTFRAG:
1036			switch (sopt->sopt_name) {
1037
1038			case IP_TOS:
1039				optval = inp->inp_ip_tos;
1040				break;
1041
1042			case IP_TTL:
1043				optval = inp->inp_ip_ttl;
1044				break;
1045
1046			case IP_MINTTL:
1047				optval = inp->inp_ip_minttl;
1048				break;
1049
1050#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1051
1052			case IP_RECVOPTS:
1053				optval = OPTBIT(INP_RECVOPTS);
1054				break;
1055
1056			case IP_RECVRETOPTS:
1057				optval = OPTBIT(INP_RECVRETOPTS);
1058				break;
1059
1060			case IP_RECVDSTADDR:
1061				optval = OPTBIT(INP_RECVDSTADDR);
1062				break;
1063
1064			case IP_RECVTTL:
1065				optval = OPTBIT(INP_RECVTTL);
1066				break;
1067
1068			case IP_RECVIF:
1069				optval = OPTBIT(INP_RECVIF);
1070				break;
1071
1072			case IP_PORTRANGE:
1073				if (inp->inp_flags & INP_HIGHPORT)
1074					optval = IP_PORTRANGE_HIGH;
1075				else if (inp->inp_flags & INP_LOWPORT)
1076					optval = IP_PORTRANGE_LOW;
1077				else
1078					optval = 0;
1079				break;
1080
1081			case IP_FAITH:
1082				optval = OPTBIT(INP_FAITH);
1083				break;
1084
1085			case IP_ONESBCAST:
1086				optval = OPTBIT(INP_ONESBCAST);
1087				break;
1088			case IP_DONTFRAG:
1089				optval = OPTBIT(INP_DONTFRAG);
1090				break;
1091			}
1092			error = sooptcopyout(sopt, &optval, sizeof optval);
1093			break;
1094
1095		/*
1096		 * Multicast socket options are processed by the in_mcast
1097		 * module.
1098		 */
1099		case IP_MULTICAST_IF:
1100		case IP_MULTICAST_VIF:
1101		case IP_MULTICAST_TTL:
1102		case IP_MULTICAST_LOOP:
1103		case IP_MSFILTER:
1104			error = inp_getmoptions(inp, sopt);
1105			break;
1106
1107#ifdef IPSEC
1108		case IP_IPSEC_POLICY:
1109		{
1110			struct mbuf *m = NULL;
1111			caddr_t req = NULL;
1112			size_t len = 0;
1113
1114			if (m != 0) {
1115				req = mtod(m, caddr_t);
1116				len = m->m_len;
1117			}
1118			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1119			if (error == 0)
1120				error = soopt_mcopyout(sopt, m); /* XXX */
1121			if (error == 0)
1122				m_freem(m);
1123			break;
1124		}
1125#endif /* IPSEC */
1126
1127		default:
1128			error = ENOPROTOOPT;
1129			break;
1130		}
1131		break;
1132	}
1133	return (error);
1134}
1135
1136/*
1137 * Routine called from ip_output() to loop back a copy of an IP multicast
1138 * packet to the input queue of a specified interface.  Note that this
1139 * calls the output routine of the loopback "driver", but with an interface
1140 * pointer that might NOT be a loopback interface -- evil, but easier than
1141 * replicating that code here.
1142 */
1143static void
1144ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
1145    int hlen)
1146{
1147	register struct ip *ip;
1148	struct mbuf *copym;
1149
1150	copym = m_copy(m, 0, M_COPYALL);
1151	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1152		copym = m_pullup(copym, hlen);
1153	if (copym != NULL) {
1154		/* If needed, compute the checksum and mark it as valid. */
1155		if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1156			in_delayed_cksum(copym);
1157			copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1158			copym->m_pkthdr.csum_flags |=
1159			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1160			copym->m_pkthdr.csum_data = 0xffff;
1161		}
1162		/*
1163		 * We don't bother to fragment if the IP length is greater
1164		 * than the interface's MTU.  Can this possibly matter?
1165		 */
1166		ip = mtod(copym, struct ip *);
1167		ip->ip_len = htons(ip->ip_len);
1168		ip->ip_off = htons(ip->ip_off);
1169		ip->ip_sum = 0;
1170		ip->ip_sum = in_cksum(copym, hlen);
1171		/*
1172		 * NB:
1173		 * It's not clear whether there are any lingering
1174		 * reentrancy problems in other areas which might
1175		 * be exposed by using ip_input directly (in
1176		 * particular, everything which modifies the packet
1177		 * in-place).  Yet another option is using the
1178		 * protosw directly to deliver the looped back
1179		 * packet.  For the moment, we'll err on the side
1180		 * of safety by using if_simloop().
1181		 */
1182#if 1 /* XXX */
1183		if (dst->sin_family != AF_INET) {
1184			printf("ip_mloopback: bad address family %d\n",
1185						dst->sin_family);
1186			dst->sin_family = AF_INET;
1187		}
1188#endif
1189
1190#ifdef notdef
1191		copym->m_pkthdr.rcvif = ifp;
1192		ip_input(copym);
1193#else
1194		if_simloop(ifp, copym, dst->sin_family, 0);
1195#endif
1196	}
1197}
1198