ip_output.c revision 194760
1239675Srwatson/*-
2239675Srwatson * Copyright (c) 1982, 1986, 1988, 1990, 1993
3239675Srwatson *	The Regents of the University of California.  All rights reserved.
4239675Srwatson *
5239675Srwatson * Redistribution and use in source and binary forms, with or without
6239675Srwatson * modification, are permitted provided that the following conditions
7239675Srwatson * are met:
8239675Srwatson * 1. Redistributions of source code must retain the above copyright
9239675Srwatson *    notice, this list of conditions and the following disclaimer.
10239675Srwatson * 2. Redistributions in binary form must reproduce the above copyright
11239675Srwatson *    notice, this list of conditions and the following disclaimer in the
12239675Srwatson *    documentation and/or other materials provided with the distribution.
13239675Srwatson * 4. Neither the name of the University nor the names of its contributors
14239675Srwatson *    may be used to endorse or promote products derived from this software
15239675Srwatson *    without specific prior written permission.
16239675Srwatson *
17239675Srwatson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18239675Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19239675Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20239675Srwatson * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21239675Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22239675Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23239675Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24239675Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25239675Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26239675Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27239675Srwatson * SUCH DAMAGE.
28239675Srwatson *
29239675Srwatson *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
30239675Srwatson */
31239675Srwatson
32239675Srwatson#include <sys/cdefs.h>
33239675Srwatson__FBSDID("$FreeBSD: head/sys/netinet/ip_output.c 194760 2009-06-23 20:19:09Z rwatson $");
34256744Sbrooks
35256744Sbrooks#include "opt_ipfw.h"
36239675Srwatson#include "opt_ipsec.h"
37239675Srwatson#include "opt_route.h"
38239675Srwatson#include "opt_mbuf_stress_test.h"
39239675Srwatson#include "opt_mpath.h"
40239675Srwatson#include "opt_sctp.h"
41239675Srwatson
42239675Srwatson#include <sys/param.h>
43239675Srwatson#include <sys/systm.h>
44239675Srwatson#include <sys/kernel.h>
45239675Srwatson#include <sys/malloc.h>
46239675Srwatson#include <sys/mbuf.h>
47239675Srwatson#include <sys/priv.h>
48239675Srwatson#include <sys/proc.h>
49239675Srwatson#include <sys/protosw.h>
50239675Srwatson#include <sys/socket.h>
51239675Srwatson#include <sys/socketvar.h>
52239675Srwatson#include <sys/sysctl.h>
53239675Srwatson#include <sys/ucred.h>
54239675Srwatson#include <sys/vimage.h>
55239675Srwatson
56239675Srwatson#include <net/if.h>
57239675Srwatson#include <net/netisr.h>
58239675Srwatson#include <net/pfil.h>
59239675Srwatson#include <net/route.h>
60239675Srwatson#include <net/flowtable.h>
61239675Srwatson#ifdef RADIX_MPATH
62239675Srwatson#include <net/radix_mpath.h>
63239675Srwatson#endif
64239675Srwatson#include <net/vnet.h>
65239675Srwatson
66239675Srwatson#include <netinet/in.h>
67239675Srwatson#include <netinet/in_systm.h>
68239675Srwatson#include <netinet/ip.h>
69239675Srwatson#include <netinet/in_pcb.h>
70239675Srwatson#include <netinet/in_var.h>
71239675Srwatson#include <netinet/ip_var.h>
72239675Srwatson#include <netinet/ip_options.h>
73239675Srwatson#include <netinet/vinet.h>
74239675Srwatson#ifdef SCTP
75245380Srwatson#include <netinet/sctp.h>
76239675Srwatson#include <netinet/sctp_crc32.h>
77239675Srwatson#endif
78239675Srwatson
79239675Srwatson#ifdef IPSEC
80239675Srwatson#include <netinet/ip_ipsec.h>
81239675Srwatson#include <netipsec/ipsec.h>
82239675Srwatson#endif /* IPSEC*/
83239675Srwatson
84239675Srwatson#include <machine/in_cksum.h>
85239675Srwatson
86239675Srwatson#include <security/mac/mac_framework.h>
87239675Srwatson
/*
 * Debugging aid: print the label string x, the IPv4 address a (a
 * struct in_addr holding the address in network byte order) in
 * dotted-quad form, and the trailing string y.
 *
 * The address argument is parenthesized so that expressions such as
 * "*p" or "cond ? a : b" expand correctly.  The expansion still ends in
 * a semicolon, as it always has, so existing uses that rely on it keep
 * compiling; as a consequence the macro must not be used as the sole
 * body of an if that has an else branch.  Note that a is evaluated four
 * times.
 */
#define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
				(x), (ntohl((a).s_addr)>>24)&0xFF,\
				  (ntohl((a).s_addr)>>16)&0xFF,\
				  (ntohl((a).s_addr)>>8)&0xFF,\
				  (ntohl((a).s_addr))&0xFF, (y));
93239675Srwatson
94239675Srwatson#ifdef VIMAGE_GLOBALS
95239675Srwatsonu_short ip_id;
96239675Srwatson#endif
97239675Srwatson
98239675Srwatson#ifdef MBUF_STRESS_TEST
99239675Srwatsonint mbuf_frag_size = 0;
100239675SrwatsonSYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
101239675Srwatson	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
102239675Srwatson#endif
103239675Srwatson
104239675Srwatsonstatic void	ip_mloopback
105239675Srwatson	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
106239675Srwatson
107239675Srwatson
108239675Srwatsonextern int in_mcast_loop;
109239675Srwatsonextern	struct protosw inetsw[];
110239675Srwatson
111239675Srwatson/*
112239675Srwatson * IP output.  The packet in mbuf chain m contains a skeletal IP
113239675Srwatson * header (with len, off, ttl, proto, tos, src, dst).
114239675Srwatson * The mbuf chain containing the packet will be freed.
115239675Srwatson * The mbuf opt, if present, will not be freed.
116239675Srwatson * In the IP forwarding case, the packet will arrive with options already
117239675Srwatson * inserted, so must have a NULL opt pointer.
118239675Srwatson */
119239675Srwatsonint
120239675Srwatsonip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
121239675Srwatson    struct ip_moptions *imo, struct inpcb *inp)
122239675Srwatson{
123239675Srwatson	INIT_VNET_NET(curvnet);
124239675Srwatson	INIT_VNET_INET(curvnet);
125239675Srwatson	struct ip *ip;
126239675Srwatson	struct ifnet *ifp = NULL;	/* keep compiler happy */
127239675Srwatson	struct mbuf *m0;
128239675Srwatson	int hlen = sizeof (struct ip);
129239675Srwatson	int mtu;
130239675Srwatson	int len, error = 0;
131239675Srwatson	int nortfree = 0;
132239675Srwatson	struct sockaddr_in *dst = NULL;	/* keep compiler happy */
133239675Srwatson	struct in_ifaddr *ia = NULL;
134239675Srwatson	int isbroadcast, sw_csum;
135239675Srwatson	struct route iproute;
136239675Srwatson	struct in_addr odst;
137239675Srwatson#ifdef IPFIREWALL_FORWARD
138239675Srwatson	struct m_tag *fwd_tag = NULL;
139239675Srwatson#endif
140239675Srwatson#ifdef IPSEC
141239675Srwatson	int no_route_but_check_spd = 0;
142239675Srwatson#endif
143239675Srwatson	M_ASSERTPKTHDR(m);
144239675Srwatson
145239675Srwatson	if (inp != NULL) {
146239675Srwatson		INP_LOCK_ASSERT(inp);
147239675Srwatson		M_SETFIB(m, inp->inp_inc.inc_fibnum);
148239675Srwatson		if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
149239675Srwatson			m->m_pkthdr.flowid = inp->inp_flowid;
150239675Srwatson			m->m_flags |= M_FLOWID;
151239675Srwatson		}
152239675Srwatson	}
153239675Srwatson
154239675Srwatson	if (ro == NULL) {
155239675Srwatson		ro = &iproute;
156239675Srwatson		bzero(ro, sizeof (*ro));
157239675Srwatson
158239675Srwatson#ifdef FLOWTABLE
159239675Srwatson		/*
160239675Srwatson		 * The flow table returns route entries valid for up to 30
161239675Srwatson		 * seconds; we rely on the remainder of ip_output() taking no
162239675Srwatson		 * longer than that long for the stability of ro_rt.  The
163239675Srwatson		 * flow ID assignment must have happened before this point.
164239675Srwatson		 */
165239675Srwatson		if (flowtable_lookup(V_ip_ft, m, ro) == 0)
166239675Srwatson			nortfree = 1;
167239675Srwatson#endif
168239675Srwatson	}
169239675Srwatson
170239675Srwatson	if (opt) {
171239675Srwatson		len = 0;
172239675Srwatson		m = ip_insertoptions(m, opt, &len);
173239675Srwatson		if (len != 0)
174239675Srwatson			hlen = len;
175239675Srwatson	}
176239675Srwatson	ip = mtod(m, struct ip *);
177239675Srwatson
178239675Srwatson	/*
179239675Srwatson	 * Fill in IP header.  If we are not allowing fragmentation,
180239675Srwatson	 * then the ip_id field is meaningless, but we don't set it
181239675Srwatson	 * to zero.  Doing so causes various problems when devices along
182239675Srwatson	 * the path (routers, load balancers, firewalls, etc.) illegally
183239675Srwatson	 * disable DF on our packet.  Note that a 16-bit counter
184239675Srwatson	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
185239675Srwatson	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
186239675Srwatson	 * for Counting NATted Hosts", Proc. IMW'02, available at
187239675Srwatson	 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
188239675Srwatson	 */
189239675Srwatson	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
190239675Srwatson		ip->ip_v = IPVERSION;
191239675Srwatson		ip->ip_hl = hlen >> 2;
192239675Srwatson		ip->ip_id = ip_newid();
193239675Srwatson		IPSTAT_INC(ips_localout);
194239675Srwatson	} else {
195239675Srwatson		hlen = ip->ip_hl << 2;
196239675Srwatson	}
197239675Srwatson
198239675Srwatson	dst = (struct sockaddr_in *)&ro->ro_dst;
199239675Srwatsonagain:
200239675Srwatson	/*
201239675Srwatson	 * If there is a cached route,
202239675Srwatson	 * check that it is to the same destination
203239675Srwatson	 * and is still up.  If not, free it and try again.
204239675Srwatson	 * The address family should also be checked in case of sharing the
205239675Srwatson	 * cache with IPv6.
206239675Srwatson	 */
207239675Srwatson	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
208239675Srwatson			  dst->sin_family != AF_INET ||
209239675Srwatson			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
210239675Srwatson		if (!nortfree)
211239675Srwatson			RTFREE(ro->ro_rt);
212239675Srwatson		ro->ro_rt = (struct rtentry *)NULL;
213239675Srwatson	}
214239675Srwatson#ifdef IPFIREWALL_FORWARD
215239675Srwatson	if (ro->ro_rt == NULL && fwd_tag == NULL) {
216239675Srwatson#else
217239675Srwatson	if (ro->ro_rt == NULL) {
218239675Srwatson#endif
219239675Srwatson		bzero(dst, sizeof(*dst));
220239675Srwatson		dst->sin_family = AF_INET;
221239675Srwatson		dst->sin_len = sizeof(*dst);
222239675Srwatson		dst->sin_addr = ip->ip_dst;
223239675Srwatson	}
224239675Srwatson	/*
225239675Srwatson	 * If routing to interface only, short circuit routing lookup.
226239675Srwatson	 * The use of an all-ones broadcast address implies this; an
227239675Srwatson	 * interface is specified by the broadcast address of an interface,
228239675Srwatson	 * or the destination address of a ptp interface.
229239675Srwatson	 */
230239675Srwatson	if (flags & IP_SENDONES) {
231239675Srwatson		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
232239675Srwatson		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
233239675Srwatson			IPSTAT_INC(ips_noroute);
234239675Srwatson			error = ENETUNREACH;
235239675Srwatson			goto bad;
236239675Srwatson		}
237239675Srwatson		ip->ip_dst.s_addr = INADDR_BROADCAST;
238239675Srwatson		dst->sin_addr = ip->ip_dst;
239239675Srwatson		ifp = ia->ia_ifp;
240239675Srwatson		ip->ip_ttl = 1;
241239675Srwatson		isbroadcast = 1;
242239675Srwatson	} else if (flags & IP_ROUTETOIF) {
243239675Srwatson		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
244239675Srwatson		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
245239675Srwatson			IPSTAT_INC(ips_noroute);
246239675Srwatson			error = ENETUNREACH;
247239675Srwatson			goto bad;
248239675Srwatson		}
249239675Srwatson		ifp = ia->ia_ifp;
250239675Srwatson		ip->ip_ttl = 1;
251239675Srwatson		isbroadcast = in_broadcast(dst->sin_addr, ifp);
252239675Srwatson	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
253239675Srwatson	    imo != NULL && imo->imo_multicast_ifp != NULL) {
254239675Srwatson		/*
255239675Srwatson		 * Bypass the normal routing lookup for multicast
256239675Srwatson		 * packets if the interface is specified.
257239675Srwatson		 */
258239675Srwatson		ifp = imo->imo_multicast_ifp;
259239675Srwatson		IFP_TO_IA(ifp, ia);
260239675Srwatson		isbroadcast = 0;	/* fool gcc */
261239675Srwatson	} else {
262239675Srwatson		/*
263256744Sbrooks		 * We want to do any cloning requested by the link layer,
264256744Sbrooks		 * as this is probably required in all cases for correct
265256744Sbrooks		 * operation (as it is for ARP).
266239675Srwatson		 */
267239675Srwatson		if (ro->ro_rt == NULL)
268239675Srwatson#ifdef RADIX_MPATH
269239675Srwatson			rtalloc_mpath_fib(ro,
270239675Srwatson			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
271239675Srwatson			    inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
272239675Srwatson#else
273239675Srwatson			in_rtalloc_ign(ro, 0,
274239675Srwatson			    inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
275239675Srwatson#endif
276239675Srwatson		if (ro->ro_rt == NULL) {
277239675Srwatson#ifdef IPSEC
278239675Srwatson			/*
279239675Srwatson			 * There is no route for this packet, but it is
280239675Srwatson			 * possible that a matching SPD entry exists.
281239675Srwatson			 */
282239675Srwatson			no_route_but_check_spd = 1;
283239675Srwatson			mtu = 0; /* Silence GCC warning. */
284239675Srwatson			goto sendit;
285239675Srwatson#endif
286239675Srwatson			IPSTAT_INC(ips_noroute);
287239675Srwatson			error = EHOSTUNREACH;
288239675Srwatson			goto bad;
289239675Srwatson		}
290239675Srwatson		ia = ifatoia(ro->ro_rt->rt_ifa);
291239675Srwatson		ifa_ref(&ia->ia_ifa);
292239675Srwatson		ifp = ro->ro_rt->rt_ifp;
293239675Srwatson		ro->ro_rt->rt_rmx.rmx_pksent++;
294239675Srwatson		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
295239675Srwatson			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
296239675Srwatson		if (ro->ro_rt->rt_flags & RTF_HOST)
297239675Srwatson			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
298239675Srwatson		else
299239675Srwatson			isbroadcast = in_broadcast(dst->sin_addr, ifp);
300239675Srwatson	}
301239675Srwatson	/*
302239675Srwatson	 * Calculate MTU.  If we have a route that is up, use that,
303239675Srwatson	 * otherwise use the interface's MTU.
304239675Srwatson	 */
305239675Srwatson	if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) {
306239675Srwatson		/*
307256744Sbrooks		 * This case can happen if the user changed the MTU
308239675Srwatson		 * of an interface after enabling IP on it.  Because
309256744Sbrooks		 * most netifs don't keep track of routes pointing to
310256744Sbrooks		 * them, there is no way for one to update all its
311256744Sbrooks		 * routes when the MTU is changed.
312256744Sbrooks		 */
313239675Srwatson		if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
314239675Srwatson			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
315239675Srwatson		mtu = ro->ro_rt->rt_rmx.rmx_mtu;
316239675Srwatson	} else {
317239675Srwatson		mtu = ifp->if_mtu;
318239675Srwatson	}
319239675Srwatson	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
320239675Srwatson		m->m_flags |= M_MCAST;
321239675Srwatson		/*
322239675Srwatson		 * IP destination address is multicast.  Make sure "dst"
323239675Srwatson		 * still points to the address in "ro".  (It may have been
324239675Srwatson		 * changed to point to a gateway address, above.)
325239675Srwatson		 */
326239675Srwatson		dst = (struct sockaddr_in *)&ro->ro_dst;
327239675Srwatson		/*
328239675Srwatson		 * See if the caller provided any multicast options
329239675Srwatson		 */
330239675Srwatson		if (imo != NULL) {
331239675Srwatson			ip->ip_ttl = imo->imo_multicast_ttl;
332239675Srwatson			if (imo->imo_multicast_vif != -1)
333239675Srwatson				ip->ip_src.s_addr =
334239675Srwatson				    ip_mcast_src ?
335239675Srwatson				    ip_mcast_src(imo->imo_multicast_vif) :
336239675Srwatson				    INADDR_ANY;
337239675Srwatson		} else
338239675Srwatson			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
339239675Srwatson		/*
340239675Srwatson		 * Confirm that the outgoing interface supports multicast.
341239675Srwatson		 */
342239675Srwatson		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
343239675Srwatson			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
344239675Srwatson				IPSTAT_INC(ips_noroute);
345239675Srwatson				error = ENETUNREACH;
346239675Srwatson				goto bad;
347239675Srwatson			}
348239675Srwatson		}
349239675Srwatson		/*
350239675Srwatson		 * If source address not specified yet, use address
351239675Srwatson		 * of outgoing interface.
352239675Srwatson		 */
353239675Srwatson		if (ip->ip_src.s_addr == INADDR_ANY) {
354239675Srwatson			/* Interface may have no addresses. */
355239675Srwatson			if (ia != NULL)
356239675Srwatson				ip->ip_src = IA_SIN(ia)->sin_addr;
357239675Srwatson		}
358239675Srwatson
359239675Srwatson		if ((imo == NULL && in_mcast_loop) ||
360239675Srwatson		    (imo && imo->imo_multicast_loop)) {
361239675Srwatson			/*
362239675Srwatson			 * Loop back multicast datagram if not expressly
363239675Srwatson			 * forbidden to do so, even if we are not a member
364239675Srwatson			 * of the group; ip_input() will filter it later,
365239675Srwatson			 * thus deferring a hash lookup and mutex acquisition
366239675Srwatson			 * at the expense of a cheap copy using m_copym().
367239675Srwatson			 */
368239675Srwatson			ip_mloopback(ifp, m, dst, hlen);
369239675Srwatson		} else {
370239675Srwatson			/*
371239675Srwatson			 * If we are acting as a multicast router, perform
372239675Srwatson			 * multicast forwarding as if the packet had just
373239675Srwatson			 * arrived on the interface to which we are about
374239675Srwatson			 * to send.  The multicast forwarding function
375239675Srwatson			 * recursively calls this function, using the
376239675Srwatson			 * IP_FORWARDING flag to prevent infinite recursion.
377239675Srwatson			 *
378239675Srwatson			 * Multicasts that are looped back by ip_mloopback(),
379239675Srwatson			 * above, will be forwarded by the ip_input() routine,
380239675Srwatson			 * if necessary.
381239675Srwatson			 */
382239675Srwatson			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
383239675Srwatson				/*
384239675Srwatson				 * If rsvp daemon is not running, do not
385239675Srwatson				 * set ip_moptions. This ensures that the packet
386239675Srwatson				 * is multicast and not just sent down one link
387239675Srwatson				 * as prescribed by rsvpd.
388239675Srwatson				 */
389239675Srwatson				if (!V_rsvp_on)
390239675Srwatson					imo = NULL;
391239675Srwatson				if (ip_mforward &&
392239675Srwatson				    ip_mforward(ip, ifp, m, imo) != 0) {
393239675Srwatson					m_freem(m);
394239675Srwatson					goto done;
395239675Srwatson				}
396239675Srwatson			}
397239675Srwatson		}
398239675Srwatson
399239675Srwatson		/*
400239675Srwatson		 * Multicasts with a time-to-live of zero may be looped-
401239675Srwatson		 * back, above, but must not be transmitted on a network.
402239675Srwatson		 * Also, multicasts addressed to the loopback interface
403239675Srwatson		 * are not sent -- the above call to ip_mloopback() will
404239675Srwatson		 * loop back a copy. ip_input() will drop the copy if
405239675Srwatson		 * this host does not belong to the destination group on
406239675Srwatson		 * the loopback interface.
407239675Srwatson		 */
408239675Srwatson		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
409256744Sbrooks			m_freem(m);
410256744Sbrooks			goto done;
411256744Sbrooks		}
412256744Sbrooks
413239675Srwatson		goto sendit;
414	}
415
416	/*
417	 * If the source address is not specified yet, use the address
418	 * of the outoing interface.
419	 */
420	if (ip->ip_src.s_addr == INADDR_ANY) {
421		/* Interface may have no addresses. */
422		if (ia != NULL) {
423			ip->ip_src = IA_SIN(ia)->sin_addr;
424		}
425	}
426
427	/*
428	 * Verify that we have any chance at all of being able to queue the
429	 * packet or packet fragments, unless ALTQ is enabled on the given
430	 * interface in which case packetdrop should be done by queueing.
431	 */
432#ifdef ALTQ
433	if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
434	    ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
435	    ifp->if_snd.ifq_maxlen))
436#else
437	if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
438	    ifp->if_snd.ifq_maxlen)
439#endif /* ALTQ */
440	{
441		error = ENOBUFS;
442		IPSTAT_INC(ips_odropped);
443		ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
444		goto bad;
445	}
446
447	/*
448	 * Look for broadcast address and
449	 * verify user is allowed to send
450	 * such a packet.
451	 */
452	if (isbroadcast) {
453		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
454			error = EADDRNOTAVAIL;
455			goto bad;
456		}
457		if ((flags & IP_ALLOWBROADCAST) == 0) {
458			error = EACCES;
459			goto bad;
460		}
461		/* don't allow broadcast messages to be fragmented */
462		if (ip->ip_len > mtu) {
463			error = EMSGSIZE;
464			goto bad;
465		}
466		m->m_flags |= M_BCAST;
467	} else {
468		m->m_flags &= ~M_BCAST;
469	}
470
471sendit:
472#ifdef IPSEC
473	switch(ip_ipsec_output(&m, inp, &flags, &error, &ifp)) {
474	case 1:
475		goto bad;
476	case -1:
477		goto done;
478	case 0:
479	default:
480		break;	/* Continue with packet processing. */
481	}
482	/*
483	 * Check if there was a route for this packet; return error if not.
484	 */
485	if (no_route_but_check_spd) {
486		IPSTAT_INC(ips_noroute);
487		error = EHOSTUNREACH;
488		goto bad;
489	}
490	/* Update variables that are affected by ipsec4_output(). */
491	ip = mtod(m, struct ip *);
492	hlen = ip->ip_hl << 2;
493#endif /* IPSEC */
494
495	/* Jump over all PFIL processing if hooks are not active. */
496	if (!PFIL_HOOKED(&inet_pfil_hook))
497		goto passout;
498
499	/* Run through list of hooks for output packets. */
500	odst.s_addr = ip->ip_dst.s_addr;
501	error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
502	if (error != 0 || m == NULL)
503		goto done;
504
505	ip = mtod(m, struct ip *);
506
507	/* See if destination IP address was changed by packet filter. */
508	if (odst.s_addr != ip->ip_dst.s_addr) {
509		m->m_flags |= M_SKIP_FIREWALL;
510		/* If destination is now ourself drop to ip_input(). */
511		if (in_localip(ip->ip_dst)) {
512			m->m_flags |= M_FASTFWD_OURS;
513			if (m->m_pkthdr.rcvif == NULL)
514				m->m_pkthdr.rcvif = V_loif;
515			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
516				m->m_pkthdr.csum_flags |=
517				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
518				m->m_pkthdr.csum_data = 0xffff;
519			}
520			m->m_pkthdr.csum_flags |=
521			    CSUM_IP_CHECKED | CSUM_IP_VALID;
522#ifdef SCTP
523			if (m->m_pkthdr.csum_flags & CSUM_SCTP)
524				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
525#endif
526			error = netisr_queue(NETISR_IP, m);
527			goto done;
528		} else
529			goto again;	/* Redo the routing table lookup. */
530	}
531
532#ifdef IPFIREWALL_FORWARD
533	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
534	if (m->m_flags & M_FASTFWD_OURS) {
535		if (m->m_pkthdr.rcvif == NULL)
536			m->m_pkthdr.rcvif = V_loif;
537		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
538			m->m_pkthdr.csum_flags |=
539			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
540			m->m_pkthdr.csum_data = 0xffff;
541		}
542#ifdef SCTP
543		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
544			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
545#endif
546		m->m_pkthdr.csum_flags |=
547			    CSUM_IP_CHECKED | CSUM_IP_VALID;
548
549		error = netisr_queue(NETISR_IP, m);
550		goto done;
551	}
552	/* Or forward to some other address? */
553	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
554	if (fwd_tag) {
555		dst = (struct sockaddr_in *)&ro->ro_dst;
556		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
557		m->m_flags |= M_SKIP_FIREWALL;
558		m_tag_delete(m, fwd_tag);
559		goto again;
560	}
561#endif /* IPFIREWALL_FORWARD */
562
563passout:
564	/* 127/8 must not appear on wire - RFC1122. */
565	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
566	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
567		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
568			IPSTAT_INC(ips_badaddr);
569			error = EADDRNOTAVAIL;
570			goto bad;
571		}
572	}
573
574	m->m_pkthdr.csum_flags |= CSUM_IP;
575	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
576	if (sw_csum & CSUM_DELAY_DATA) {
577		in_delayed_cksum(m);
578		sw_csum &= ~CSUM_DELAY_DATA;
579	}
580#ifdef SCTP
581	if (sw_csum & CSUM_SCTP) {
582		sctp_delayed_cksum(m);
583		sw_csum &= ~CSUM_SCTP;
584	}
585#endif
586	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
587
588	/*
589	 * If small enough for interface, or the interface will take
590	 * care of the fragmentation for us, we can just send directly.
591	 */
592	if (ip->ip_len <= mtu ||
593	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
594	    ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
595		ip->ip_len = htons(ip->ip_len);
596		ip->ip_off = htons(ip->ip_off);
597		ip->ip_sum = 0;
598		if (sw_csum & CSUM_DELAY_IP)
599			ip->ip_sum = in_cksum(m, hlen);
600
601		/*
602		 * Record statistics for this interface address.
603		 * With CSUM_TSO the byte/packet count will be slightly
604		 * incorrect because we count the IP+TCP headers only
605		 * once instead of for every generated packet.
606		 */
607		if (!(flags & IP_FORWARDING) && ia) {
608			if (m->m_pkthdr.csum_flags & CSUM_TSO)
609				ia->ia_ifa.if_opackets +=
610				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
611			else
612				ia->ia_ifa.if_opackets++;
613			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
614		}
615#ifdef MBUF_STRESS_TEST
616		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
617			m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
618#endif
619		/*
620		 * Reset layer specific mbuf flags
621		 * to avoid confusing lower layers.
622		 */
623		m->m_flags &= ~(M_PROTOFLAGS);
624		error = (*ifp->if_output)(ifp, m,
625		    		(struct sockaddr *)dst, ro);
626		goto done;
627	}
628
629	/* Balk when DF bit is set or the interface didn't support TSO. */
630	if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
631		error = EMSGSIZE;
632		IPSTAT_INC(ips_cantfrag);
633		goto bad;
634	}
635
636	/*
637	 * Too large for interface; fragment if possible. If successful,
638	 * on return, m will point to a list of packets to be sent.
639	 */
640	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
641	if (error)
642		goto bad;
643	for (; m; m = m0) {
644		m0 = m->m_nextpkt;
645		m->m_nextpkt = 0;
646		if (error == 0) {
647			/* Record statistics for this interface address. */
648			if (ia != NULL) {
649				ia->ia_ifa.if_opackets++;
650				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
651			}
652			/*
653			 * Reset layer specific mbuf flags
654			 * to avoid confusing upper layers.
655			 */
656			m->m_flags &= ~(M_PROTOFLAGS);
657
658			error = (*ifp->if_output)(ifp, m,
659			    (struct sockaddr *)dst, ro);
660		} else
661			m_freem(m);
662	}
663
664	if (error == 0)
665		IPSTAT_INC(ips_fragmented);
666
667done:
668	if (ro == &iproute && ro->ro_rt && !nortfree) {
669		RTFREE(ro->ro_rt);
670	}
671	if (ia != NULL)
672		ifa_free(&ia->ia_ifa);
673	return (error);
674bad:
675	m_freem(m);
676	goto done;
677}
678
679/*
680 * Create a chain of fragments which fit the given mtu. m_frag points to the
681 * mbuf to be fragmented; on return it points to the chain with the fragments.
682 * Return 0 if no error. If error, m_frag may contain a partially built
683 * chain of fragments that should be freed by the caller.
684 *
685 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
686 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
687 */
688int
689ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
690    u_long if_hwassist_flags, int sw_csum)
691{
692	INIT_VNET_INET(curvnet);
693	int error = 0;
694	int hlen = ip->ip_hl << 2;
695	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
696	int off;
697	struct mbuf *m0 = *m_frag;	/* the original packet		*/
698	int firstlen;
699	struct mbuf **mnext;
700	int nfrags;
701
702	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
703		IPSTAT_INC(ips_cantfrag);
704		return EMSGSIZE;
705	}
706
707	/*
708	 * Must be able to put at least 8 bytes per fragment.
709	 */
710	if (len < 8)
711		return EMSGSIZE;
712
713	/*
714	 * If the interface will not calculate checksums on
715	 * fragmented packets, then do it here.
716	 */
717	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
718	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
719		in_delayed_cksum(m0);
720		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
721	}
722#ifdef SCTP
723	if (m0->m_pkthdr.csum_flags & CSUM_SCTP &&
724	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
725		sctp_delayed_cksum(m0);
726		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
727	}
728#endif
729	if (len > PAGE_SIZE) {
730		/*
731		 * Fragment large datagrams such that each segment
732		 * contains a multiple of PAGE_SIZE amount of data,
733		 * plus headers. This enables a receiver to perform
734		 * page-flipping zero-copy optimizations.
735		 *
736		 * XXX When does this help given that sender and receiver
737		 * could have different page sizes, and also mtu could
738		 * be less than the receiver's page size ?
739		 */
740		int newlen;
741		struct mbuf *m;
742
743		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
744			off += m->m_len;
745
746		/*
747		 * firstlen (off - hlen) must be aligned on an
748		 * 8-byte boundary
749		 */
750		if (off < hlen)
751			goto smart_frag_failure;
752		off = ((off - hlen) & ~7) + hlen;
753		newlen = (~PAGE_MASK) & mtu;
754		if ((newlen + sizeof (struct ip)) > mtu) {
755			/* we failed, go back the default */
756smart_frag_failure:
757			newlen = len;
758			off = hlen + len;
759		}
760		len = newlen;
761
762	} else {
763		off = hlen + len;
764	}
765
766	firstlen = off - hlen;
767	mnext = &m0->m_nextpkt;		/* pointer to next packet */
768
769	/*
770	 * Loop through length of segment after first fragment,
771	 * make new header and copy data of each part and link onto chain.
772	 * Here, m0 is the original packet, m is the fragment being created.
773	 * The fragments are linked off the m_nextpkt of the original
774	 * packet, which after processing serves as the first fragment.
775	 */
776	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
777		struct ip *mhip;	/* ip header on the fragment */
778		struct mbuf *m;
779		int mhlen = sizeof (struct ip);
780
781		MGETHDR(m, M_DONTWAIT, MT_DATA);
782		if (m == NULL) {
783			error = ENOBUFS;
784			IPSTAT_INC(ips_odropped);
785			goto done;
786		}
787		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
788		/*
789		 * In the first mbuf, leave room for the link header, then
790		 * copy the original IP header including options. The payload
791		 * goes into an additional mbuf chain returned by m_copym().
792		 */
793		m->m_data += max_linkhdr;
794		mhip = mtod(m, struct ip *);
795		*mhip = *ip;
796		if (hlen > sizeof (struct ip)) {
797			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
798			mhip->ip_v = IPVERSION;
799			mhip->ip_hl = mhlen >> 2;
800		}
801		m->m_len = mhlen;
802		/* XXX do we need to add ip->ip_off below ? */
803		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
804		if (off + len >= ip->ip_len) {	/* last fragment */
805			len = ip->ip_len - off;
806			m->m_flags |= M_LASTFRAG;
807		} else
808			mhip->ip_off |= IP_MF;
809		mhip->ip_len = htons((u_short)(len + mhlen));
810		m->m_next = m_copym(m0, off, len, M_DONTWAIT);
811		if (m->m_next == NULL) {	/* copy failed */
812			m_free(m);
813			error = ENOBUFS;	/* ??? */
814			IPSTAT_INC(ips_odropped);
815			goto done;
816		}
817		m->m_pkthdr.len = mhlen + len;
818		m->m_pkthdr.rcvif = NULL;
819#ifdef MAC
820		mac_netinet_fragment(m0, m);
821#endif
822		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
823		mhip->ip_off = htons(mhip->ip_off);
824		mhip->ip_sum = 0;
825		if (sw_csum & CSUM_DELAY_IP)
826			mhip->ip_sum = in_cksum(m, mhlen);
827		*mnext = m;
828		mnext = &m->m_nextpkt;
829	}
830	IPSTAT_ADD(ips_ofragments, nfrags);
831
832	/* set first marker for fragment chain */
833	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
834	m0->m_pkthdr.csum_data = nfrags;
835
836	/*
837	 * Update first fragment by trimming what's been copied out
838	 * and updating header.
839	 */
840	m_adj(m0, hlen + firstlen - ip->ip_len);
841	m0->m_pkthdr.len = hlen + firstlen;
842	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
843	ip->ip_off |= IP_MF;
844	ip->ip_off = htons(ip->ip_off);
845	ip->ip_sum = 0;
846	if (sw_csum & CSUM_DELAY_IP)
847		ip->ip_sum = in_cksum(m0, hlen);
848
849done:
850	*m_frag = m0;
851	return error;
852}
853
854void
855in_delayed_cksum(struct mbuf *m)
856{
857	struct ip *ip;
858	u_short csum, offset;
859
860	ip = mtod(m, struct ip *);
861	offset = ip->ip_hl << 2 ;
862	csum = in_cksum_skip(m, ip->ip_len, offset);
863	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
864		csum = 0xffff;
865	offset += m->m_pkthdr.csum_data;	/* checksum offset */
866
867	if (offset + sizeof(u_short) > m->m_len) {
868		printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
869		    m->m_len, offset, ip->ip_p);
870		/*
871		 * XXX
872		 * this shouldn't happen, but if it does, the
873		 * correct behavior may be to insert the checksum
874		 * in the appropriate next mbuf in the chain.
875		 */
876		return;
877	}
878	*(u_short *)(m->m_data + offset) = csum;
879}
880
881/*
882 * IP socket option processing.
883 */
884int
885ip_ctloutput(struct socket *so, struct sockopt *sopt)
886{
887	struct	inpcb *inp = sotoinpcb(so);
888	int	error, optval;
889
890	error = optval = 0;
891	if (sopt->sopt_level != IPPROTO_IP) {
892		if ((sopt->sopt_level == SOL_SOCKET) &&
893		    (sopt->sopt_name == SO_SETFIB)) {
894			inp->inp_inc.inc_fibnum = so->so_fibnum;
895			return (0);
896		}
897		return (EINVAL);
898	}
899
900	switch (sopt->sopt_dir) {
901	case SOPT_SET:
902		switch (sopt->sopt_name) {
903		case IP_OPTIONS:
904#ifdef notyet
905		case IP_RETOPTS:
906#endif
907		{
908			struct mbuf *m;
909			if (sopt->sopt_valsize > MLEN) {
910				error = EMSGSIZE;
911				break;
912			}
913			MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
914			if (m == NULL) {
915				error = ENOBUFS;
916				break;
917			}
918			m->m_len = sopt->sopt_valsize;
919			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
920					    m->m_len);
921			if (error) {
922				m_free(m);
923				break;
924			}
925			INP_WLOCK(inp);
926			error = ip_pcbopts(inp, sopt->sopt_name, m);
927			INP_WUNLOCK(inp);
928			return (error);
929		}
930
931		case IP_BINDANY:
932			if (sopt->sopt_td != NULL) {
933				error = priv_check(sopt->sopt_td,
934				    PRIV_NETINET_BINDANY);
935				if (error)
936					break;
937			}
938			/* FALLTHROUGH */
939		case IP_TOS:
940		case IP_TTL:
941		case IP_MINTTL:
942		case IP_RECVOPTS:
943		case IP_RECVRETOPTS:
944		case IP_RECVDSTADDR:
945		case IP_RECVTTL:
946		case IP_RECVIF:
947		case IP_FAITH:
948		case IP_ONESBCAST:
949		case IP_DONTFRAG:
950			error = sooptcopyin(sopt, &optval, sizeof optval,
951					    sizeof optval);
952			if (error)
953				break;
954
955			switch (sopt->sopt_name) {
956			case IP_TOS:
957				inp->inp_ip_tos = optval;
958				break;
959
960			case IP_TTL:
961				inp->inp_ip_ttl = optval;
962				break;
963
964			case IP_MINTTL:
965				if (optval >= 0 && optval <= MAXTTL)
966					inp->inp_ip_minttl = optval;
967				else
968					error = EINVAL;
969				break;
970
971#define	OPTSET(bit) do {						\
972	INP_WLOCK(inp);							\
973	if (optval)							\
974		inp->inp_flags |= bit;					\
975	else								\
976		inp->inp_flags &= ~bit;					\
977	INP_WUNLOCK(inp);						\
978} while (0)
979
980			case IP_RECVOPTS:
981				OPTSET(INP_RECVOPTS);
982				break;
983
984			case IP_RECVRETOPTS:
985				OPTSET(INP_RECVRETOPTS);
986				break;
987
988			case IP_RECVDSTADDR:
989				OPTSET(INP_RECVDSTADDR);
990				break;
991
992			case IP_RECVTTL:
993				OPTSET(INP_RECVTTL);
994				break;
995
996			case IP_RECVIF:
997				OPTSET(INP_RECVIF);
998				break;
999
1000			case IP_FAITH:
1001				OPTSET(INP_FAITH);
1002				break;
1003
1004			case IP_ONESBCAST:
1005				OPTSET(INP_ONESBCAST);
1006				break;
1007			case IP_DONTFRAG:
1008				OPTSET(INP_DONTFRAG);
1009				break;
1010			case IP_BINDANY:
1011				OPTSET(INP_BINDANY);
1012				break;
1013			}
1014			break;
1015#undef OPTSET
1016
1017		/*
1018		 * Multicast socket options are processed by the in_mcast
1019		 * module.
1020		 */
1021		case IP_MULTICAST_IF:
1022		case IP_MULTICAST_VIF:
1023		case IP_MULTICAST_TTL:
1024		case IP_MULTICAST_LOOP:
1025		case IP_ADD_MEMBERSHIP:
1026		case IP_DROP_MEMBERSHIP:
1027		case IP_ADD_SOURCE_MEMBERSHIP:
1028		case IP_DROP_SOURCE_MEMBERSHIP:
1029		case IP_BLOCK_SOURCE:
1030		case IP_UNBLOCK_SOURCE:
1031		case IP_MSFILTER:
1032		case MCAST_JOIN_GROUP:
1033		case MCAST_LEAVE_GROUP:
1034		case MCAST_JOIN_SOURCE_GROUP:
1035		case MCAST_LEAVE_SOURCE_GROUP:
1036		case MCAST_BLOCK_SOURCE:
1037		case MCAST_UNBLOCK_SOURCE:
1038			error = inp_setmoptions(inp, sopt);
1039			break;
1040
1041		case IP_PORTRANGE:
1042			error = sooptcopyin(sopt, &optval, sizeof optval,
1043					    sizeof optval);
1044			if (error)
1045				break;
1046
1047			INP_WLOCK(inp);
1048			switch (optval) {
1049			case IP_PORTRANGE_DEFAULT:
1050				inp->inp_flags &= ~(INP_LOWPORT);
1051				inp->inp_flags &= ~(INP_HIGHPORT);
1052				break;
1053
1054			case IP_PORTRANGE_HIGH:
1055				inp->inp_flags &= ~(INP_LOWPORT);
1056				inp->inp_flags |= INP_HIGHPORT;
1057				break;
1058
1059			case IP_PORTRANGE_LOW:
1060				inp->inp_flags &= ~(INP_HIGHPORT);
1061				inp->inp_flags |= INP_LOWPORT;
1062				break;
1063
1064			default:
1065				error = EINVAL;
1066				break;
1067			}
1068			INP_WUNLOCK(inp);
1069			break;
1070
1071#ifdef IPSEC
1072		case IP_IPSEC_POLICY:
1073		{
1074			caddr_t req;
1075			struct mbuf *m;
1076
1077			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1078				break;
1079			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1080				break;
1081			req = mtod(m, caddr_t);
1082			error = ipsec_set_policy(inp, sopt->sopt_name, req,
1083			    m->m_len, (sopt->sopt_td != NULL) ?
1084			    sopt->sopt_td->td_ucred : NULL);
1085			m_freem(m);
1086			break;
1087		}
1088#endif /* IPSEC */
1089
1090		default:
1091			error = ENOPROTOOPT;
1092			break;
1093		}
1094		break;
1095
1096	case SOPT_GET:
1097		switch (sopt->sopt_name) {
1098		case IP_OPTIONS:
1099		case IP_RETOPTS:
1100			if (inp->inp_options)
1101				error = sooptcopyout(sopt,
1102						     mtod(inp->inp_options,
1103							  char *),
1104						     inp->inp_options->m_len);
1105			else
1106				sopt->sopt_valsize = 0;
1107			break;
1108
1109		case IP_TOS:
1110		case IP_TTL:
1111		case IP_MINTTL:
1112		case IP_RECVOPTS:
1113		case IP_RECVRETOPTS:
1114		case IP_RECVDSTADDR:
1115		case IP_RECVTTL:
1116		case IP_RECVIF:
1117		case IP_PORTRANGE:
1118		case IP_FAITH:
1119		case IP_ONESBCAST:
1120		case IP_DONTFRAG:
1121			switch (sopt->sopt_name) {
1122
1123			case IP_TOS:
1124				optval = inp->inp_ip_tos;
1125				break;
1126
1127			case IP_TTL:
1128				optval = inp->inp_ip_ttl;
1129				break;
1130
1131			case IP_MINTTL:
1132				optval = inp->inp_ip_minttl;
1133				break;
1134
1135#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1136
1137			case IP_RECVOPTS:
1138				optval = OPTBIT(INP_RECVOPTS);
1139				break;
1140
1141			case IP_RECVRETOPTS:
1142				optval = OPTBIT(INP_RECVRETOPTS);
1143				break;
1144
1145			case IP_RECVDSTADDR:
1146				optval = OPTBIT(INP_RECVDSTADDR);
1147				break;
1148
1149			case IP_RECVTTL:
1150				optval = OPTBIT(INP_RECVTTL);
1151				break;
1152
1153			case IP_RECVIF:
1154				optval = OPTBIT(INP_RECVIF);
1155				break;
1156
1157			case IP_PORTRANGE:
1158				if (inp->inp_flags & INP_HIGHPORT)
1159					optval = IP_PORTRANGE_HIGH;
1160				else if (inp->inp_flags & INP_LOWPORT)
1161					optval = IP_PORTRANGE_LOW;
1162				else
1163					optval = 0;
1164				break;
1165
1166			case IP_FAITH:
1167				optval = OPTBIT(INP_FAITH);
1168				break;
1169
1170			case IP_ONESBCAST:
1171				optval = OPTBIT(INP_ONESBCAST);
1172				break;
1173			case IP_DONTFRAG:
1174				optval = OPTBIT(INP_DONTFRAG);
1175				break;
1176			}
1177			error = sooptcopyout(sopt, &optval, sizeof optval);
1178			break;
1179
1180		/*
1181		 * Multicast socket options are processed by the in_mcast
1182		 * module.
1183		 */
1184		case IP_MULTICAST_IF:
1185		case IP_MULTICAST_VIF:
1186		case IP_MULTICAST_TTL:
1187		case IP_MULTICAST_LOOP:
1188		case IP_MSFILTER:
1189			error = inp_getmoptions(inp, sopt);
1190			break;
1191
1192#ifdef IPSEC
1193		case IP_IPSEC_POLICY:
1194		{
1195			struct mbuf *m = NULL;
1196			caddr_t req = NULL;
1197			size_t len = 0;
1198
1199			if (m != 0) {
1200				req = mtod(m, caddr_t);
1201				len = m->m_len;
1202			}
1203			error = ipsec_get_policy(sotoinpcb(so), req, len, &m);
1204			if (error == 0)
1205				error = soopt_mcopyout(sopt, m); /* XXX */
1206			if (error == 0)
1207				m_freem(m);
1208			break;
1209		}
1210#endif /* IPSEC */
1211
1212		default:
1213			error = ENOPROTOOPT;
1214			break;
1215		}
1216		break;
1217	}
1218	return (error);
1219}
1220
1221/*
1222 * Routine called from ip_output() to loop back a copy of an IP multicast
1223 * packet to the input queue of a specified interface.  Note that this
1224 * calls the output routine of the loopback "driver", but with an interface
1225 * pointer that might NOT be a loopback interface -- evil, but easier than
1226 * replicating that code here.
1227 */
1228static void
1229ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
1230    int hlen)
1231{
1232	register struct ip *ip;
1233	struct mbuf *copym;
1234
1235	/*
1236	 * Make a deep copy of the packet because we're going to
1237	 * modify the pack in order to generate checksums.
1238	 */
1239	copym = m_dup(m, M_DONTWAIT);
1240	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1241		copym = m_pullup(copym, hlen);
1242	if (copym != NULL) {
1243		/* If needed, compute the checksum and mark it as valid. */
1244		if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1245			in_delayed_cksum(copym);
1246			copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1247			copym->m_pkthdr.csum_flags |=
1248			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1249			copym->m_pkthdr.csum_data = 0xffff;
1250		}
1251		/*
1252		 * We don't bother to fragment if the IP length is greater
1253		 * than the interface's MTU.  Can this possibly matter?
1254		 */
1255		ip = mtod(copym, struct ip *);
1256		ip->ip_len = htons(ip->ip_len);
1257		ip->ip_off = htons(ip->ip_off);
1258		ip->ip_sum = 0;
1259		ip->ip_sum = in_cksum(copym, hlen);
1260#if 1 /* XXX */
1261		if (dst->sin_family != AF_INET) {
1262			printf("ip_mloopback: bad address family %d\n",
1263						dst->sin_family);
1264			dst->sin_family = AF_INET;
1265		}
1266#endif
1267		if_simloop(ifp, copym, dst->sin_family, 0);
1268	}
1269}
1270