ip_output.c revision 105586
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 105586 2002-10-20 22:52:07Z phk $
35 */
36
37#include "opt_ipfw.h"
38#include "opt_ipdn.h"
39#include "opt_ipdivert.h"
40#include "opt_ipfilter.h"
41#include "opt_ipsec.h"
42#include "opt_mac.h"
43#include "opt_pfil_hooks.h"
44#include "opt_random_ip_id.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/mac.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/protosw.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55
56#include <net/if.h>
57#include <net/route.h>
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/ip.h>
62#include <netinet/in_pcb.h>
63#include <netinet/in_var.h>
64#include <netinet/ip_var.h>
65
66#include <machine/in_cksum.h>
67
68static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
69
70#ifdef IPSEC
71#include <netinet6/ipsec.h>
72#include <netkey/key.h>
73#ifdef IPSEC_DEBUG
74#include <netkey/key_debug.h>
75#else
76#define	KEYDEBUG(lev,arg)
77#endif
78#endif /*IPSEC*/
79
80#ifdef FAST_IPSEC
81#include <netipsec/ipsec.h>
82#include <netipsec/xform.h>
83#include <netipsec/key.h>
84#endif /*FAST_IPSEC*/
85
86#include <netinet/ip_fw.h>
87#include <netinet/ip_dummynet.h>
88
/*
 * Debugging aid: print the string x, a space, the IPv4 address held in
 * the struct in_addr expression a as a dotted quad, and then the string y,
 * all through printf().
 *
 * a.s_addr is converted with ntohl() before the four octets are extracted,
 * so the address is expected in network byte order.
 *
 * The macro expands to a single statement without a trailing semicolon;
 * the caller supplies the semicolon, which keeps it safe inside unbraced
 * if/else bodies.  Every argument is parenthesized and a is evaluated
 * exactly once, so expressions such as *p can be passed directly.
 */
#define print_ip(x, a, y) do {						\
	unsigned long print_ip_haddr = ntohl((a).s_addr);		\
	printf("%s %d.%d.%d.%d%s", (x),					\
	    (int)((print_ip_haddr >> 24) & 0xFF),			\
	    (int)((print_ip_haddr >> 16) & 0xFF),			\
	    (int)((print_ip_haddr >> 8) & 0xFF),			\
	    (int)(print_ip_haddr & 0xFF), (y));				\
} while (0)
94
/*
 * Counter used by ip_output() to fill in ip_id (via htons(ip_id++)) for
 * packets sent without IP_FORWARDING or IP_RAWOUTPUT when RANDOM_IP_ID is
 * not defined.
 */
95u_short ip_id;
96
/*
 * Prototypes for helpers private to this file.  Within the visible part of
 * ip_output(): ip_insertoptions() is called with the packet, the option
 * mbuf and a pointer to an int that, when set non-zero, becomes the new
 * header length; ip_mloopback() is called to loop back a copy of an
 * outgoing multicast packet.  The remaining helpers are not called in the
 * portion of the file reviewed here.
 */
97static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
98static struct ifnet *ip_multicast_if(struct in_addr *, int *);
99static void	ip_mloopback
100	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
101static int	ip_getmoptions
102	(struct sockopt *, struct ip_moptions *);
103static int	ip_pcbopts(int, struct mbuf **, struct mbuf *);
104static int	ip_setmoptions
105	(struct sockopt *, struct ip_moptions **);
106
/*
 * Non-static; the fragmentation loop in ip_output() calls it as
 * ip_optcopy(ip, mhip) and adds the returned value to sizeof (struct ip)
 * to obtain the header length of a fragment.
 */
107int	ip_optcopy(struct ip *, struct ip *);
108
109
/*
 * Protocol switch table defined elsewhere; under PFIL_HOOKS ip_output()
 * indexes it with ip_protox[IPPROTO_IP] to reach the pr_pfh hook list.
 */
110extern	struct protosw inetsw[];
111
112/*
113 * IP output.  The packet in mbuf chain m contains a skeletal IP
114 * header (with len, off, ttl, proto, tos, src, dst).
115 * The mbuf chain containing the packet will be freed.
116 * The mbuf opt, if present, will not be freed.
117 */
118int
119ip_output(m0, opt, ro, flags, imo, inp)
120	struct mbuf *m0;
121	struct mbuf *opt;
122	struct route *ro;
123	int flags;
124	struct ip_moptions *imo;
125	struct inpcb *inp;
126{
127	struct ip *ip, *mhip;
128	struct ifnet *ifp = NULL;	/* keep compiler happy */
129	struct mbuf *m;
130	int hlen = sizeof (struct ip);
131	int len, off, error = 0;
132	struct sockaddr_in *dst = NULL;	/* keep compiler happy */
133	struct in_ifaddr *ia = NULL;
134	int isbroadcast, sw_csum;
135	struct in_addr pkt_dst;
136#ifdef IPSEC
137	struct route iproute;
138	struct secpolicy *sp = NULL;
139	struct socket *so = inp ? inp->inp_socket : NULL;
140#endif
141#ifdef FAST_IPSEC
142	struct route iproute;
143	struct m_tag *mtag;
144	struct secpolicy *sp = NULL;
145	struct tdb_ident *tdbi;
146	int s;
147#endif /* FAST_IPSEC */
148	struct ip_fw_args args;
149	int src_was_INADDR_ANY = 0;	/* as the name says... */
150#ifdef PFIL_HOOKS
151	struct packet_filter_hook *pfh;
152	struct mbuf *m1;
153	int rv;
154#endif /* PFIL_HOOKS */
155
156	args.eh = NULL;
157	args.rule = NULL;
158	args.next_hop = NULL;
159	args.divert_rule = 0;			/* divert cookie */
160
161	/* Grab info from MT_TAG mbufs prepended to the chain. */
162	for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) {
163		switch(m0->_m_tag_id) {
164		default:
165			printf("ip_output: unrecognised MT_TAG tag %d\n",
166			    m0->_m_tag_id);
167			break;
168
169		case PACKET_TAG_DUMMYNET:
170			/*
171			 * the packet was already tagged, so part of the
172			 * processing was already done, and we need to go down.
173			 * Get parameters from the header.
174			 */
175			args.rule = ((struct dn_pkt *)m0)->rule;
176			opt = NULL ;
177			ro = & ( ((struct dn_pkt *)m0)->ro ) ;
178			imo = NULL ;
179			dst = ((struct dn_pkt *)m0)->dn_dst ;
180			ifp = ((struct dn_pkt *)m0)->ifp ;
181			flags = ((struct dn_pkt *)m0)->flags ;
182			break;
183
184		case PACKET_TAG_DIVERT:
185			args.divert_rule = (intptr_t)m0->m_data & 0xffff;
186			break;
187
188		case PACKET_TAG_IPFORWARD:
189			args.next_hop = (struct sockaddr_in *)m0->m_data;
190			break;
191		}
192	}
193	m = m0;
194
195	KASSERT(!m || (m->m_flags & M_PKTHDR) != 0, ("ip_output: no HDR"));
196#ifndef FAST_IPSEC
197	KASSERT(ro != NULL, ("ip_output: no route, proto %d",
198	    mtod(m, struct ip *)->ip_p));
199#endif
200
201	if (args.rule != NULL) {	/* dummynet already saw us */
202		ip = mtod(m, struct ip *);
203		hlen = ip->ip_hl << 2 ;
204		if (ro->ro_rt)
205			ia = ifatoia(ro->ro_rt->rt_ifa);
206		goto sendit;
207	}
208
209	if (opt) {
210		len = 0;
211		m = ip_insertoptions(m, opt, &len);
212		if (len != 0)
213			hlen = len;
214	}
215	ip = mtod(m, struct ip *);
216	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
217
218	/*
219	 * Fill in IP header.
220	 */
221	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
222		ip->ip_v = IPVERSION;
223		ip->ip_hl = hlen >> 2;
224		ip->ip_off &= IP_DF;
225#ifdef RANDOM_IP_ID
226		ip->ip_id = ip_randomid();
227#else
228		ip->ip_id = htons(ip_id++);
229#endif
230		ipstat.ips_localout++;
231	} else {
232		hlen = ip->ip_hl << 2;
233	}
234
235#ifdef FAST_IPSEC
236	if (ro == NULL) {
237		ro = &iproute;
238		bzero(ro, sizeof (*ro));
239	}
240#endif /* FAST_IPSEC */
241	dst = (struct sockaddr_in *)&ro->ro_dst;
242	/*
243	 * If there is a cached route,
244	 * check that it is to the same destination
245	 * and is still up.  If not, free it and try again.
246	 * The address family should also be checked in case of sharing the
247	 * cache with IPv6.
248	 */
249	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
250			  dst->sin_family != AF_INET ||
251			  dst->sin_addr.s_addr != pkt_dst.s_addr)) {
252		RTFREE(ro->ro_rt);
253		ro->ro_rt = (struct rtentry *)0;
254	}
255	if (ro->ro_rt == 0) {
256		bzero(dst, sizeof(*dst));
257		dst->sin_family = AF_INET;
258		dst->sin_len = sizeof(*dst);
259		dst->sin_addr = pkt_dst;
260	}
261	/*
262	 * If routing to interface only,
263	 * short circuit routing lookup.
264	 */
265	if (flags & IP_ROUTETOIF) {
266		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
267		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
268			ipstat.ips_noroute++;
269			error = ENETUNREACH;
270			goto bad;
271		}
272		ifp = ia->ia_ifp;
273		ip->ip_ttl = 1;
274		isbroadcast = in_broadcast(dst->sin_addr, ifp);
275	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
276	    imo != NULL && imo->imo_multicast_ifp != NULL) {
277		/*
278		 * Bypass the normal routing lookup for multicast
279		 * packets if the interface is specified.
280		 */
281		ifp = imo->imo_multicast_ifp;
282		IFP_TO_IA(ifp, ia);
283		isbroadcast = 0;	/* fool gcc */
284	} else {
285		/*
286		 * If this is the case, we probably don't want to allocate
287		 * a protocol-cloned route since we didn't get one from the
288		 * ULP.  This lets TCP do its thing, while not burdening
289		 * forwarding or ICMP with the overhead of cloning a route.
290		 * Of course, we still want to do any cloning requested by
291		 * the link layer, as this is probably required in all cases
292		 * for correct operation (as it is for ARP).
293		 */
294		if (ro->ro_rt == 0)
295			rtalloc_ign(ro, RTF_PRCLONING);
296		if (ro->ro_rt == 0) {
297			ipstat.ips_noroute++;
298			error = EHOSTUNREACH;
299			goto bad;
300		}
301		ia = ifatoia(ro->ro_rt->rt_ifa);
302		ifp = ro->ro_rt->rt_ifp;
303		ro->ro_rt->rt_use++;
304		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
305			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
306		if (ro->ro_rt->rt_flags & RTF_HOST)
307			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
308		else
309			isbroadcast = in_broadcast(dst->sin_addr, ifp);
310	}
311	if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
312		struct in_multi *inm;
313
314		m->m_flags |= M_MCAST;
315		/*
316		 * IP destination address is multicast.  Make sure "dst"
317		 * still points to the address in "ro".  (It may have been
318		 * changed to point to a gateway address, above.)
319		 */
320		dst = (struct sockaddr_in *)&ro->ro_dst;
321		/*
322		 * See if the caller provided any multicast options
323		 */
324		if (imo != NULL) {
325			ip->ip_ttl = imo->imo_multicast_ttl;
326			if (imo->imo_multicast_vif != -1)
327				ip->ip_src.s_addr =
328				    ip_mcast_src(imo->imo_multicast_vif);
329		} else
330			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
331		/*
332		 * Confirm that the outgoing interface supports multicast.
333		 */
334		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
335			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
336				ipstat.ips_noroute++;
337				error = ENETUNREACH;
338				goto bad;
339			}
340		}
341		/*
342		 * If source address not specified yet, use address
343		 * of outgoing interface.
344		 */
345		if (ip->ip_src.s_addr == INADDR_ANY) {
346			/* Interface may have no addresses. */
347			if (ia != NULL)
348				ip->ip_src = IA_SIN(ia)->sin_addr;
349		}
350
351		if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
352			/*
353			 * XXX
354			 * delayed checksums are not currently
355			 * compatible with IP multicast routing
356			 */
357			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
358				in_delayed_cksum(m);
359				m->m_pkthdr.csum_flags &=
360					~CSUM_DELAY_DATA;
361			}
362		}
363		IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
364		if (inm != NULL &&
365		   (imo == NULL || imo->imo_multicast_loop)) {
366			/*
367			 * If we belong to the destination multicast group
368			 * on the outgoing interface, and the caller did not
369			 * forbid loopback, loop back a copy.
370			 */
371			ip_mloopback(ifp, m, dst, hlen);
372		}
373		else {
374			/*
375			 * If we are acting as a multicast router, perform
376			 * multicast forwarding as if the packet had just
377			 * arrived on the interface to which we are about
378			 * to send.  The multicast forwarding function
379			 * recursively calls this function, using the
380			 * IP_FORWARDING flag to prevent infinite recursion.
381			 *
382			 * Multicasts that are looped back by ip_mloopback(),
383			 * above, will be forwarded by the ip_input() routine,
384			 * if necessary.
385			 */
386			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
387				/*
388				 * Check if rsvp daemon is running. If not, don't
389				 * set ip_moptions. This ensures that the packet
390				 * is multicast and not just sent down one link
391				 * as prescribed by rsvpd.
392				 */
393				if (!rsvp_on)
394				  imo = NULL;
395				if (ip_mforward(ip, ifp, m, imo) != 0) {
396					m_freem(m);
397					goto done;
398				}
399			}
400		}
401
402		/*
403		 * Multicasts with a time-to-live of zero may be looped-
404		 * back, above, but must not be transmitted on a network.
405		 * Also, multicasts addressed to the loopback interface
406		 * are not sent -- the above call to ip_mloopback() will
407		 * loop back a copy if this host actually belongs to the
408		 * destination group on the loopback interface.
409		 */
410		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
411			m_freem(m);
412			goto done;
413		}
414
415		goto sendit;
416	}
417#ifndef notdef
418	/*
419	 * If the source address is not specified yet, use the address
420	 * of the outgoing interface. In that case, keep note we did that, so
421	 * if the firewall changes the next-hop causing the output
422	 * interface to change, we can fix that.
423	 */
424	if (ip->ip_src.s_addr == INADDR_ANY) {
425		/* Interface may have no addresses. */
426		if (ia != NULL) {
427			ip->ip_src = IA_SIN(ia)->sin_addr;
428			src_was_INADDR_ANY = 1;
429		}
430	}
431#endif /* notdef */
432	/*
433	 * Verify that we have any chance at all of being able to queue
434	 *      the packet or packet fragments
435	 */
436	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
437		ifp->if_snd.ifq_maxlen) {
438			error = ENOBUFS;
439			ipstat.ips_odropped++;
440			goto bad;
441	}
442
443	/*
444	 * Look for broadcast address and
445	 * verify user is allowed to send
446	 * such a packet.
447	 */
448	if (isbroadcast) {
449		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
450			error = EADDRNOTAVAIL;
451			goto bad;
452		}
453		if ((flags & IP_ALLOWBROADCAST) == 0) {
454			error = EACCES;
455			goto bad;
456		}
457		/* don't allow broadcast messages to be fragmented */
458		if ((u_short)ip->ip_len > ifp->if_mtu) {
459			error = EMSGSIZE;
460			goto bad;
461		}
462		m->m_flags |= M_BCAST;
463	} else {
464		m->m_flags &= ~M_BCAST;
465	}
466
467sendit:
468#ifdef IPSEC
469	/* get SP for this packet */
470	if (so == NULL)
471		sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
472	else
473		sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
474
475	if (sp == NULL) {
476		ipsecstat.out_inval++;
477		goto bad;
478	}
479
480	error = 0;
481
482	/* check policy */
483	switch (sp->policy) {
484	case IPSEC_POLICY_DISCARD:
485		/*
486		 * This packet is just discarded.
487		 */
488		ipsecstat.out_polvio++;
489		goto bad;
490
491	case IPSEC_POLICY_BYPASS:
492	case IPSEC_POLICY_NONE:
493		/* no need to do IPsec. */
494		goto skip_ipsec;
495
496	case IPSEC_POLICY_IPSEC:
497		if (sp->req == NULL) {
498			/* acquire a policy */
499			error = key_spdacquire(sp);
500			goto bad;
501		}
502		break;
503
504	case IPSEC_POLICY_ENTRUST:
505	default:
506		printf("ip_output: Invalid policy found. %d\n", sp->policy);
507	}
508    {
509	struct ipsec_output_state state;
510	bzero(&state, sizeof(state));
511	state.m = m;
512	if (flags & IP_ROUTETOIF) {
513		state.ro = &iproute;
514		bzero(&iproute, sizeof(iproute));
515	} else
516		state.ro = ro;
517	state.dst = (struct sockaddr *)dst;
518
519	ip->ip_sum = 0;
520
521	/*
522	 * XXX
523	 * delayed checksums are not currently compatible with IPsec
524	 */
525	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
526		in_delayed_cksum(m);
527		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
528	}
529
530	ip->ip_len = htons(ip->ip_len);
531	ip->ip_off = htons(ip->ip_off);
532
533	error = ipsec4_output(&state, sp, flags);
534
535	m = state.m;
536	if (flags & IP_ROUTETOIF) {
537		/*
538		 * if we have tunnel mode SA, we may need to ignore
539		 * IP_ROUTETOIF.
540		 */
541		if (state.ro != &iproute || state.ro->ro_rt != NULL) {
542			flags &= ~IP_ROUTETOIF;
543			ro = state.ro;
544		}
545	} else
546		ro = state.ro;
547	dst = (struct sockaddr_in *)state.dst;
548	if (error) {
549		/* mbuf is already reclaimed in ipsec4_output. */
550		m0 = NULL;
551		switch (error) {
552		case EHOSTUNREACH:
553		case ENETUNREACH:
554		case EMSGSIZE:
555		case ENOBUFS:
556		case ENOMEM:
557			break;
558		default:
559			printf("ip4_output (ipsec): error code %d\n", error);
560			/*fall through*/
561		case ENOENT:
562			/* don't show these error codes to the user */
563			error = 0;
564			break;
565		}
566		goto bad;
567	}
568    }
569
570	/* be sure to update variables that are affected by ipsec4_output() */
571	ip = mtod(m, struct ip *);
572	hlen = ip->ip_hl << 2;
573	if (ro->ro_rt == NULL) {
574		if ((flags & IP_ROUTETOIF) == 0) {
575			printf("ip_output: "
576				"can't update route after IPsec processing\n");
577			error = EHOSTUNREACH;	/*XXX*/
578			goto bad;
579		}
580	} else {
581		ia = ifatoia(ro->ro_rt->rt_ifa);
582		ifp = ro->ro_rt->rt_ifp;
583	}
584
585	/* make it flipped, again. */
586	ip->ip_len = ntohs(ip->ip_len);
587	ip->ip_off = ntohs(ip->ip_off);
588skip_ipsec:
589#endif /*IPSEC*/
590#ifdef FAST_IPSEC
591	/*
592	 * Check the security policy (SP) for the packet and, if
593	 * required, do IPsec-related processing.  There are two
594	 * cases here; the first time a packet is sent through
595	 * it will be untagged and handled by ipsec4_checkpolicy.
596	 * If the packet is resubmitted to ip_output (e.g. after
597	 * AH, ESP, etc. processing), there will be a tag to bypass
598	 * the lookup and related policy checking.
599	 */
600	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
601	s = splnet();
602	if (mtag != NULL) {
603		tdbi = (struct tdb_ident *)(mtag + 1);
604		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
605		if (sp == NULL)
606			error = -EINVAL;	/* force silent drop */
607		m_tag_delete(m, mtag);
608	} else {
609		sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
610					&error, inp);
611	}
612	/*
613	 * There are four return cases:
614	 *    sp != NULL	 	    apply IPsec policy
615	 *    sp == NULL, error == 0	    no IPsec handling needed
616	 *    sp == NULL, error == -EINVAL  discard packet w/o error
617	 *    sp == NULL, error != 0	    discard packet, report error
618	 */
619	if (sp != NULL) {
620		/* Loop detection, check if ipsec processing already done */
621		KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
622		for (mtag = m_tag_first(m); mtag != NULL;
623		     mtag = m_tag_next(m, mtag)) {
624			if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
625				continue;
626			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
627			    mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
628				continue;
629			/*
630			 * Check if policy has an SA associated with it.
631			 * This can happen when an SP has yet to acquire
632			 * an SA; e.g. on first reference.  If it occurs,
633			 * then we let ipsec4_process_packet do its thing.
634			 */
635			if (sp->req->sav == NULL)
636				break;
637			tdbi = (struct tdb_ident *)(mtag + 1);
638			if (tdbi->spi == sp->req->sav->spi &&
639			    tdbi->proto == sp->req->sav->sah->saidx.proto &&
640			    bcmp(&tdbi->dst, &sp->spidx.dst,
641				 sizeof (union sockaddr_union)) == 0) {
642				/*
643				 * No IPsec processing is needed, free
644				 * reference to SP.
645				 *
646				 * NB: null pointer to avoid free at
647				 *     done: below.
648				 */
649				KEY_FREESP(&sp), sp = NULL;
650				splx(s);
651				goto spd_done;
652			}
653		}
654
655		/*
656		 * Do delayed checksums now because we send before
657		 * this is done in the normal processing path.
658		 */
659		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
660			in_delayed_cksum(m);
661			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
662		}
663
664		ip->ip_len = htons(ip->ip_len);
665		ip->ip_off = htons(ip->ip_off);
666
667		/* NB: callee frees mbuf */
668		error = ipsec4_process_packet(m, sp->req, flags, 0);
669		splx(s);
670		goto done;
671	} else {
672		splx(s);
673
674		if (error != 0) {
675			/*
676			 * Hack: -EINVAL is used to signal that a packet
677			 * should be silently discarded.  This is typically
678			 * because we asked key management for an SA and
679			 * it was delayed (e.g. kicked up to IKE).
680			 */
681			if (error == -EINVAL)
682				error = 0;
683			goto bad;
684		} else {
685			/* No IPsec processing for this packet. */
686		}
687#ifdef notyet
688		/*
689		 * If deferred crypto processing is needed, check that
690		 * the interface supports it.
691		 */
692		mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
693		if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
694			/* notify IPsec to do its own crypto */
695			ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
696			error = EHOSTUNREACH;
697			goto bad;
698		}
699#endif
700	}
701spd_done:
702#endif /* FAST_IPSEC */
703
704	/*
705	 * IpHack's section.
706	 * - Xlate: translate packet's addr/port (NAT).
707	 * - Firewall: deny/allow/etc.
708	 * - Wrap: fake packet's addr/port <unimpl.>
709	 * - Encapsulate: put it in another IP and send out. <unimp.>
710	 */
711#ifdef PFIL_HOOKS
712	/*
713	 * Run through list of hooks for output packets.
714	 */
715	m1 = m;
716	pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
717	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
718		if (pfh->pfil_func) {
719			rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1);
720			if (rv) {
721				error = EHOSTUNREACH;
722				goto done;
723			}
724			m = m1;
725			if (m == NULL)
726				goto done;
727			ip = mtod(m, struct ip *);
728		}
729#endif /* PFIL_HOOKS */
730
731	/*
732	 * Check with the firewall...
733	 * but not if we are already being fwd'd from a firewall.
734	 */
735	if (fw_enable && IPFW_LOADED && !args.next_hop) {
736		struct sockaddr_in *old = dst;
737
738		args.m = m;
739		args.next_hop = dst;
740		args.oif = ifp;
741		off = ip_fw_chk_ptr(&args);
742		m = args.m;
743		dst = args.next_hop;
744
745                /*
746		 * On return we must do the following:
747		 * m == NULL	-> drop the pkt (old interface, deprecated)
748		 * (off & IP_FW_PORT_DENY_FLAG)	-> drop the pkt (new interface)
749		 * 1<=off<= 0xffff		-> DIVERT
750		 * (off & IP_FW_PORT_DYNT_FLAG)	-> send to a DUMMYNET pipe
751		 * (off & IP_FW_PORT_TEE_FLAG)	-> TEE the packet
752		 * dst != old			-> IPFIREWALL_FORWARD
753		 * off==0, dst==old		-> accept
754		 * If some of the above modules are not compiled in, then
755		 * we shouldn't have to check the corresponding condition
756		 * (because the ipfw control socket should not accept
757		 * unsupported rules), but better play safe and drop
758		 * packets in case of doubt.
759		 */
760		if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
761			if (m)
762				m_freem(m);
763			error = EACCES;
764			goto done;
765		}
766		ip = mtod(m, struct ip *);
767		if (off == 0 && dst == old)		/* common case */
768			goto pass;
769                if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
770			/*
771			 * pass the pkt to dummynet. Need to include
772			 * pipe number, m, ifp, ro, dst because these are
773			 * not recomputed in the next pass.
774			 * All other parameters have been already used and
775			 * so they are not needed anymore.
776			 * XXX note: if the ifp or ro entry are deleted
777			 * while a pkt is in dummynet, we are in trouble!
778			 */
779			args.ro = ro;
780			args.dst = dst;
781			args.flags = flags;
782
783			error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
784				&args);
785			goto done;
786		}
787#ifdef IPDIVERT
788		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
789			struct mbuf *clone = NULL;
790
791			/* Clone packet if we're doing a 'tee' */
792			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
793				clone = m_dup(m, M_DONTWAIT);
794
795			/*
796			 * XXX
797			 * delayed checksums are not currently compatible
798			 * with divert sockets.
799			 */
800			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
801				in_delayed_cksum(m);
802				m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
803			}
804
805			/* Restore packet header fields to original values */
806			ip->ip_len = htons(ip->ip_len);
807			ip->ip_off = htons(ip->ip_off);
808
809			/* Deliver packet to divert input routine */
810			divert_packet(m, 0, off & 0xffff, args.divert_rule);
811
812			/* If 'tee', continue with original packet */
813			if (clone != NULL) {
814				m = clone;
815				ip = mtod(m, struct ip *);
816				goto pass;
817			}
818			goto done;
819		}
820#endif
821
822		/* IPFIREWALL_FORWARD */
823		/*
824		 * Check dst to make sure it is directly reachable on the
825		 * interface we previously thought it was.
826		 * If it isn't (which may be likely in some situations) we have
827		 * to re-route it (ie, find a route for the next-hop and the
828		 * associated interface) and set them here. This is nested
829		 * forwarding which in most cases is undesirable, except where
830		 * such control is nigh impossible. So we do it here.
831		 * And I'm babbling.
832		 */
833		if (off == 0 && old != dst) { /* FORWARD, dst has changed */
834#if 0
835			/*
836			 * XXX To improve readability, this block should be
837			 * changed into a function call as below:
838			 */
839			error = ip_ipforward(&m, &dst, &ifp);
840			if (error)
841				goto bad;
842			if (m == NULL) /* ip_input consumed the mbuf */
843				goto done;
844#else
845			struct in_ifaddr *ia;
846
847			/*
848			 * XXX sro_fwd below is static, and a pointer
849			 * to it gets passed to routines downstream.
850			 * This could have surprisingly bad results in
851			 * practice, because its content is overwritten
852			 * by subsequent packets.
853			 */
854			/* There must be a better way to do this next line... */
855			static struct route sro_fwd;
856			struct route *ro_fwd = &sro_fwd;
857
858#if 0
859			print_ip("IPFIREWALL_FORWARD: New dst ip: ",
860			    dst->sin_addr, "\n");
861#endif
862
863			/*
864			 * We need to figure out if we have been forwarded
865			 * to a local socket. If so, then we should somehow
866			 * "loop back" to ip_input, and get directed to the
867			 * PCB as if we had received this packet. This is
868			 * because it may be difficult to identify the packets
869			 * you want to forward until they are being output
870			 * and have selected an interface. (e.g. locally
871			 * initiated packets) If we used the loopback interface,
872			 * we would not be able to control what happens
873			 * as the packet runs through ip_input() as
874			 * it is done through a ISR.
875			 */
876			LIST_FOREACH(ia,
877			    INADDR_HASH(dst->sin_addr.s_addr), ia_hash) {
878				/*
879				 * If the addr to forward to is one
880				 * of ours, we pretend to
881				 * be the destination for this packet.
882				 */
883				if (IA_SIN(ia)->sin_addr.s_addr ==
884						 dst->sin_addr.s_addr)
885					break;
886			}
887			if (ia) {	/* tell ip_input "dont filter" */
888				struct m_hdr tag;
889
890				tag.mh_type = MT_TAG;
891				tag.mh_flags = PACKET_TAG_IPFORWARD;
892				tag.mh_data = (caddr_t)args.next_hop;
893				tag.mh_next = m;
894
895				if (m->m_pkthdr.rcvif == NULL)
896					m->m_pkthdr.rcvif = ifunit("lo0");
897				if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
898					m->m_pkthdr.csum_flags |=
899					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
900					m0->m_pkthdr.csum_data = 0xffff;
901				}
902				m->m_pkthdr.csum_flags |=
903				    CSUM_IP_CHECKED | CSUM_IP_VALID;
904				ip->ip_len = htons(ip->ip_len);
905				ip->ip_off = htons(ip->ip_off);
906				ip_input((struct mbuf *)&tag);
907				goto done;
908			}
909			/* Some of the logic for this was
910			 * nicked from above.
911			 *
912			 * This rewrites the cached route in a local PCB.
913			 * Is this what we want to do?
914			 */
915			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
916
917			ro_fwd->ro_rt = 0;
918			rtalloc_ign(ro_fwd, RTF_PRCLONING);
919
920			if (ro_fwd->ro_rt == 0) {
921				ipstat.ips_noroute++;
922				error = EHOSTUNREACH;
923				goto bad;
924			}
925
926			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
927			ifp = ro_fwd->ro_rt->rt_ifp;
928			ro_fwd->ro_rt->rt_use++;
929			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
930				dst = (struct sockaddr_in *)
931					ro_fwd->ro_rt->rt_gateway;
932			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
933				isbroadcast =
934				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
935			else
936				isbroadcast = in_broadcast(dst->sin_addr, ifp);
937			if (ro->ro_rt)
938				RTFREE(ro->ro_rt);
939			ro->ro_rt = ro_fwd->ro_rt;
940			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
941
942#endif	/* ... block to be put into a function */
943			/*
944			 * If we added a default src ip earlier,
945			 * which would have been gotten from the-then
946			 * interface, do it again, from the new one.
947			 */
948			if (src_was_INADDR_ANY)
949				ip->ip_src = IA_SIN(ia)->sin_addr;
950			goto pass ;
951		}
952
953                /*
954                 * if we get here, none of the above matches, and
955                 * we have to drop the pkt
956                 */
957		m_freem(m);
958                error = EACCES; /* not sure this is the right error msg */
959                goto done;
960	}
961
962pass:
963	/* 127/8 must not appear on wire - RFC1122. */
964	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
965	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
966		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
967			ipstat.ips_badaddr++;
968			error = EADDRNOTAVAIL;
969			goto bad;
970		}
971	}
972
973	m->m_pkthdr.csum_flags |= CSUM_IP;
974	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
975	if (sw_csum & CSUM_DELAY_DATA) {
976		in_delayed_cksum(m);
977		sw_csum &= ~CSUM_DELAY_DATA;
978	}
979	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
980
981	/*
982	 * If small enough for interface, or the interface will take
983	 * care of the fragmentation for us, can just send directly.
984	 */
985	if ((u_short)ip->ip_len <= ifp->if_mtu ||
986	    ifp->if_hwassist & CSUM_FRAGMENT) {
987		ip->ip_len = htons(ip->ip_len);
988		ip->ip_off = htons(ip->ip_off);
989		ip->ip_sum = 0;
990		if (sw_csum & CSUM_DELAY_IP)
991			ip->ip_sum = in_cksum(m, hlen);
992
993		/* Record statistics for this interface address. */
994		if (!(flags & IP_FORWARDING) && ia) {
995			ia->ia_ifa.if_opackets++;
996			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
997		}
998
999#ifdef IPSEC
1000		/* clean ipsec history once it goes out of the node */
1001		ipsec_delaux(m);
1002#endif
1003
1004		error = (*ifp->if_output)(ifp, m,
1005				(struct sockaddr *)dst, ro->ro_rt);
1006		goto done;
1007	}
1008	/*
1009	 * Too large for interface; fragment if possible.
1010	 * Must be able to put at least 8 bytes per fragment.
1011	 */
1012	if (ip->ip_off & IP_DF) {
1013		error = EMSGSIZE;
1014		/*
1015		 * This case can happen if the user changed the MTU
1016		 * of an interface after enabling IP on it.  Because
1017		 * most netifs don't keep track of routes pointing to
1018		 * them, there is no way for one to update all its
1019		 * routes when the MTU is changed.
1020		 */
1021		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
1022		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
1023		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1024			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1025		}
1026		ipstat.ips_cantfrag++;
1027		goto bad;
1028	}
1029	len = (ifp->if_mtu - hlen) &~ 7;
1030	if (len < 8) {
1031		error = EMSGSIZE;
1032		goto bad;
1033	}
1034
1035	/*
1036	 * if the interface will not calculate checksums on
1037	 * fragmented packets, then do it here.
1038	 */
1039	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1040	    (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
1041		in_delayed_cksum(m);
1042		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1043	}
1044
1045	if (len > PAGE_SIZE) {
1046		/*
1047		 * Fragment large datagrams such that each segment
1048		 * contains a multiple of PAGE_SIZE amount of data,
1049		 * plus headers. This enables a receiver to perform
1050		 * page-flipping zero-copy optimizations.
1051		 */
1052
1053		int newlen;
1054		struct mbuf *mtmp;
1055
1056		for (mtmp = m, off = 0;
1057		     mtmp && ((off + mtmp->m_len) <= ifp->if_mtu);
1058		     mtmp = mtmp->m_next) {
1059			off += mtmp->m_len;
1060		}
1061		/*
1062		 * firstlen (off - hlen) must be aligned on an
1063		 * 8-byte boundary
1064		 */
1065		if (off < hlen)
1066			goto smart_frag_failure;
1067		off = ((off - hlen) & ~7) + hlen;
1068		newlen = (~PAGE_MASK) & ifp->if_mtu;
1069		if ((newlen + sizeof (struct ip)) > ifp->if_mtu) {
1070			/* we failed, go back the default */
1071smart_frag_failure:
1072			newlen = len;
1073			off = hlen + len;
1074		}
1075
1076/*		printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n",
1077		len, hlen, sizeof (struct ip), newlen, off);*/
1078
1079		len = newlen;
1080
1081	} else {
1082		off = hlen + len;
1083	}
1084
1085
1086
1087    {
1088	int mhlen, firstlen = off - hlen;
1089	struct mbuf **mnext = &m->m_nextpkt;
1090	int nfrags = 1;
1091
1092	/*
1093	 * Loop through length of segment after first fragment,
1094	 * make new header and copy data of each part and link onto chain.
1095	 */
1096	m0 = m;
1097	mhlen = sizeof (struct ip);
1098	for (; off < (u_short)ip->ip_len; off += len) {
1099		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1100		if (m == 0) {
1101			error = ENOBUFS;
1102			ipstat.ips_odropped++;
1103			goto sendorfree;
1104		}
1105		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1106		m->m_data += max_linkhdr;
1107		mhip = mtod(m, struct ip *);
1108		*mhip = *ip;
1109		if (hlen > sizeof (struct ip)) {
1110			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1111			mhip->ip_v = IPVERSION;
1112			mhip->ip_hl = mhlen >> 2;
1113		}
1114		m->m_len = mhlen;
1115		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
1116		if (off + len >= (u_short)ip->ip_len)
1117			len = (u_short)ip->ip_len - off;
1118		else
1119			mhip->ip_off |= IP_MF;
1120		mhip->ip_len = htons((u_short)(len + mhlen));
1121		m->m_next = m_copy(m0, off, len);
1122		if (m->m_next == 0) {
1123			(void) m_free(m);
1124			error = ENOBUFS;	/* ??? */
1125			ipstat.ips_odropped++;
1126			goto sendorfree;
1127		}
1128		m->m_pkthdr.len = mhlen + len;
1129		m->m_pkthdr.rcvif = (struct ifnet *)0;
1130#ifdef MAC
1131		mac_create_fragment(m0, m);
1132#endif
1133		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1134		mhip->ip_off = htons(mhip->ip_off);
1135		mhip->ip_sum = 0;
1136		if (sw_csum & CSUM_DELAY_IP)
1137			mhip->ip_sum = in_cksum(m, mhlen);
1138		*mnext = m;
1139		mnext = &m->m_nextpkt;
1140		nfrags++;
1141	}
1142	ipstat.ips_ofragments += nfrags;
1143
1144	/* set first/last markers for fragment chain */
1145	m->m_flags |= M_LASTFRAG;
1146	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1147	m0->m_pkthdr.csum_data = nfrags;
1148
1149	/*
1150	 * Update first fragment by trimming what's been copied out
1151	 * and updating header, then send each fragment (in order).
1152	 */
1153	m = m0;
1154	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
1155	m->m_pkthdr.len = hlen + firstlen;
1156	ip->ip_len = htons((u_short)m->m_pkthdr.len);
1157	ip->ip_off |= IP_MF;
1158	ip->ip_off = htons(ip->ip_off);
1159	ip->ip_sum = 0;
1160	if (sw_csum & CSUM_DELAY_IP)
1161		ip->ip_sum = in_cksum(m, hlen);
1162sendorfree:
1163	for (m = m0; m; m = m0) {
1164		m0 = m->m_nextpkt;
1165		m->m_nextpkt = 0;
1166#ifdef IPSEC
1167		/* clean ipsec history once it goes out of the node */
1168		ipsec_delaux(m);
1169#endif
1170		if (error == 0) {
1171			/* Record statistics for this interface address. */
1172			if (ia != NULL) {
1173				ia->ia_ifa.if_opackets++;
1174				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1175			}
1176
1177			error = (*ifp->if_output)(ifp, m,
1178			    (struct sockaddr *)dst, ro->ro_rt);
1179		} else
1180			m_freem(m);
1181	}
1182
1183	if (error == 0)
1184		ipstat.ips_fragmented++;
1185    }
1186done:
1187#ifdef IPSEC
1188	if (ro == &iproute && ro->ro_rt) {
1189		RTFREE(ro->ro_rt);
1190		ro->ro_rt = NULL;
1191	}
1192	if (sp != NULL) {
1193		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1194			printf("DP ip_output call free SP:%p\n", sp));
1195		key_freesp(sp);
1196	}
1197#endif /* IPSEC */
1198#ifdef FAST_IPSEC
1199	if (ro == &iproute && ro->ro_rt) {
1200		RTFREE(ro->ro_rt);
1201		ro->ro_rt = NULL;
1202	}
1203	if (sp != NULL)
1204		KEY_FREESP(&sp);
1205#endif /* FAST_IPSEC */
1206	return (error);
1207bad:
1208	m_freem(m);
1209	goto done;
1210}
1211
1212void
1213in_delayed_cksum(struct mbuf *m)
1214{
1215	struct ip *ip;
1216	u_short csum, offset;
1217
1218	ip = mtod(m, struct ip *);
1219	offset = ip->ip_hl << 2 ;
1220	csum = in_cksum_skip(m, ip->ip_len, offset);
1221	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1222		csum = 0xffff;
1223	offset += m->m_pkthdr.csum_data;	/* checksum offset */
1224
1225	if (offset + sizeof(u_short) > m->m_len) {
1226		printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
1227		    m->m_len, offset, ip->ip_p);
1228		/*
1229		 * XXX
1230		 * this shouldn't happen, but if it does, the
1231		 * correct behavior may be to insert the checksum
1232		 * in the existing chain instead of rearranging it.
1233		 */
1234		m = m_pullup(m, offset + sizeof(u_short));
1235	}
1236	*(u_short *)(m->m_data + offset) = csum;
1237}
1238
1239/*
1240 * Insert IP options into preformed packet.
1241 * Adjust IP destination as required for IP source routing,
1242 * as indicated by a non-zero in_addr at the start of the options.
1243 *
1244 * XXX This routine assumes that the packet has no options in place.
1245 */
1246static struct mbuf *
1247ip_insertoptions(m, opt, phlen)
1248	register struct mbuf *m;
1249	struct mbuf *opt;
1250	int *phlen;
1251{
1252	register struct ipoption *p = mtod(opt, struct ipoption *);
1253	struct mbuf *n;
1254	register struct ip *ip = mtod(m, struct ip *);
1255	unsigned optlen;
1256
1257	optlen = opt->m_len - sizeof(p->ipopt_dst);
1258	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) {
1259		*phlen = 0;
1260		return (m);		/* XXX should fail */
1261	}
1262	if (p->ipopt_dst.s_addr)
1263		ip->ip_dst = p->ipopt_dst;
1264	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1265		MGETHDR(n, M_DONTWAIT, MT_HEADER);
1266		if (n == 0) {
1267			*phlen = 0;
1268			return (m);
1269		}
1270		n->m_pkthdr.rcvif = (struct ifnet *)0;
1271#ifdef MAC
1272		mac_create_mbuf_from_mbuf(m, n);
1273#endif
1274		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1275		m->m_len -= sizeof(struct ip);
1276		m->m_data += sizeof(struct ip);
1277		n->m_next = m;
1278		m = n;
1279		m->m_len = optlen + sizeof(struct ip);
1280		m->m_data += max_linkhdr;
1281		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1282	} else {
1283		m->m_data -= optlen;
1284		m->m_len += optlen;
1285		m->m_pkthdr.len += optlen;
1286		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1287	}
1288	ip = mtod(m, struct ip *);
1289	bcopy(p->ipopt_list, ip + 1, optlen);
1290	*phlen = sizeof(struct ip) + optlen;
1291	ip->ip_v = IPVERSION;
1292	ip->ip_hl = *phlen >> 2;
1293	ip->ip_len += optlen;
1294	return (m);
1295}
1296
1297/*
1298 * Copy options from ip to jp,
1299 * omitting those not copied during fragmentation.
1300 */
1301int
1302ip_optcopy(ip, jp)
1303	struct ip *ip, *jp;
1304{
1305	register u_char *cp, *dp;
1306	int opt, optlen, cnt;
1307
1308	cp = (u_char *)(ip + 1);
1309	dp = (u_char *)(jp + 1);
1310	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1311	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1312		opt = cp[0];
1313		if (opt == IPOPT_EOL)
1314			break;
1315		if (opt == IPOPT_NOP) {
1316			/* Preserve for IP mcast tunnel's LSRR alignment. */
1317			*dp++ = IPOPT_NOP;
1318			optlen = 1;
1319			continue;
1320		}
1321
1322		KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1323		    ("ip_optcopy: malformed ipv4 option"));
1324		optlen = cp[IPOPT_OLEN];
1325		KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1326		    ("ip_optcopy: malformed ipv4 option"));
1327
1328		/* bogus lengths should have been caught by ip_dooptions */
1329		if (optlen > cnt)
1330			optlen = cnt;
1331		if (IPOPT_COPIED(opt)) {
1332			bcopy(cp, dp, optlen);
1333			dp += optlen;
1334		}
1335	}
1336	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1337		*dp++ = IPOPT_EOL;
1338	return (optlen);
1339}
1340
1341/*
1342 * IP socket option processing.
1343 */
1344int
1345ip_ctloutput(so, sopt)
1346	struct socket *so;
1347	struct sockopt *sopt;
1348{
1349	struct	inpcb *inp = sotoinpcb(so);
1350	int	error, optval;
1351
1352	error = optval = 0;
1353	if (sopt->sopt_level != IPPROTO_IP) {
1354		return (EINVAL);
1355	}
1356
1357	switch (sopt->sopt_dir) {
1358	case SOPT_SET:
1359		switch (sopt->sopt_name) {
1360		case IP_OPTIONS:
1361#ifdef notyet
1362		case IP_RETOPTS:
1363#endif
1364		{
1365			struct mbuf *m;
1366			if (sopt->sopt_valsize > MLEN) {
1367				error = EMSGSIZE;
1368				break;
1369			}
1370			MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1371			if (m == 0) {
1372				error = ENOBUFS;
1373				break;
1374			}
1375			m->m_len = sopt->sopt_valsize;
1376			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1377					    m->m_len);
1378
1379			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1380					   m));
1381		}
1382
1383		case IP_TOS:
1384		case IP_TTL:
1385		case IP_RECVOPTS:
1386		case IP_RECVRETOPTS:
1387		case IP_RECVDSTADDR:
1388		case IP_RECVIF:
1389		case IP_FAITH:
1390			error = sooptcopyin(sopt, &optval, sizeof optval,
1391					    sizeof optval);
1392			if (error)
1393				break;
1394
1395			switch (sopt->sopt_name) {
1396			case IP_TOS:
1397				inp->inp_ip_tos = optval;
1398				break;
1399
1400			case IP_TTL:
1401				inp->inp_ip_ttl = optval;
1402				break;
1403#define	OPTSET(bit) \
1404	if (optval) \
1405		inp->inp_flags |= bit; \
1406	else \
1407		inp->inp_flags &= ~bit;
1408
1409			case IP_RECVOPTS:
1410				OPTSET(INP_RECVOPTS);
1411				break;
1412
1413			case IP_RECVRETOPTS:
1414				OPTSET(INP_RECVRETOPTS);
1415				break;
1416
1417			case IP_RECVDSTADDR:
1418				OPTSET(INP_RECVDSTADDR);
1419				break;
1420
1421			case IP_RECVIF:
1422				OPTSET(INP_RECVIF);
1423				break;
1424
1425			case IP_FAITH:
1426				OPTSET(INP_FAITH);
1427				break;
1428			}
1429			break;
1430#undef OPTSET
1431
1432		case IP_MULTICAST_IF:
1433		case IP_MULTICAST_VIF:
1434		case IP_MULTICAST_TTL:
1435		case IP_MULTICAST_LOOP:
1436		case IP_ADD_MEMBERSHIP:
1437		case IP_DROP_MEMBERSHIP:
1438			error = ip_setmoptions(sopt, &inp->inp_moptions);
1439			break;
1440
1441		case IP_PORTRANGE:
1442			error = sooptcopyin(sopt, &optval, sizeof optval,
1443					    sizeof optval);
1444			if (error)
1445				break;
1446
1447			switch (optval) {
1448			case IP_PORTRANGE_DEFAULT:
1449				inp->inp_flags &= ~(INP_LOWPORT);
1450				inp->inp_flags &= ~(INP_HIGHPORT);
1451				break;
1452
1453			case IP_PORTRANGE_HIGH:
1454				inp->inp_flags &= ~(INP_LOWPORT);
1455				inp->inp_flags |= INP_HIGHPORT;
1456				break;
1457
1458			case IP_PORTRANGE_LOW:
1459				inp->inp_flags &= ~(INP_HIGHPORT);
1460				inp->inp_flags |= INP_LOWPORT;
1461				break;
1462
1463			default:
1464				error = EINVAL;
1465				break;
1466			}
1467			break;
1468
1469#if defined(IPSEC) || defined(FAST_IPSEC)
1470		case IP_IPSEC_POLICY:
1471		{
1472			caddr_t req;
1473			size_t len = 0;
1474			int priv;
1475			struct mbuf *m;
1476			int optname;
1477
1478			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1479				break;
1480			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1481				break;
1482			priv = (sopt->sopt_td != NULL &&
1483				suser(sopt->sopt_td) != 0) ? 0 : 1;
1484			req = mtod(m, caddr_t);
1485			len = m->m_len;
1486			optname = sopt->sopt_name;
1487			error = ipsec4_set_policy(inp, optname, req, len, priv);
1488			m_freem(m);
1489			break;
1490		}
1491#endif /*IPSEC*/
1492
1493		default:
1494			error = ENOPROTOOPT;
1495			break;
1496		}
1497		break;
1498
1499	case SOPT_GET:
1500		switch (sopt->sopt_name) {
1501		case IP_OPTIONS:
1502		case IP_RETOPTS:
1503			if (inp->inp_options)
1504				error = sooptcopyout(sopt,
1505						     mtod(inp->inp_options,
1506							  char *),
1507						     inp->inp_options->m_len);
1508			else
1509				sopt->sopt_valsize = 0;
1510			break;
1511
1512		case IP_TOS:
1513		case IP_TTL:
1514		case IP_RECVOPTS:
1515		case IP_RECVRETOPTS:
1516		case IP_RECVDSTADDR:
1517		case IP_RECVIF:
1518		case IP_PORTRANGE:
1519		case IP_FAITH:
1520			switch (sopt->sopt_name) {
1521
1522			case IP_TOS:
1523				optval = inp->inp_ip_tos;
1524				break;
1525
1526			case IP_TTL:
1527				optval = inp->inp_ip_ttl;
1528				break;
1529
1530#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1531
1532			case IP_RECVOPTS:
1533				optval = OPTBIT(INP_RECVOPTS);
1534				break;
1535
1536			case IP_RECVRETOPTS:
1537				optval = OPTBIT(INP_RECVRETOPTS);
1538				break;
1539
1540			case IP_RECVDSTADDR:
1541				optval = OPTBIT(INP_RECVDSTADDR);
1542				break;
1543
1544			case IP_RECVIF:
1545				optval = OPTBIT(INP_RECVIF);
1546				break;
1547
1548			case IP_PORTRANGE:
1549				if (inp->inp_flags & INP_HIGHPORT)
1550					optval = IP_PORTRANGE_HIGH;
1551				else if (inp->inp_flags & INP_LOWPORT)
1552					optval = IP_PORTRANGE_LOW;
1553				else
1554					optval = 0;
1555				break;
1556
1557			case IP_FAITH:
1558				optval = OPTBIT(INP_FAITH);
1559				break;
1560			}
1561			error = sooptcopyout(sopt, &optval, sizeof optval);
1562			break;
1563
1564		case IP_MULTICAST_IF:
1565		case IP_MULTICAST_VIF:
1566		case IP_MULTICAST_TTL:
1567		case IP_MULTICAST_LOOP:
1568		case IP_ADD_MEMBERSHIP:
1569		case IP_DROP_MEMBERSHIP:
1570			error = ip_getmoptions(sopt, inp->inp_moptions);
1571			break;
1572
1573#if defined(IPSEC) || defined(FAST_IPSEC)
1574		case IP_IPSEC_POLICY:
1575		{
1576			struct mbuf *m = NULL;
1577			caddr_t req = NULL;
1578			size_t len = 0;
1579
1580			if (m != 0) {
1581				req = mtod(m, caddr_t);
1582				len = m->m_len;
1583			}
1584			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1585			if (error == 0)
1586				error = soopt_mcopyout(sopt, m); /* XXX */
1587			if (error == 0)
1588				m_freem(m);
1589			break;
1590		}
1591#endif /*IPSEC*/
1592
1593		default:
1594			error = ENOPROTOOPT;
1595			break;
1596		}
1597		break;
1598	}
1599	return (error);
1600}
1601
1602/*
1603 * Set up IP options in pcb for insertion in output packets.
1604 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1605 * with destination address if source routed.
1606 */
1607static int
1608ip_pcbopts(optname, pcbopt, m)
1609	int optname;
1610	struct mbuf **pcbopt;
1611	register struct mbuf *m;
1612{
1613	register int cnt, optlen;
1614	register u_char *cp;
1615	u_char opt;
1616
1617	/* turn off any old options */
1618	if (*pcbopt)
1619		(void)m_free(*pcbopt);
1620	*pcbopt = 0;
1621	if (m == (struct mbuf *)0 || m->m_len == 0) {
1622		/*
1623		 * Only turning off any previous options.
1624		 */
1625		if (m)
1626			(void)m_free(m);
1627		return (0);
1628	}
1629
1630	if (m->m_len % sizeof(int32_t))
1631		goto bad;
1632	/*
1633	 * IP first-hop destination address will be stored before
1634	 * actual options; move other options back
1635	 * and clear it when none present.
1636	 */
1637	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1638		goto bad;
1639	cnt = m->m_len;
1640	m->m_len += sizeof(struct in_addr);
1641	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1642	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1643	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1644
1645	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1646		opt = cp[IPOPT_OPTVAL];
1647		if (opt == IPOPT_EOL)
1648			break;
1649		if (opt == IPOPT_NOP)
1650			optlen = 1;
1651		else {
1652			if (cnt < IPOPT_OLEN + sizeof(*cp))
1653				goto bad;
1654			optlen = cp[IPOPT_OLEN];
1655			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1656				goto bad;
1657		}
1658		switch (opt) {
1659
1660		default:
1661			break;
1662
1663		case IPOPT_LSRR:
1664		case IPOPT_SSRR:
1665			/*
1666			 * user process specifies route as:
1667			 *	->A->B->C->D
1668			 * D must be our final destination (but we can't
1669			 * check that since we may not have connected yet).
1670			 * A is first hop destination, which doesn't appear in
1671			 * actual IP option, but is stored before the options.
1672			 */
1673			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1674				goto bad;
1675			m->m_len -= sizeof(struct in_addr);
1676			cnt -= sizeof(struct in_addr);
1677			optlen -= sizeof(struct in_addr);
1678			cp[IPOPT_OLEN] = optlen;
1679			/*
1680			 * Move first hop before start of options.
1681			 */
1682			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1683			    sizeof(struct in_addr));
1684			/*
1685			 * Then copy rest of options back
1686			 * to close up the deleted entry.
1687			 */
1688			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1689			    sizeof(struct in_addr)),
1690			    (caddr_t)&cp[IPOPT_OFFSET+1],
1691			    (unsigned)cnt + sizeof(struct in_addr));
1692			break;
1693		}
1694	}
1695	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1696		goto bad;
1697	*pcbopt = m;
1698	return (0);
1699
1700bad:
1701	(void)m_free(m);
1702	return (EINVAL);
1703}
1704
1705/*
1706 * XXX
1707 * The whole multicast option thing needs to be re-thought.
1708 * Several of these options are equally applicable to non-multicast
1709 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1710 * standard option (IP_TTL).
1711 */
1712
1713/*
1714 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1715 */
1716static struct ifnet *
1717ip_multicast_if(a, ifindexp)
1718	struct in_addr *a;
1719	int *ifindexp;
1720{
1721	int ifindex;
1722	struct ifnet *ifp;
1723
1724	if (ifindexp)
1725		*ifindexp = 0;
1726	if (ntohl(a->s_addr) >> 24 == 0) {
1727		ifindex = ntohl(a->s_addr) & 0xffffff;
1728		if (ifindex < 0 || if_index < ifindex)
1729			return NULL;
1730		ifp = ifnet_byindex(ifindex);
1731		if (ifindexp)
1732			*ifindexp = ifindex;
1733	} else {
1734		INADDR_TO_IFP(*a, ifp);
1735	}
1736	return ifp;
1737}
1738
1739/*
1740 * Set the IP multicast options in response to user setsockopt().
1741 */
1742static int
1743ip_setmoptions(sopt, imop)
1744	struct sockopt *sopt;
1745	struct ip_moptions **imop;
1746{
1747	int error = 0;
1748	int i;
1749	struct in_addr addr;
1750	struct ip_mreq mreq;
1751	struct ifnet *ifp;
1752	struct ip_moptions *imo = *imop;
1753	struct route ro;
1754	struct sockaddr_in *dst;
1755	int ifindex;
1756	int s;
1757
1758	if (imo == NULL) {
1759		/*
1760		 * No multicast option buffer attached to the pcb;
1761		 * allocate one and initialize to default values.
1762		 */
1763		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1764		    M_WAITOK);
1765
1766		if (imo == NULL)
1767			return (ENOBUFS);
1768		*imop = imo;
1769		imo->imo_multicast_ifp = NULL;
1770		imo->imo_multicast_addr.s_addr = INADDR_ANY;
1771		imo->imo_multicast_vif = -1;
1772		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1773		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1774		imo->imo_num_memberships = 0;
1775	}
1776
1777	switch (sopt->sopt_name) {
1778	/* store an index number for the vif you wanna use in the send */
1779	case IP_MULTICAST_VIF:
1780		if (legal_vif_num == 0) {
1781			error = EOPNOTSUPP;
1782			break;
1783		}
1784		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1785		if (error)
1786			break;
1787		if (!legal_vif_num(i) && (i != -1)) {
1788			error = EINVAL;
1789			break;
1790		}
1791		imo->imo_multicast_vif = i;
1792		break;
1793
1794	case IP_MULTICAST_IF:
1795		/*
1796		 * Select the interface for outgoing multicast packets.
1797		 */
1798		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1799		if (error)
1800			break;
1801		/*
1802		 * INADDR_ANY is used to remove a previous selection.
1803		 * When no interface is selected, a default one is
1804		 * chosen every time a multicast packet is sent.
1805		 */
1806		if (addr.s_addr == INADDR_ANY) {
1807			imo->imo_multicast_ifp = NULL;
1808			break;
1809		}
1810		/*
1811		 * The selected interface is identified by its local
1812		 * IP address.  Find the interface and confirm that
1813		 * it supports multicasting.
1814		 */
1815		s = splimp();
1816		ifp = ip_multicast_if(&addr, &ifindex);
1817		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1818			splx(s);
1819			error = EADDRNOTAVAIL;
1820			break;
1821		}
1822		imo->imo_multicast_ifp = ifp;
1823		if (ifindex)
1824			imo->imo_multicast_addr = addr;
1825		else
1826			imo->imo_multicast_addr.s_addr = INADDR_ANY;
1827		splx(s);
1828		break;
1829
1830	case IP_MULTICAST_TTL:
1831		/*
1832		 * Set the IP time-to-live for outgoing multicast packets.
1833		 * The original multicast API required a char argument,
1834		 * which is inconsistent with the rest of the socket API.
1835		 * We allow either a char or an int.
1836		 */
1837		if (sopt->sopt_valsize == 1) {
1838			u_char ttl;
1839			error = sooptcopyin(sopt, &ttl, 1, 1);
1840			if (error)
1841				break;
1842			imo->imo_multicast_ttl = ttl;
1843		} else {
1844			u_int ttl;
1845			error = sooptcopyin(sopt, &ttl, sizeof ttl,
1846					    sizeof ttl);
1847			if (error)
1848				break;
1849			if (ttl > 255)
1850				error = EINVAL;
1851			else
1852				imo->imo_multicast_ttl = ttl;
1853		}
1854		break;
1855
1856	case IP_MULTICAST_LOOP:
1857		/*
1858		 * Set the loopback flag for outgoing multicast packets.
1859		 * Must be zero or one.  The original multicast API required a
1860		 * char argument, which is inconsistent with the rest
1861		 * of the socket API.  We allow either a char or an int.
1862		 */
1863		if (sopt->sopt_valsize == 1) {
1864			u_char loop;
1865			error = sooptcopyin(sopt, &loop, 1, 1);
1866			if (error)
1867				break;
1868			imo->imo_multicast_loop = !!loop;
1869		} else {
1870			u_int loop;
1871			error = sooptcopyin(sopt, &loop, sizeof loop,
1872					    sizeof loop);
1873			if (error)
1874				break;
1875			imo->imo_multicast_loop = !!loop;
1876		}
1877		break;
1878
1879	case IP_ADD_MEMBERSHIP:
1880		/*
1881		 * Add a multicast group membership.
1882		 * Group must be a valid IP multicast address.
1883		 */
1884		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1885		if (error)
1886			break;
1887
1888		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1889			error = EINVAL;
1890			break;
1891		}
1892		s = splimp();
1893		/*
1894		 * If no interface address was provided, use the interface of
1895		 * the route to the given multicast address.
1896		 */
1897		if (mreq.imr_interface.s_addr == INADDR_ANY) {
1898			bzero((caddr_t)&ro, sizeof(ro));
1899			dst = (struct sockaddr_in *)&ro.ro_dst;
1900			dst->sin_len = sizeof(*dst);
1901			dst->sin_family = AF_INET;
1902			dst->sin_addr = mreq.imr_multiaddr;
1903			rtalloc(&ro);
1904			if (ro.ro_rt == NULL) {
1905				error = EADDRNOTAVAIL;
1906				splx(s);
1907				break;
1908			}
1909			ifp = ro.ro_rt->rt_ifp;
1910			rtfree(ro.ro_rt);
1911		}
1912		else {
1913			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1914		}
1915
1916		/*
1917		 * See if we found an interface, and confirm that it
1918		 * supports multicast.
1919		 */
1920		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1921			error = EADDRNOTAVAIL;
1922			splx(s);
1923			break;
1924		}
1925		/*
1926		 * See if the membership already exists or if all the
1927		 * membership slots are full.
1928		 */
1929		for (i = 0; i < imo->imo_num_memberships; ++i) {
1930			if (imo->imo_membership[i]->inm_ifp == ifp &&
1931			    imo->imo_membership[i]->inm_addr.s_addr
1932						== mreq.imr_multiaddr.s_addr)
1933				break;
1934		}
1935		if (i < imo->imo_num_memberships) {
1936			error = EADDRINUSE;
1937			splx(s);
1938			break;
1939		}
1940		if (i == IP_MAX_MEMBERSHIPS) {
1941			error = ETOOMANYREFS;
1942			splx(s);
1943			break;
1944		}
1945		/*
1946		 * Everything looks good; add a new record to the multicast
1947		 * address list for the given interface.
1948		 */
1949		if ((imo->imo_membership[i] =
1950		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1951			error = ENOBUFS;
1952			splx(s);
1953			break;
1954		}
1955		++imo->imo_num_memberships;
1956		splx(s);
1957		break;
1958
1959	case IP_DROP_MEMBERSHIP:
1960		/*
1961		 * Drop a multicast group membership.
1962		 * Group must be a valid IP multicast address.
1963		 */
1964		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1965		if (error)
1966			break;
1967
1968		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1969			error = EINVAL;
1970			break;
1971		}
1972
1973		s = splimp();
1974		/*
1975		 * If an interface address was specified, get a pointer
1976		 * to its ifnet structure.
1977		 */
1978		if (mreq.imr_interface.s_addr == INADDR_ANY)
1979			ifp = NULL;
1980		else {
1981			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1982			if (ifp == NULL) {
1983				error = EADDRNOTAVAIL;
1984				splx(s);
1985				break;
1986			}
1987		}
1988		/*
1989		 * Find the membership in the membership array.
1990		 */
1991		for (i = 0; i < imo->imo_num_memberships; ++i) {
1992			if ((ifp == NULL ||
1993			     imo->imo_membership[i]->inm_ifp == ifp) &&
1994			     imo->imo_membership[i]->inm_addr.s_addr ==
1995			     mreq.imr_multiaddr.s_addr)
1996				break;
1997		}
1998		if (i == imo->imo_num_memberships) {
1999			error = EADDRNOTAVAIL;
2000			splx(s);
2001			break;
2002		}
2003		/*
2004		 * Give up the multicast address record to which the
2005		 * membership points.
2006		 */
2007		in_delmulti(imo->imo_membership[i]);
2008		/*
2009		 * Remove the gap in the membership array.
2010		 */
2011		for (++i; i < imo->imo_num_memberships; ++i)
2012			imo->imo_membership[i-1] = imo->imo_membership[i];
2013		--imo->imo_num_memberships;
2014		splx(s);
2015		break;
2016
2017	default:
2018		error = EOPNOTSUPP;
2019		break;
2020	}
2021
2022	/*
2023	 * If all options have default values, no need to keep the mbuf.
2024	 */
2025	if (imo->imo_multicast_ifp == NULL &&
2026	    imo->imo_multicast_vif == -1 &&
2027	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2028	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2029	    imo->imo_num_memberships == 0) {
2030		free(*imop, M_IPMOPTS);
2031		*imop = NULL;
2032	}
2033
2034	return (error);
2035}
2036
2037/*
2038 * Return the IP multicast options in response to user getsockopt().
2039 */
2040static int
2041ip_getmoptions(sopt, imo)
2042	struct sockopt *sopt;
2043	register struct ip_moptions *imo;
2044{
2045	struct in_addr addr;
2046	struct in_ifaddr *ia;
2047	int error, optval;
2048	u_char coptval;
2049
2050	error = 0;
2051	switch (sopt->sopt_name) {
2052	case IP_MULTICAST_VIF:
2053		if (imo != NULL)
2054			optval = imo->imo_multicast_vif;
2055		else
2056			optval = -1;
2057		error = sooptcopyout(sopt, &optval, sizeof optval);
2058		break;
2059
2060	case IP_MULTICAST_IF:
2061		if (imo == NULL || imo->imo_multicast_ifp == NULL)
2062			addr.s_addr = INADDR_ANY;
2063		else if (imo->imo_multicast_addr.s_addr) {
2064			/* return the value user has set */
2065			addr = imo->imo_multicast_addr;
2066		} else {
2067			IFP_TO_IA(imo->imo_multicast_ifp, ia);
2068			addr.s_addr = (ia == NULL) ? INADDR_ANY
2069				: IA_SIN(ia)->sin_addr.s_addr;
2070		}
2071		error = sooptcopyout(sopt, &addr, sizeof addr);
2072		break;
2073
2074	case IP_MULTICAST_TTL:
2075		if (imo == 0)
2076			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2077		else
2078			optval = coptval = imo->imo_multicast_ttl;
2079		if (sopt->sopt_valsize == 1)
2080			error = sooptcopyout(sopt, &coptval, 1);
2081		else
2082			error = sooptcopyout(sopt, &optval, sizeof optval);
2083		break;
2084
2085	case IP_MULTICAST_LOOP:
2086		if (imo == 0)
2087			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2088		else
2089			optval = coptval = imo->imo_multicast_loop;
2090		if (sopt->sopt_valsize == 1)
2091			error = sooptcopyout(sopt, &coptval, 1);
2092		else
2093			error = sooptcopyout(sopt, &optval, sizeof optval);
2094		break;
2095
2096	default:
2097		error = ENOPROTOOPT;
2098		break;
2099	}
2100	return (error);
2101}
2102
2103/*
2104 * Discard the IP multicast options.
2105 */
2106void
2107ip_freemoptions(imo)
2108	register struct ip_moptions *imo;
2109{
2110	register int i;
2111
2112	if (imo != NULL) {
2113		for (i = 0; i < imo->imo_num_memberships; ++i)
2114			in_delmulti(imo->imo_membership[i]);
2115		free(imo, M_IPMOPTS);
2116	}
2117}
2118
2119/*
2120 * Routine called from ip_output() to loop back a copy of an IP multicast
2121 * packet to the input queue of a specified interface.  Note that this
2122 * calls the output routine of the loopback "driver", but with an interface
2123 * pointer that might NOT be a loopback interface -- evil, but easier than
2124 * replicating that code here.
2125 */
2126static void
2127ip_mloopback(ifp, m, dst, hlen)
2128	struct ifnet *ifp;
2129	register struct mbuf *m;
2130	register struct sockaddr_in *dst;
2131	int hlen;
2132{
2133	register struct ip *ip;
2134	struct mbuf *copym;
2135
2136	copym = m_copy(m, 0, M_COPYALL);
2137	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2138		copym = m_pullup(copym, hlen);
2139	if (copym != NULL) {
2140		/*
2141		 * We don't bother to fragment if the IP length is greater
2142		 * than the interface's MTU.  Can this possibly matter?
2143		 */
2144		ip = mtod(copym, struct ip *);
2145		ip->ip_len = htons(ip->ip_len);
2146		ip->ip_off = htons(ip->ip_off);
2147		ip->ip_sum = 0;
2148		ip->ip_sum = in_cksum(copym, hlen);
2149		/*
2150		 * NB:
2151		 * It's not clear whether there are any lingering
2152		 * reentrancy problems in other areas which might
2153		 * be exposed by using ip_input directly (in
2154		 * particular, everything which modifies the packet
2155		 * in-place).  Yet another option is using the
2156		 * protosw directly to deliver the looped back
2157		 * packet.  For the moment, we'll err on the side
2158		 * of safety by using if_simloop().
2159		 */
2160#if 1 /* XXX */
2161		if (dst->sin_family != AF_INET) {
2162			printf("ip_mloopback: bad address family %d\n",
2163						dst->sin_family);
2164			dst->sin_family = AF_INET;
2165		}
2166#endif
2167
2168#ifdef notdef
2169		copym->m_pkthdr.rcvif = ifp;
2170		ip_input(copym);
2171#else
2172		/* if the checksum hasn't been computed, mark it as valid */
2173		if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2174			copym->m_pkthdr.csum_flags |=
2175			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2176			copym->m_pkthdr.csum_data = 0xffff;
2177		}
2178		if_simloop(ifp, copym, dst->sin_family, 0);
2179#endif
2180	}
2181}
2182