ip_output.c revision 57114
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 57114 2000-02-10 14:19:53Z luigi $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43#include "opt_ipsec.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/kernel.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/proc.h>
54
55#include <net/if.h>
56#include <net/route.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/ip.h>
61#include <netinet/in_pcb.h>
62#include <netinet/in_var.h>
63#include <netinet/ip_var.h>
64
65#include "faith.h"
66
67#ifdef vax
68#include <machine/mtpr.h>
69#endif
70#include <machine/in_cksum.h>
71
72static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
73
74#ifdef IPSEC
75#include <netinet6/ipsec.h>
76#include <netkey/key.h>
77#ifdef IPSEC_DEBUG
78#include <netkey/key_debug.h>
79#else
80#define	KEYDEBUG(lev,arg)
81#endif
82#endif /*IPSEC*/
83
84#include <netinet/ip_fw.h>
85
86#ifdef DUMMYNET
87#include <netinet/ip_dummynet.h>
88#endif
89
#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print, in dotted-quad form, the address stored in the
 * s_addr member of "a" (converted with ntohl() before the octets are
 * extracted).
 *
 * The argument is parenthesized so any lvalue expression may be passed,
 * each octet is cast to u_long to match the %lu conversions regardless of
 * the width ntohl() returns on a given platform, and the expansion carries
 * no trailing semicolon so the macro behaves like an ordinary call
 * expression (the caller supplies the ';').
 */
#define print_ip(a)	printf("%lu.%lu.%lu.%lu",			\
			    (u_long)((ntohl((a).s_addr) >> 24) & 0xFF),	\
			    (u_long)((ntohl((a).s_addr) >> 16) & 0xFF),	\
			    (u_long)((ntohl((a).s_addr) >> 8) & 0xFF),	\
			    (u_long)(ntohl((a).s_addr) & 0xFF))
#endif
96
97u_short ip_id;
98
99static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
100static void	ip_mloopback
101	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
102static int	ip_getmoptions
103	__P((struct sockopt *, struct ip_moptions *));
104static int	ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
105static int	ip_setmoptions
106	__P((struct sockopt *, struct ip_moptions **));
107
108#if defined(IPFILTER_LKM) || defined(IPFILTER)
109int	ip_optcopy __P((struct ip *, struct ip *));
110extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
111#else
112static int	ip_optcopy __P((struct ip *, struct ip *));
113#endif
114
115
116extern	struct protosw inetsw[];
117
118/*
119 * IP output.  The packet in mbuf chain m contains a skeletal IP
120 * header (with len, off, ttl, proto, tos, src, dst).
121 * The mbuf chain containing the packet will be freed.
122 * The mbuf opt, if present, will not be freed.
123 */
124int
125ip_output(m0, opt, ro, flags, imo)
126	struct mbuf *m0;
127	struct mbuf *opt;
128	struct route *ro;
129	int flags;
130	struct ip_moptions *imo;
131{
132	struct ip *ip, *mhip;
133	struct ifnet *ifp;
134	struct mbuf *m = m0;
135	int hlen = sizeof (struct ip);
136	int len, off, error = 0;
137	struct sockaddr_in *dst;
138	struct in_ifaddr *ia;
139	int isbroadcast;
140#ifdef IPSEC
141	struct route iproute;
142	struct socket *so = NULL;
143	struct secpolicy *sp = NULL;
144#endif
145	u_int16_t divert_cookie;		/* firewall cookie */
146#ifdef IPFIREWALL_FORWARD
147	int fwd_rewrite_src = 0;
148#endif
149	struct ip_fw_chain *rule = NULL;
150
151#ifdef IPDIVERT
152	/* Get and reset firewall cookie */
153	divert_cookie = ip_divert_cookie;
154	ip_divert_cookie = 0;
155#else
156	divert_cookie = 0;
157#endif
158
159	/*
160	 * NOTE: If IP_SOCKINMRCVIF flag is set, 'socket *' is kept in
161	 * m->m_pkthdr.rcvif for later IPSEC check. In this case,
162	 * m->m_pkthdr will be NULL cleared after the contents is saved in
163	 * 'so'.
164	 * NULL clearance of rcvif should be natural because the packet should
165	 * have been sent from my own socket and has no rcvif in this case.
166	 * It is also necessary because someone might consider it as
167	 * 'ifnet *', and cause SEGV.
168	 */
169#if defined(IPFIREWALL) && defined(DUMMYNET)
170        /*
171         * dummynet packet are prepended a vestigial mbuf with
172         * m_type = MT_DUMMYNET and m_data pointing to the matching
173         * rule.
174         */
175        if (m->m_type == MT_DUMMYNET) {
176            /*
177             * the packet was already tagged, so part of the
178             * processing was already done, and we need to go down.
179             * Get parameters from the header.
180             */
181            rule = (struct ip_fw_chain *)(m->m_data) ;
182	    opt = NULL ;
183	    ro = & ( ((struct dn_pkt *)m)->ro ) ;
184	    imo = NULL ;
185	    dst = ((struct dn_pkt *)m)->dn_dst ;
186	    ifp = ((struct dn_pkt *)m)->ifp ;
187	    flags = ((struct dn_pkt *)m)->flags ;
188
189            m0 = m = m->m_next ;
190#ifdef IPSEC
191	    if ((flags & IP_SOCKINMRCVIF) != 0) {
192	        so = (struct socket *)m->m_pkthdr.rcvif;
193	        m->m_pkthdr.rcvif = NULL;
194	    }
195#endif
196            ip = mtod(m, struct ip *);
197            hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
198            goto sendit;
199        } else
200            rule = NULL ;
201#endif
202#ifdef IPSEC
203	if ((flags & IP_SOCKINMRCVIF) != 0) {
204		so = (struct socket *)m->m_pkthdr.rcvif;
205		m->m_pkthdr.rcvif = NULL;
206	}
207#endif
208
209#ifdef	DIAGNOSTIC
210	if ((m->m_flags & M_PKTHDR) == 0)
211		panic("ip_output no HDR");
212	if (!ro)
213		panic("ip_output no route, proto = %d",
214		      mtod(m, struct ip *)->ip_p);
215#endif
216	if (opt) {
217		m = ip_insertoptions(m, opt, &len);
218		hlen = len;
219	}
220	ip = mtod(m, struct ip *);
221	/*
222	 * Fill in IP header.
223	 */
224	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
225		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
226		ip->ip_off &= IP_DF;
227		ip->ip_id = htons(ip_id++);
228		ipstat.ips_localout++;
229	} else {
230		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
231	}
232
233	dst = (struct sockaddr_in *)&ro->ro_dst;
234	/*
235	 * If there is a cached route,
236	 * check that it is to the same destination
237	 * and is still up.  If not, free it and try again.
238	 */
239	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
240	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
241		RTFREE(ro->ro_rt);
242		ro->ro_rt = (struct rtentry *)0;
243	}
244	if (ro->ro_rt == 0) {
245		dst->sin_family = AF_INET;
246		dst->sin_len = sizeof(*dst);
247		dst->sin_addr = ip->ip_dst;
248	}
249	/*
250	 * If routing to interface only,
251	 * short circuit routing lookup.
252	 */
253#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
254#define sintosa(sin)	((struct sockaddr *)(sin))
255	if (flags & IP_ROUTETOIF) {
256		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
257		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
258			ipstat.ips_noroute++;
259			error = ENETUNREACH;
260			goto bad;
261		}
262		ifp = ia->ia_ifp;
263		ip->ip_ttl = 1;
264		isbroadcast = in_broadcast(dst->sin_addr, ifp);
265	} else {
266		/*
267		 * If this is the case, we probably don't want to allocate
268		 * a protocol-cloned route since we didn't get one from the
269		 * ULP.  This lets TCP do its thing, while not burdening
270		 * forwarding or ICMP with the overhead of cloning a route.
271		 * Of course, we still want to do any cloning requested by
272		 * the link layer, as this is probably required in all cases
273		 * for correct operation (as it is for ARP).
274		 */
275		if (ro->ro_rt == 0)
276			rtalloc_ign(ro, RTF_PRCLONING);
277		if (ro->ro_rt == 0) {
278			ipstat.ips_noroute++;
279			error = EHOSTUNREACH;
280			goto bad;
281		}
282		ia = ifatoia(ro->ro_rt->rt_ifa);
283		ifp = ro->ro_rt->rt_ifp;
284		ro->ro_rt->rt_use++;
285		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
286			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
287		if (ro->ro_rt->rt_flags & RTF_HOST)
288			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
289		else
290			isbroadcast = in_broadcast(dst->sin_addr, ifp);
291	}
292	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
293		struct in_multi *inm;
294
295		m->m_flags |= M_MCAST;
296		/*
297		 * IP destination address is multicast.  Make sure "dst"
298		 * still points to the address in "ro".  (It may have been
299		 * changed to point to a gateway address, above.)
300		 */
301		dst = (struct sockaddr_in *)&ro->ro_dst;
302		/*
303		 * See if the caller provided any multicast options
304		 */
305		if (imo != NULL) {
306			ip->ip_ttl = imo->imo_multicast_ttl;
307			if (imo->imo_multicast_ifp != NULL)
308				ifp = imo->imo_multicast_ifp;
309			if (imo->imo_multicast_vif != -1)
310				ip->ip_src.s_addr =
311				    ip_mcast_src(imo->imo_multicast_vif);
312		} else
313			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
314		/*
315		 * Confirm that the outgoing interface supports multicast.
316		 */
317		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
318			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
319				ipstat.ips_noroute++;
320				error = ENETUNREACH;
321				goto bad;
322			}
323		}
324		/*
325		 * If source address not specified yet, use address
326		 * of outgoing interface.
327		 */
328		if (ip->ip_src.s_addr == INADDR_ANY) {
329			register struct in_ifaddr *ia1;
330
331			for (ia1 = in_ifaddrhead.tqh_first; ia1;
332			     ia1 = ia1->ia_link.tqe_next)
333				if (ia1->ia_ifp == ifp) {
334					ip->ip_src = IA_SIN(ia1)->sin_addr;
335					break;
336				}
337		}
338
339		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
340		if (inm != NULL &&
341		   (imo == NULL || imo->imo_multicast_loop)) {
342			/*
343			 * If we belong to the destination multicast group
344			 * on the outgoing interface, and the caller did not
345			 * forbid loopback, loop back a copy.
346			 */
347			ip_mloopback(ifp, m, dst, hlen);
348		}
349		else {
350			/*
351			 * If we are acting as a multicast router, perform
352			 * multicast forwarding as if the packet had just
353			 * arrived on the interface to which we are about
354			 * to send.  The multicast forwarding function
355			 * recursively calls this function, using the
356			 * IP_FORWARDING flag to prevent infinite recursion.
357			 *
358			 * Multicasts that are looped back by ip_mloopback(),
359			 * above, will be forwarded by the ip_input() routine,
360			 * if necessary.
361			 */
362			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
363				/*
364				 * Check if rsvp daemon is running. If not, don't
365				 * set ip_moptions. This ensures that the packet
366				 * is multicast and not just sent down one link
367				 * as prescribed by rsvpd.
368				 */
369				if (!rsvp_on)
370				  imo = NULL;
371				if (ip_mforward(ip, ifp, m, imo) != 0) {
372					m_freem(m);
373					goto done;
374				}
375			}
376		}
377
378		/*
379		 * Multicasts with a time-to-live of zero may be looped-
380		 * back, above, but must not be transmitted on a network.
381		 * Also, multicasts addressed to the loopback interface
382		 * are not sent -- the above call to ip_mloopback() will
383		 * loop back a copy if this host actually belongs to the
384		 * destination group on the loopback interface.
385		 */
386		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
387			m_freem(m);
388			goto done;
389		}
390
391		goto sendit;
392	}
393#ifndef notdef
394	/*
395	 * If source address not specified yet, use address
396	 * of outgoing interface.
397	 */
398	if (ip->ip_src.s_addr == INADDR_ANY) {
399		ip->ip_src = IA_SIN(ia)->sin_addr;
400#ifdef IPFIREWALL_FORWARD
401		/* Keep note that we did this - if the firewall changes
402		 * the next-hop, our interface may change, changing the
403		 * default source IP. It's a shame so much effort happens
404		 * twice. Oh well.
405		 */
406		fwd_rewrite_src++;
407#endif /* IPFIREWALL_FORWARD */
408	}
409#endif /* notdef */
410	/*
411	 * Verify that we have any chance at all of being able to queue
412	 *      the packet or packet fragments
413	 */
414	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
415		ifp->if_snd.ifq_maxlen) {
416			error = ENOBUFS;
417			goto bad;
418	}
419
420	/*
421	 * Look for broadcast address and
422	 * and verify user is allowed to send
423	 * such a packet.
424	 */
425	if (isbroadcast) {
426		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
427			error = EADDRNOTAVAIL;
428			goto bad;
429		}
430		if ((flags & IP_ALLOWBROADCAST) == 0) {
431			error = EACCES;
432			goto bad;
433		}
434		/* don't allow broadcast messages to be fragmented */
435		if ((u_short)ip->ip_len > ifp->if_mtu) {
436			error = EMSGSIZE;
437			goto bad;
438		}
439		m->m_flags |= M_BCAST;
440	} else {
441		m->m_flags &= ~M_BCAST;
442	}
443
444sendit:
445	/*
446	 * IpHack's section.
447	 * - Xlate: translate packet's addr/port (NAT).
448	 * - Firewall: deny/allow/etc.
449	 * - Wrap: fake packet's addr/port <unimpl.>
450	 * - Encapsulate: put it in another IP and send out. <unimp.>
451	 */
452#if defined(IPFILTER) || defined(IPFILTER_LKM)
453	if (fr_checkp) {
454		struct  mbuf    *m1 = m;
455
456		if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
457			goto done;
458		ip = mtod(m = m1, struct ip *);
459	}
460#endif
461
462	/*
463	 * Check with the firewall...
464	 */
465	if (fw_enable && ip_fw_chk_ptr) {
466		struct sockaddr_in *old = dst;
467
468		off = (*ip_fw_chk_ptr)(&ip,
469		    hlen, ifp, &divert_cookie, &m, &rule, &dst);
470                /*
471                 * On return we must do the following:
472                 * m == NULL         -> drop the pkt
473                 * 1<=off<= 0xffff   -> DIVERT
474                 * (off & 0x10000)   -> send to a DUMMYNET pipe
475                 * (off & 0x20000)   -> TEE the packet
476                 * dst != old        -> IPFIREWALL_FORWARD
477                 * off==0, dst==old  -> accept
478                 * If some of the above modules is not compiled in, then
479                 * we should't have to check the corresponding condition
480                 * (because the ipfw control socket should not accept
481                 * unsupported rules), but better play safe and drop
482                 * packets in case of doubt.
483                 */
484		if (!m) { /* firewall said to reject */
485			error = EACCES;
486			goto done;
487		}
488		if (off == 0 && dst == old) /* common case */
489			goto pass ;
490#ifdef DUMMYNET
491                if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
492                    /*
493                     * pass the pkt to dummynet. Need to include
494                     * pipe number, m, ifp, ro, dst because these are
495                     * not recomputed in the next pass.
496                     * All other parameters have been already used and
497                     * so they are not needed anymore.
498                     * XXX note: if the ifp or ro entry are deleted
499                     * while a pkt is in dummynet, we are in trouble!
500                     */
501                    dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,dst,rule,
502				flags);
503			goto done;
504		}
505#endif
506#ifdef IPDIVERT
507		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
508			struct mbuf *clone = NULL;
509
510			/* Clone packet if we're doing a 'tee' */
511			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
512				clone = m_dup(m, M_DONTWAIT);
513
514			/* Restore packet header fields to original values */
515			HTONS(ip->ip_len);
516			HTONS(ip->ip_off);
517
518			/* Deliver packet to divert input routine */
519			ip_divert_cookie = divert_cookie;
520			divert_packet(m, 0, off & 0xffff);
521
522			/* If 'tee', continue with original packet */
523			if (clone != NULL) {
524				m = clone;
525				ip = mtod(m, struct ip *);
526				goto pass;
527			}
528			goto done;
529		}
530#endif
531
532#ifdef IPFIREWALL_FORWARD
533		/* Here we check dst to make sure it's directly reachable on the
534		 * interface we previously thought it was.
535		 * If it isn't (which may be likely in some situations) we have
536		 * to re-route it (ie, find a route for the next-hop and the
537		 * associated interface) and set them here. This is nested
538		 * forwarding which in most cases is undesirable, except where
539		 * such control is nigh impossible. So we do it here.
540		 * And I'm babbling.
541		 */
542		if (off == 0 && old != dst) {
543			struct in_ifaddr *ia;
544
545			/* It's changed... */
546			/* There must be a better way to do this next line... */
547			static struct route sro_fwd, *ro_fwd = &sro_fwd;
548#ifdef IPFIREWALL_FORWARD_DEBUG
549			printf("IPFIREWALL_FORWARD: New dst ip: ");
550			print_ip(dst->sin_addr);
551			printf("\n");
552#endif
553			/*
554			 * We need to figure out if we have been forwarded
555			 * to a local socket. If so then we should somehow
556			 * "loop back" to ip_input, and get directed to the
557			 * PCB as if we had received this packet. This is
558			 * because it may be dificult to identify the packets
559			 * you want to forward until they are being output
560			 * and have selected an interface. (e.g. locally
561			 * initiated packets) If we used the loopback inteface,
562			 * we would not be able to control what happens
563			 * as the packet runs through ip_input() as
564			 * it is done through a ISR.
565			 */
566			for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
567					ia = TAILQ_NEXT(ia, ia_link)) {
568				/*
569				 * If the addr to forward to is one
570				 * of ours, we pretend to
571				 * be the destination for this packet.
572				 */
573				if (IA_SIN(ia)->sin_addr.s_addr ==
574						 dst->sin_addr.s_addr)
575					break;
576			}
577			if (ia) {
578				/* tell ip_input "dont filter" */
579				ip_fw_fwd_addr = dst;
580				if (m->m_pkthdr.rcvif == NULL)
581					m->m_pkthdr.rcvif = ifunit("lo0");
582				ip->ip_len = htons((u_short)ip->ip_len);
583				ip->ip_off = htons((u_short)ip->ip_off);
584				ip->ip_sum = 0;
585				if (ip->ip_vhl == IP_VHL_BORING) {
586					ip->ip_sum = in_cksum_hdr(ip);
587				} else {
588					ip->ip_sum = in_cksum(m, hlen);
589				}
590				ip_input(m);
591				goto done;
592			}
593			/* Some of the logic for this was
594			 * nicked from above.
595			 *
596			 * This rewrites the cached route in a local PCB.
597			 * Is this what we want to do?
598			 */
599			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
600
601			ro_fwd->ro_rt = 0;
602			rtalloc_ign(ro_fwd, RTF_PRCLONING);
603
604			if (ro_fwd->ro_rt == 0) {
605				ipstat.ips_noroute++;
606				error = EHOSTUNREACH;
607				goto bad;
608			}
609
610			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
611			ifp = ro_fwd->ro_rt->rt_ifp;
612			ro_fwd->ro_rt->rt_use++;
613			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
614				dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
615			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
616				isbroadcast =
617				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
618			else
619				isbroadcast = in_broadcast(dst->sin_addr, ifp);
620			RTFREE(ro->ro_rt);
621			ro->ro_rt = ro_fwd->ro_rt;
622			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
623
624			/*
625			 * If we added a default src ip earlier,
626			 * which would have been gotten from the-then
627			 * interface, do it again, from the new one.
628			 */
629			if (fwd_rewrite_src)
630				ip->ip_src = IA_SIN(ia)->sin_addr;
631			goto pass ;
632		}
633#endif /* IPFIREWALL_FORWARD */
634                /*
635                 * if we get here, none of the above matches, and
636                 * we have to drop the pkt
637                 */
638		m_freem(m);
639                error = EACCES; /* not sure this is the right error msg */
640                goto done;
641	}
642
643pass:
644#ifdef IPSEC
645	/* get SP for this packet */
646	if (so == NULL)
647		sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
648	else
649		sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
650
651	if (sp == NULL) {
652		ipsecstat.out_inval++;
653		goto bad;
654	}
655
656	error = 0;
657
658	/* check policy */
659	switch (sp->policy) {
660	case IPSEC_POLICY_DISCARD:
661		/*
662		 * This packet is just discarded.
663		 */
664		ipsecstat.out_polvio++;
665		goto bad;
666
667	case IPSEC_POLICY_BYPASS:
668	case IPSEC_POLICY_NONE:
669		/* no need to do IPsec. */
670		goto skip_ipsec;
671
672	case IPSEC_POLICY_IPSEC:
673		if (sp->req == NULL) {
674			/* XXX should be panic ? */
675			printf("ip_output: No IPsec request specified.\n");
676			error = EINVAL;
677			goto bad;
678		}
679		break;
680
681	case IPSEC_POLICY_ENTRUST:
682	default:
683		printf("ip_output: Invalid policy found. %d\n", sp->policy);
684	}
685
686	ip->ip_len = htons((u_short)ip->ip_len);
687	ip->ip_off = htons((u_short)ip->ip_off);
688	ip->ip_sum = 0;
689
690    {
691	struct ipsec_output_state state;
692	bzero(&state, sizeof(state));
693	state.m = m;
694	if (flags & IP_ROUTETOIF) {
695		state.ro = &iproute;
696		bzero(&iproute, sizeof(iproute));
697	} else
698		state.ro = ro;
699	state.dst = (struct sockaddr *)dst;
700
701	error = ipsec4_output(&state, sp, flags);
702
703	m = state.m;
704	if (flags & IP_ROUTETOIF) {
705		/*
706		 * if we have tunnel mode SA, we may need to ignore
707		 * IP_ROUTETOIF.
708		 */
709		if (state.ro != &iproute || state.ro->ro_rt != NULL) {
710			flags &= ~IP_ROUTETOIF;
711			ro = state.ro;
712		}
713	} else
714		ro = state.ro;
715	dst = (struct sockaddr_in *)state.dst;
716	if (error) {
717		/* mbuf is already reclaimed in ipsec4_output. */
718		m0 = NULL;
719		switch (error) {
720		case EHOSTUNREACH:
721		case ENETUNREACH:
722		case EMSGSIZE:
723		case ENOBUFS:
724		case ENOMEM:
725			break;
726		default:
727			printf("ip4_output (ipsec): error code %d\n", error);
728			/*fall through*/
729		case ENOENT:
730			/* don't show these error codes to the user */
731			error = 0;
732			break;
733		}
734		goto bad;
735	}
736    }
737
738	/* be sure to update variables that are affected by ipsec4_output() */
739	ip = mtod(m, struct ip *);
740#ifdef _IP_VHL
741	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
742#else
743	hlen = ip->ip_hl << 2;
744#endif
745	if (ro->ro_rt == NULL) {
746		if ((flags & IP_ROUTETOIF) == 0) {
747			printf("ip_output: "
748				"can't update route after IPsec processing\n");
749			error = EHOSTUNREACH;	/*XXX*/
750			goto bad;
751		}
752	} else {
753		/* nobody uses ia beyond here */
754		ifp = ro->ro_rt->rt_ifp;
755	}
756
757	/* make it flipped, again. */
758	ip->ip_len = ntohs((u_short)ip->ip_len);
759	ip->ip_off = ntohs((u_short)ip->ip_off);
760skip_ipsec:
761#endif /*IPSEC*/
762
763	/*
764	 * If small enough for interface, can just send directly.
765	 */
766	if ((u_short)ip->ip_len <= ifp->if_mtu) {
767		ip->ip_len = htons((u_short)ip->ip_len);
768		ip->ip_off = htons((u_short)ip->ip_off);
769		ip->ip_sum = 0;
770		if (ip->ip_vhl == IP_VHL_BORING) {
771			ip->ip_sum = in_cksum_hdr(ip);
772		} else {
773			ip->ip_sum = in_cksum(m, hlen);
774		}
775		error = (*ifp->if_output)(ifp, m,
776				(struct sockaddr *)dst, ro->ro_rt);
777		goto done;
778	}
779	/*
780	 * Too large for interface; fragment if possible.
781	 * Must be able to put at least 8 bytes per fragment.
782	 */
783	if (ip->ip_off & IP_DF) {
784		error = EMSGSIZE;
785		/*
786		 * This case can happen if the user changed the MTU
787		 * of an interface after enabling IP on it.  Because
788		 * most netifs don't keep track of routes pointing to
789		 * them, there is no way for one to update all its
790		 * routes when the MTU is changed.
791		 */
792		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
793		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
794		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
795			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
796		}
797		ipstat.ips_cantfrag++;
798		goto bad;
799	}
800	len = (ifp->if_mtu - hlen) &~ 7;
801	if (len < 8) {
802		error = EMSGSIZE;
803		goto bad;
804	}
805
806    {
807	int mhlen, firstlen = len;
808	struct mbuf **mnext = &m->m_nextpkt;
809
810	/*
811	 * Loop through length of segment after first fragment,
812	 * make new header and copy data of each part and link onto chain.
813	 */
814	m0 = m;
815	mhlen = sizeof (struct ip);
816	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
817		MGETHDR(m, M_DONTWAIT, MT_HEADER);
818		if (m == 0) {
819			error = ENOBUFS;
820			ipstat.ips_odropped++;
821			goto sendorfree;
822		}
823		m->m_flags |= (m0->m_flags & M_MCAST);
824		m->m_data += max_linkhdr;
825		mhip = mtod(m, struct ip *);
826		*mhip = *ip;
827		if (hlen > sizeof (struct ip)) {
828			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
829			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
830		}
831		m->m_len = mhlen;
832		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
833		if (ip->ip_off & IP_MF)
834			mhip->ip_off |= IP_MF;
835		if (off + len >= (u_short)ip->ip_len)
836			len = (u_short)ip->ip_len - off;
837		else
838			mhip->ip_off |= IP_MF;
839		mhip->ip_len = htons((u_short)(len + mhlen));
840		m->m_next = m_copy(m0, off, len);
841		if (m->m_next == 0) {
842			(void) m_free(m);
843			error = ENOBUFS;	/* ??? */
844			ipstat.ips_odropped++;
845			goto sendorfree;
846		}
847		m->m_pkthdr.len = mhlen + len;
848		m->m_pkthdr.rcvif = (struct ifnet *)0;
849		mhip->ip_off = htons((u_short)mhip->ip_off);
850		mhip->ip_sum = 0;
851		if (mhip->ip_vhl == IP_VHL_BORING) {
852			mhip->ip_sum = in_cksum_hdr(mhip);
853		} else {
854			mhip->ip_sum = in_cksum(m, mhlen);
855		}
856		*mnext = m;
857		mnext = &m->m_nextpkt;
858		ipstat.ips_ofragments++;
859	}
860	/*
861	 * Update first fragment by trimming what's been copied out
862	 * and updating header, then send each fragment (in order).
863	 */
864	m = m0;
865	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
866	m->m_pkthdr.len = hlen + firstlen;
867	ip->ip_len = htons((u_short)m->m_pkthdr.len);
868	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
869	ip->ip_sum = 0;
870	if (ip->ip_vhl == IP_VHL_BORING) {
871		ip->ip_sum = in_cksum_hdr(ip);
872	} else {
873		ip->ip_sum = in_cksum(m, hlen);
874	}
875sendorfree:
876	for (m = m0; m; m = m0) {
877		m0 = m->m_nextpkt;
878		m->m_nextpkt = 0;
879		if (error == 0)
880			error = (*ifp->if_output)(ifp, m,
881			    (struct sockaddr *)dst, ro->ro_rt);
882		else
883			m_freem(m);
884	}
885
886	if (error == 0)
887		ipstat.ips_fragmented++;
888    }
889done:
890#ifdef IPSEC
891	if (ro == &iproute && ro->ro_rt) {
892		RTFREE(ro->ro_rt);
893		ro->ro_rt = NULL;
894	}
895	if (sp != NULL) {
896		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
897			printf("DP ip_output call free SP:%p\n", sp));
898		key_freesp(sp);
899	}
900#endif /* IPSEC */
901	return (error);
902bad:
903	m_freem(m0);
904	goto done;
905}
906
907/*
908 * Insert IP options into preformed packet.
909 * Adjust IP destination as required for IP source routing,
910 * as indicated by a non-zero in_addr at the start of the options.
911 *
912 * XXX This routine assumes that the packet has no options in place.
913 */
914static struct mbuf *
915ip_insertoptions(m, opt, phlen)
916	register struct mbuf *m;
917	struct mbuf *opt;
918	int *phlen;
919{
920	register struct ipoption *p = mtod(opt, struct ipoption *);
921	struct mbuf *n;
922	register struct ip *ip = mtod(m, struct ip *);
923	unsigned optlen;
924
925	optlen = opt->m_len - sizeof(p->ipopt_dst);
926	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
927		return (m);		/* XXX should fail */
928	if (p->ipopt_dst.s_addr)
929		ip->ip_dst = p->ipopt_dst;
930	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
931		MGETHDR(n, M_DONTWAIT, MT_HEADER);
932		if (n == 0)
933			return (m);
934		n->m_pkthdr.rcvif = (struct ifnet *)0;
935		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
936		m->m_len -= sizeof(struct ip);
937		m->m_data += sizeof(struct ip);
938		n->m_next = m;
939		m = n;
940		m->m_len = optlen + sizeof(struct ip);
941		m->m_data += max_linkhdr;
942		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
943	} else {
944		m->m_data -= optlen;
945		m->m_len += optlen;
946		m->m_pkthdr.len += optlen;
947		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
948	}
949	ip = mtod(m, struct ip *);
950	bcopy(p->ipopt_list, ip + 1, optlen);
951	*phlen = sizeof(struct ip) + optlen;
952	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
953	ip->ip_len += optlen;
954	return (m);
955}
956
957/*
958 * Copy options from ip to jp,
959 * omitting those not copied during fragmentation.
960 */
961#if !defined(IPFILTER) && !defined(IPFILTER_LKM)
962static
963#endif
964int
965ip_optcopy(ip, jp)
966	struct ip *ip, *jp;
967{
968	register u_char *cp, *dp;
969	int opt, optlen, cnt;
970
971	cp = (u_char *)(ip + 1);
972	dp = (u_char *)(jp + 1);
973	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
974	for (; cnt > 0; cnt -= optlen, cp += optlen) {
975		opt = cp[0];
976		if (opt == IPOPT_EOL)
977			break;
978		if (opt == IPOPT_NOP) {
979			/* Preserve for IP mcast tunnel's LSRR alignment. */
980			*dp++ = IPOPT_NOP;
981			optlen = 1;
982			continue;
983		} else
984			optlen = cp[IPOPT_OLEN];
985		/* bogus lengths should have been caught by ip_dooptions */
986		if (optlen > cnt)
987			optlen = cnt;
988		if (IPOPT_COPIED(opt)) {
989			bcopy(cp, dp, optlen);
990			dp += optlen;
991		}
992	}
993	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
994		*dp++ = IPOPT_EOL;
995	return (optlen);
996}
997
998/*
999 * IP socket option processing.
1000 */
1001int
1002ip_ctloutput(so, sopt)
1003	struct socket *so;
1004	struct sockopt *sopt;
1005{
1006	struct	inpcb *inp = sotoinpcb(so);
1007	int	error, optval;
1008
1009	error = optval = 0;
1010	if (sopt->sopt_level != IPPROTO_IP) {
1011		return (EINVAL);
1012	}
1013
1014	switch (sopt->sopt_dir) {
1015	case SOPT_SET:
1016		switch (sopt->sopt_name) {
1017		case IP_OPTIONS:
1018#ifdef notyet
1019		case IP_RETOPTS:
1020#endif
1021		{
1022			struct mbuf *m;
1023			if (sopt->sopt_valsize > MLEN) {
1024				error = EMSGSIZE;
1025				break;
1026			}
1027			MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1028			if (m == 0) {
1029				error = ENOBUFS;
1030				break;
1031			}
1032			m->m_len = sopt->sopt_valsize;
1033			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1034					    m->m_len);
1035
1036			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1037					   m));
1038		}
1039
1040		case IP_TOS:
1041		case IP_TTL:
1042		case IP_RECVOPTS:
1043		case IP_RECVRETOPTS:
1044		case IP_RECVDSTADDR:
1045		case IP_RECVIF:
1046#if defined(NFAITH) && NFAITH > 0
1047		case IP_FAITH:
1048#endif
1049			error = sooptcopyin(sopt, &optval, sizeof optval,
1050					    sizeof optval);
1051			if (error)
1052				break;
1053
1054			switch (sopt->sopt_name) {
1055			case IP_TOS:
1056				inp->inp_ip_tos = optval;
1057				break;
1058
1059			case IP_TTL:
1060				inp->inp_ip_ttl = optval;
1061				break;
1062#define	OPTSET(bit) \
1063	if (optval) \
1064		inp->inp_flags |= bit; \
1065	else \
1066		inp->inp_flags &= ~bit;
1067
1068			case IP_RECVOPTS:
1069				OPTSET(INP_RECVOPTS);
1070				break;
1071
1072			case IP_RECVRETOPTS:
1073				OPTSET(INP_RECVRETOPTS);
1074				break;
1075
1076			case IP_RECVDSTADDR:
1077				OPTSET(INP_RECVDSTADDR);
1078				break;
1079
1080			case IP_RECVIF:
1081				OPTSET(INP_RECVIF);
1082				break;
1083
1084#if defined(NFAITH) && NFAITH > 0
1085			case IP_FAITH:
1086				OPTSET(INP_FAITH);
1087				break;
1088#endif
1089			}
1090			break;
1091#undef OPTSET
1092
1093		case IP_MULTICAST_IF:
1094		case IP_MULTICAST_VIF:
1095		case IP_MULTICAST_TTL:
1096		case IP_MULTICAST_LOOP:
1097		case IP_ADD_MEMBERSHIP:
1098		case IP_DROP_MEMBERSHIP:
1099			error = ip_setmoptions(sopt, &inp->inp_moptions);
1100			break;
1101
1102		case IP_PORTRANGE:
1103			error = sooptcopyin(sopt, &optval, sizeof optval,
1104					    sizeof optval);
1105			if (error)
1106				break;
1107
1108			switch (optval) {
1109			case IP_PORTRANGE_DEFAULT:
1110				inp->inp_flags &= ~(INP_LOWPORT);
1111				inp->inp_flags &= ~(INP_HIGHPORT);
1112				break;
1113
1114			case IP_PORTRANGE_HIGH:
1115				inp->inp_flags &= ~(INP_LOWPORT);
1116				inp->inp_flags |= INP_HIGHPORT;
1117				break;
1118
1119			case IP_PORTRANGE_LOW:
1120				inp->inp_flags &= ~(INP_HIGHPORT);
1121				inp->inp_flags |= INP_LOWPORT;
1122				break;
1123
1124			default:
1125				error = EINVAL;
1126				break;
1127			}
1128			break;
1129
1130#ifdef IPSEC
1131		case IP_IPSEC_POLICY:
1132		{
1133			caddr_t req;
1134			int priv;
1135			struct mbuf *m;
1136			int optname;
1137
1138			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1139				break;
1140			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1141				break;
1142			priv = (sopt->sopt_p != NULL &&
1143				suser(sopt->sopt_p) != 0) ? 0 : 1;
1144			req = mtod(m, caddr_t);
1145			optname = sopt->sopt_name;
1146			error = ipsec4_set_policy(inp, optname, req, priv);
1147			m_freem(m);
1148			break;
1149		}
1150#endif /*IPSEC*/
1151
1152		default:
1153			error = ENOPROTOOPT;
1154			break;
1155		}
1156		break;
1157
1158	case SOPT_GET:
1159		switch (sopt->sopt_name) {
1160		case IP_OPTIONS:
1161		case IP_RETOPTS:
1162			if (inp->inp_options)
1163				error = sooptcopyout(sopt,
1164						     mtod(inp->inp_options,
1165							  char *),
1166						     inp->inp_options->m_len);
1167			else
1168				sopt->sopt_valsize = 0;
1169			break;
1170
1171		case IP_TOS:
1172		case IP_TTL:
1173		case IP_RECVOPTS:
1174		case IP_RECVRETOPTS:
1175		case IP_RECVDSTADDR:
1176		case IP_RECVIF:
1177		case IP_PORTRANGE:
1178#if defined(NFAITH) && NFAITH > 0
1179		case IP_FAITH:
1180#endif
1181			switch (sopt->sopt_name) {
1182
1183			case IP_TOS:
1184				optval = inp->inp_ip_tos;
1185				break;
1186
1187			case IP_TTL:
1188				optval = inp->inp_ip_ttl;
1189				break;
1190
1191#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1192
1193			case IP_RECVOPTS:
1194				optval = OPTBIT(INP_RECVOPTS);
1195				break;
1196
1197			case IP_RECVRETOPTS:
1198				optval = OPTBIT(INP_RECVRETOPTS);
1199				break;
1200
1201			case IP_RECVDSTADDR:
1202				optval = OPTBIT(INP_RECVDSTADDR);
1203				break;
1204
1205			case IP_RECVIF:
1206				optval = OPTBIT(INP_RECVIF);
1207				break;
1208
1209			case IP_PORTRANGE:
1210				if (inp->inp_flags & INP_HIGHPORT)
1211					optval = IP_PORTRANGE_HIGH;
1212				else if (inp->inp_flags & INP_LOWPORT)
1213					optval = IP_PORTRANGE_LOW;
1214				else
1215					optval = 0;
1216				break;
1217
1218#if defined(NFAITH) && NFAITH > 0
1219			case IP_FAITH:
1220				optval = OPTBIT(INP_FAITH);
1221				break;
1222#endif
1223			}
1224			error = sooptcopyout(sopt, &optval, sizeof optval);
1225			break;
1226
1227		case IP_MULTICAST_IF:
1228		case IP_MULTICAST_VIF:
1229		case IP_MULTICAST_TTL:
1230		case IP_MULTICAST_LOOP:
1231		case IP_ADD_MEMBERSHIP:
1232		case IP_DROP_MEMBERSHIP:
1233			error = ip_getmoptions(sopt, inp->inp_moptions);
1234			break;
1235
1236#ifdef IPSEC
1237		case IP_IPSEC_POLICY:
1238		{
1239			struct mbuf *m;
1240			caddr_t req = NULL;
1241
1242			if (m != 0)
1243				req = mtod(m, caddr_t);
1244			error = ipsec4_get_policy(sotoinpcb(so), req, &m);
1245			if (error == 0)
1246				error = soopt_mcopyout(sopt, m); /* XXX */
1247			m_freem(m);
1248			break;
1249		}
1250#endif /*IPSEC*/
1251
1252		default:
1253			error = ENOPROTOOPT;
1254			break;
1255		}
1256		break;
1257	}
1258	return (error);
1259}
1260
1261/*
1262 * Set up IP options in pcb for insertion in output packets.
1263 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1264 * with destination address if source routed.
1265 */
1266static int
1267ip_pcbopts(optname, pcbopt, m)
1268	int optname;
1269	struct mbuf **pcbopt;
1270	register struct mbuf *m;
1271{
1272	register int cnt, optlen;
1273	register u_char *cp;
1274	u_char opt;
1275
1276	/* turn off any old options */
1277	if (*pcbopt)
1278		(void)m_free(*pcbopt);
1279	*pcbopt = 0;
1280	if (m == (struct mbuf *)0 || m->m_len == 0) {
1281		/*
1282		 * Only turning off any previous options.
1283		 */
1284		if (m)
1285			(void)m_free(m);
1286		return (0);
1287	}
1288
1289#ifndef	vax
1290	if (m->m_len % sizeof(int32_t))
1291		goto bad;
1292#endif
1293	/*
1294	 * IP first-hop destination address will be stored before
1295	 * actual options; move other options back
1296	 * and clear it when none present.
1297	 */
1298	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1299		goto bad;
1300	cnt = m->m_len;
1301	m->m_len += sizeof(struct in_addr);
1302	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1303	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1304	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1305
1306	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1307		opt = cp[IPOPT_OPTVAL];
1308		if (opt == IPOPT_EOL)
1309			break;
1310		if (opt == IPOPT_NOP)
1311			optlen = 1;
1312		else {
1313			optlen = cp[IPOPT_OLEN];
1314			if (optlen <= IPOPT_OLEN || optlen > cnt)
1315				goto bad;
1316		}
1317		switch (opt) {
1318
1319		default:
1320			break;
1321
1322		case IPOPT_LSRR:
1323		case IPOPT_SSRR:
1324			/*
1325			 * user process specifies route as:
1326			 *	->A->B->C->D
1327			 * D must be our final destination (but we can't
1328			 * check that since we may not have connected yet).
1329			 * A is first hop destination, which doesn't appear in
1330			 * actual IP option, but is stored before the options.
1331			 */
1332			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1333				goto bad;
1334			m->m_len -= sizeof(struct in_addr);
1335			cnt -= sizeof(struct in_addr);
1336			optlen -= sizeof(struct in_addr);
1337			cp[IPOPT_OLEN] = optlen;
1338			/*
1339			 * Move first hop before start of options.
1340			 */
1341			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1342			    sizeof(struct in_addr));
1343			/*
1344			 * Then copy rest of options back
1345			 * to close up the deleted entry.
1346			 */
1347			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1348			    sizeof(struct in_addr)),
1349			    (caddr_t)&cp[IPOPT_OFFSET+1],
1350			    (unsigned)cnt + sizeof(struct in_addr));
1351			break;
1352		}
1353	}
1354	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1355		goto bad;
1356	*pcbopt = m;
1357	return (0);
1358
1359bad:
1360	(void)m_free(m);
1361	return (EINVAL);
1362}
1363
1364/*
1365 * XXX
1366 * The whole multicast option thing needs to be re-thought.
1367 * Several of these options are equally applicable to non-multicast
1368 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1369 * standard option (IP_TTL).
1370 */
1371/*
1372 * Set the IP multicast options in response to user setsockopt().
1373 */
1374static int
1375ip_setmoptions(sopt, imop)
1376	struct sockopt *sopt;
1377	struct ip_moptions **imop;
1378{
1379	int error = 0;
1380	int i;
1381	struct in_addr addr;
1382	struct ip_mreq mreq;
1383	struct ifnet *ifp;
1384	struct ip_moptions *imo = *imop;
1385	struct route ro;
1386	struct sockaddr_in *dst;
1387	int s;
1388
1389	if (imo == NULL) {
1390		/*
1391		 * No multicast option buffer attached to the pcb;
1392		 * allocate one and initialize to default values.
1393		 */
1394		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1395		    M_WAITOK);
1396
1397		if (imo == NULL)
1398			return (ENOBUFS);
1399		*imop = imo;
1400		imo->imo_multicast_ifp = NULL;
1401		imo->imo_multicast_vif = -1;
1402		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1403		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1404		imo->imo_num_memberships = 0;
1405	}
1406
1407	switch (sopt->sopt_name) {
1408	/* store an index number for the vif you wanna use in the send */
1409	case IP_MULTICAST_VIF:
1410		if (legal_vif_num == 0) {
1411			error = EOPNOTSUPP;
1412			break;
1413		}
1414		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1415		if (error)
1416			break;
1417		if (!legal_vif_num(i) && (i != -1)) {
1418			error = EINVAL;
1419			break;
1420		}
1421		imo->imo_multicast_vif = i;
1422		break;
1423
1424	case IP_MULTICAST_IF:
1425		/*
1426		 * Select the interface for outgoing multicast packets.
1427		 */
1428		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1429		if (error)
1430			break;
1431		/*
1432		 * INADDR_ANY is used to remove a previous selection.
1433		 * When no interface is selected, a default one is
1434		 * chosen every time a multicast packet is sent.
1435		 */
1436		if (addr.s_addr == INADDR_ANY) {
1437			imo->imo_multicast_ifp = NULL;
1438			break;
1439		}
1440		/*
1441		 * The selected interface is identified by its local
1442		 * IP address.  Find the interface and confirm that
1443		 * it supports multicasting.
1444		 */
1445		s = splimp();
1446		INADDR_TO_IFP(addr, ifp);
1447		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1448			splx(s);
1449			error = EADDRNOTAVAIL;
1450			break;
1451		}
1452		imo->imo_multicast_ifp = ifp;
1453		splx(s);
1454		break;
1455
1456	case IP_MULTICAST_TTL:
1457		/*
1458		 * Set the IP time-to-live for outgoing multicast packets.
1459		 * The original multicast API required a char argument,
1460		 * which is inconsistent with the rest of the socket API.
1461		 * We allow either a char or an int.
1462		 */
1463		if (sopt->sopt_valsize == 1) {
1464			u_char ttl;
1465			error = sooptcopyin(sopt, &ttl, 1, 1);
1466			if (error)
1467				break;
1468			imo->imo_multicast_ttl = ttl;
1469		} else {
1470			u_int ttl;
1471			error = sooptcopyin(sopt, &ttl, sizeof ttl,
1472					    sizeof ttl);
1473			if (error)
1474				break;
1475			if (ttl > 255)
1476				error = EINVAL;
1477			else
1478				imo->imo_multicast_ttl = ttl;
1479		}
1480		break;
1481
1482	case IP_MULTICAST_LOOP:
1483		/*
1484		 * Set the loopback flag for outgoing multicast packets.
1485		 * Must be zero or one.  The original multicast API required a
1486		 * char argument, which is inconsistent with the rest
1487		 * of the socket API.  We allow either a char or an int.
1488		 */
1489		if (sopt->sopt_valsize == 1) {
1490			u_char loop;
1491			error = sooptcopyin(sopt, &loop, 1, 1);
1492			if (error)
1493				break;
1494			imo->imo_multicast_loop = !!loop;
1495		} else {
1496			u_int loop;
1497			error = sooptcopyin(sopt, &loop, sizeof loop,
1498					    sizeof loop);
1499			if (error)
1500				break;
1501			imo->imo_multicast_loop = !!loop;
1502		}
1503		break;
1504
1505	case IP_ADD_MEMBERSHIP:
1506		/*
1507		 * Add a multicast group membership.
1508		 * Group must be a valid IP multicast address.
1509		 */
1510		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1511		if (error)
1512			break;
1513
1514		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1515			error = EINVAL;
1516			break;
1517		}
1518		s = splimp();
1519		/*
1520		 * If no interface address was provided, use the interface of
1521		 * the route to the given multicast address.
1522		 */
1523		if (mreq.imr_interface.s_addr == INADDR_ANY) {
1524			bzero((caddr_t)&ro, sizeof(ro));
1525			dst = (struct sockaddr_in *)&ro.ro_dst;
1526			dst->sin_len = sizeof(*dst);
1527			dst->sin_family = AF_INET;
1528			dst->sin_addr = mreq.imr_multiaddr;
1529			rtalloc(&ro);
1530			if (ro.ro_rt == NULL) {
1531				error = EADDRNOTAVAIL;
1532				splx(s);
1533				break;
1534			}
1535			ifp = ro.ro_rt->rt_ifp;
1536			rtfree(ro.ro_rt);
1537		}
1538		else {
1539			INADDR_TO_IFP(mreq.imr_interface, ifp);
1540		}
1541
1542		/*
1543		 * See if we found an interface, and confirm that it
1544		 * supports multicast.
1545		 */
1546		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1547			error = EADDRNOTAVAIL;
1548			splx(s);
1549			break;
1550		}
1551		/*
1552		 * See if the membership already exists or if all the
1553		 * membership slots are full.
1554		 */
1555		for (i = 0; i < imo->imo_num_memberships; ++i) {
1556			if (imo->imo_membership[i]->inm_ifp == ifp &&
1557			    imo->imo_membership[i]->inm_addr.s_addr
1558						== mreq.imr_multiaddr.s_addr)
1559				break;
1560		}
1561		if (i < imo->imo_num_memberships) {
1562			error = EADDRINUSE;
1563			splx(s);
1564			break;
1565		}
1566		if (i == IP_MAX_MEMBERSHIPS) {
1567			error = ETOOMANYREFS;
1568			splx(s);
1569			break;
1570		}
1571		/*
1572		 * Everything looks good; add a new record to the multicast
1573		 * address list for the given interface.
1574		 */
1575		if ((imo->imo_membership[i] =
1576		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1577			error = ENOBUFS;
1578			splx(s);
1579			break;
1580		}
1581		++imo->imo_num_memberships;
1582		splx(s);
1583		break;
1584
1585	case IP_DROP_MEMBERSHIP:
1586		/*
1587		 * Drop a multicast group membership.
1588		 * Group must be a valid IP multicast address.
1589		 */
1590		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1591		if (error)
1592			break;
1593
1594		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1595			error = EINVAL;
1596			break;
1597		}
1598
1599		s = splimp();
1600		/*
1601		 * If an interface address was specified, get a pointer
1602		 * to its ifnet structure.
1603		 */
1604		if (mreq.imr_interface.s_addr == INADDR_ANY)
1605			ifp = NULL;
1606		else {
1607			INADDR_TO_IFP(mreq.imr_interface, ifp);
1608			if (ifp == NULL) {
1609				error = EADDRNOTAVAIL;
1610				splx(s);
1611				break;
1612			}
1613		}
1614		/*
1615		 * Find the membership in the membership array.
1616		 */
1617		for (i = 0; i < imo->imo_num_memberships; ++i) {
1618			if ((ifp == NULL ||
1619			     imo->imo_membership[i]->inm_ifp == ifp) &&
1620			     imo->imo_membership[i]->inm_addr.s_addr ==
1621			     mreq.imr_multiaddr.s_addr)
1622				break;
1623		}
1624		if (i == imo->imo_num_memberships) {
1625			error = EADDRNOTAVAIL;
1626			splx(s);
1627			break;
1628		}
1629		/*
1630		 * Give up the multicast address record to which the
1631		 * membership points.
1632		 */
1633		in_delmulti(imo->imo_membership[i]);
1634		/*
1635		 * Remove the gap in the membership array.
1636		 */
1637		for (++i; i < imo->imo_num_memberships; ++i)
1638			imo->imo_membership[i-1] = imo->imo_membership[i];
1639		--imo->imo_num_memberships;
1640		splx(s);
1641		break;
1642
1643	default:
1644		error = EOPNOTSUPP;
1645		break;
1646	}
1647
1648	/*
1649	 * If all options have default values, no need to keep the mbuf.
1650	 */
1651	if (imo->imo_multicast_ifp == NULL &&
1652	    imo->imo_multicast_vif == -1 &&
1653	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1654	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1655	    imo->imo_num_memberships == 0) {
1656		free(*imop, M_IPMOPTS);
1657		*imop = NULL;
1658	}
1659
1660	return (error);
1661}
1662
1663/*
1664 * Return the IP multicast options in response to user getsockopt().
1665 */
1666static int
1667ip_getmoptions(sopt, imo)
1668	struct sockopt *sopt;
1669	register struct ip_moptions *imo;
1670{
1671	struct in_addr addr;
1672	struct in_ifaddr *ia;
1673	int error, optval;
1674	u_char coptval;
1675
1676	error = 0;
1677	switch (sopt->sopt_name) {
1678	case IP_MULTICAST_VIF:
1679		if (imo != NULL)
1680			optval = imo->imo_multicast_vif;
1681		else
1682			optval = -1;
1683		error = sooptcopyout(sopt, &optval, sizeof optval);
1684		break;
1685
1686	case IP_MULTICAST_IF:
1687		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1688			addr.s_addr = INADDR_ANY;
1689		else {
1690			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1691			addr.s_addr = (ia == NULL) ? INADDR_ANY
1692				: IA_SIN(ia)->sin_addr.s_addr;
1693		}
1694		error = sooptcopyout(sopt, &addr, sizeof addr);
1695		break;
1696
1697	case IP_MULTICAST_TTL:
1698		if (imo == 0)
1699			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1700		else
1701			optval = coptval = imo->imo_multicast_ttl;
1702		if (sopt->sopt_valsize == 1)
1703			error = sooptcopyout(sopt, &coptval, 1);
1704		else
1705			error = sooptcopyout(sopt, &optval, sizeof optval);
1706		break;
1707
1708	case IP_MULTICAST_LOOP:
1709		if (imo == 0)
1710			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1711		else
1712			optval = coptval = imo->imo_multicast_loop;
1713		if (sopt->sopt_valsize == 1)
1714			error = sooptcopyout(sopt, &coptval, 1);
1715		else
1716			error = sooptcopyout(sopt, &optval, sizeof optval);
1717		break;
1718
1719	default:
1720		error = ENOPROTOOPT;
1721		break;
1722	}
1723	return (error);
1724}
1725
1726/*
1727 * Discard the IP multicast options.
1728 */
1729void
1730ip_freemoptions(imo)
1731	register struct ip_moptions *imo;
1732{
1733	register int i;
1734
1735	if (imo != NULL) {
1736		for (i = 0; i < imo->imo_num_memberships; ++i)
1737			in_delmulti(imo->imo_membership[i]);
1738		free(imo, M_IPMOPTS);
1739	}
1740}
1741
1742/*
1743 * Routine called from ip_output() to loop back a copy of an IP multicast
1744 * packet to the input queue of a specified interface.  Note that this
1745 * calls the output routine of the loopback "driver", but with an interface
1746 * pointer that might NOT be a loopback interface -- evil, but easier than
1747 * replicating that code here.
1748 */
1749static void
1750ip_mloopback(ifp, m, dst, hlen)
1751	struct ifnet *ifp;
1752	register struct mbuf *m;
1753	register struct sockaddr_in *dst;
1754	int hlen;
1755{
1756	register struct ip *ip;
1757	struct mbuf *copym;
1758
1759	copym = m_copy(m, 0, M_COPYALL);
1760	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1761		copym = m_pullup(copym, hlen);
1762	if (copym != NULL) {
1763		/*
1764		 * We don't bother to fragment if the IP length is greater
1765		 * than the interface's MTU.  Can this possibly matter?
1766		 */
1767		ip = mtod(copym, struct ip *);
1768		ip->ip_len = htons((u_short)ip->ip_len);
1769		ip->ip_off = htons((u_short)ip->ip_off);
1770		ip->ip_sum = 0;
1771		if (ip->ip_vhl == IP_VHL_BORING) {
1772			ip->ip_sum = in_cksum_hdr(ip);
1773		} else {
1774			ip->ip_sum = in_cksum(copym, hlen);
1775		}
1776		/*
1777		 * NB:
1778		 * It's not clear whether there are any lingering
1779		 * reentrancy problems in other areas which might
1780		 * be exposed by using ip_input directly (in
1781		 * particular, everything which modifies the packet
1782		 * in-place).  Yet another option is using the
1783		 * protosw directly to deliver the looped back
1784		 * packet.  For the moment, we'll err on the side
1785		 * of safety by using if_simloop().
1786		 */
1787#if 1 /* XXX */
1788		if (dst->sin_family != AF_INET) {
1789			printf("ip_mloopback: bad address family %d\n",
1790						dst->sin_family);
1791			dst->sin_family = AF_INET;
1792		}
1793#endif
1794
1795#ifdef notdef
1796		copym->m_pkthdr.rcvif = ifp;
1797		ip_input(copym);
1798#else
1799		if_simloop(ifp, copym, (struct sockaddr *)dst, 0);
1800#endif
1801	}
1802}
1803