ip_output.c revision 55632
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 55632 2000-01-09 03:06:28Z shin $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43#include "opt_ipsec.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/kernel.h>
48#include <sys/malloc.h>
49#include <sys/mbuf.h>
50#include <sys/protosw.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#include <sys/proc.h>
54
55#include <net/if.h>
56#include <net/route.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/ip.h>
61#include <netinet/in_pcb.h>
62#include <netinet/in_var.h>
63#include <netinet/ip_var.h>
64
65#include "faith.h"
66
67#ifdef vax
68#include <machine/mtpr.h>
69#endif
70#include <machine/in_cksum.h>
71
72static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
73
74#ifdef IPSEC
75#include <netinet6/ipsec.h>
76#include <netkey/key.h>
77#ifdef IPSEC_DEBUG
78#include <netkey/key_debug.h>
79#else
80#define	KEYDEBUG(lev,arg)
81#endif
82#endif /*IPSEC*/
83
84#include <netinet/ip_fw.h>
85
86#ifdef DUMMYNET
87#include <netinet/ip_dummynet.h>
88#endif
89
#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in struct in_addr `a' in
 * dotted-quad form.
 *
 * The argument is parenthesized so that any lvalue expression may be
 * passed, the octets are cast to match the %lu conversions, and the
 * expansion is wrapped in do { } while (0) so that "print_ip(x);" is a
 * single statement even in an unbraced if/else.
 */
#define print_ip(a)							\
	do {								\
		printf("%lu.%lu.%lu.%lu",				\
		    (unsigned long)((ntohl((a).s_addr) >> 24) & 0xFF),	\
		    (unsigned long)((ntohl((a).s_addr) >> 16) & 0xFF),	\
		    (unsigned long)((ntohl((a).s_addr) >> 8) & 0xFF),	\
		    (unsigned long)(ntohl((a).s_addr) & 0xFF));		\
	} while (0)
#endif
96
97u_short ip_id;
98
99static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
100static void	ip_mloopback
101	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
102static int	ip_getmoptions
103	__P((struct sockopt *, struct ip_moptions *));
104static int	ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
105static int	ip_setmoptions
106	__P((struct sockopt *, struct ip_moptions **));
107
108#if defined(IPFILTER_LKM) || defined(IPFILTER)
109int	ip_optcopy __P((struct ip *, struct ip *));
110extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
111#else
112static int	ip_optcopy __P((struct ip *, struct ip *));
113#endif
114
115
116extern	struct protosw inetsw[];
117
118/*
119 * IP output.  The packet in mbuf chain m contains a skeletal IP
120 * header (with len, off, ttl, proto, tos, src, dst).
121 * The mbuf chain containing the packet will be freed.
122 * The mbuf opt, if present, will not be freed.
123 */
124int
125ip_output(m0, opt, ro, flags, imo)
126	struct mbuf *m0;
127	struct mbuf *opt;
128	struct route *ro;
129	int flags;
130	struct ip_moptions *imo;
131{
132	struct ip *ip, *mhip;
133	struct ifnet *ifp;
134	struct mbuf *m = m0;
135	int hlen = sizeof (struct ip);
136	int len, off, error = 0;
137	struct sockaddr_in *dst;
138	struct in_ifaddr *ia;
139	int isbroadcast;
140#ifdef IPSEC
141	struct route iproute;
142	struct socket *so = NULL;
143	struct secpolicy *sp = NULL;
144#endif
145	u_int16_t divert_cookie;		/* firewall cookie */
146#ifdef IPFIREWALL_FORWARD
147	int fwd_rewrite_src = 0;
148#endif
149	struct ip_fw_chain *rule = NULL;
150
151#ifdef IPDIVERT
152	/* Get and reset firewall cookie */
153	divert_cookie = ip_divert_cookie;
154	ip_divert_cookie = 0;
155#else
156	divert_cookie = 0;
157#endif
158
159	/*
160	 * NOTE: If IP_SOCKINMRCVIF flag is set, 'socket *' is kept in
161	 * m->m_pkthdr.rcvif for later IPSEC check. In this case,
162	 * m->m_pkthdr will be NULL cleared after the contents is saved in
163	 * 'so'.
164	 * NULL clearance of rcvif should be natural because the packet should
165	 * have been sent from my own socket and has no rcvif in this case.
166	 * It is also necessary because someone might consider it as
167	 * 'ifnet *', and cause SEGV.
168	 */
169#if defined(IPFIREWALL) && defined(DUMMYNET)
170        /*
171         * dummynet packet are prepended a vestigial mbuf with
172         * m_type = MT_DUMMYNET and m_data pointing to the matching
173         * rule.
174         */
175        if (m->m_type == MT_DUMMYNET) {
176            /*
177             * the packet was already tagged, so part of the
178             * processing was already done, and we need to go down.
179             * * Get parameters from the header.
180             */
181            rule = (struct ip_fw_chain *)(m->m_data) ;
182	    opt = NULL ;
183	    ro = & ( ((struct dn_pkt *)m)->ro ) ;
184	    imo = NULL ;
185	    dst = ((struct dn_pkt *)m)->dn_dst ;
186	    ifp = ((struct dn_pkt *)m)->ifp ;
187	    flags = ((struct dn_pkt *)m)->flags ;
188
189            m0 = m = m->m_next ;
190#ifdef IPSEC
191	    if ((flags & IP_SOCKINMRCVIF) != 0) {
192	        so = (struct socket *)m->m_pkthdr.rcvif;
193	        m->m_pkthdr.rcvif = NULL;
194	    }
195#endif
196            ip = mtod(m, struct ip *);
197            hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
198            goto sendit;
199        } else
200            rule = NULL ;
201#endif
202#ifdef IPSEC
203	if ((flags & IP_SOCKINMRCVIF) != 0) {
204		so = (struct socket *)m->m_pkthdr.rcvif;
205		m->m_pkthdr.rcvif = NULL;
206	}
207#endif
208
209#ifdef	DIAGNOSTIC
210	if ((m->m_flags & M_PKTHDR) == 0)
211		panic("ip_output no HDR");
212	if (!ro)
213		panic("ip_output no route, proto = %d",
214		      mtod(m, struct ip *)->ip_p);
215#endif
216	if (opt) {
217		m = ip_insertoptions(m, opt, &len);
218		hlen = len;
219	}
220	ip = mtod(m, struct ip *);
221	/*
222	 * Fill in IP header.
223	 */
224	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
225		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
226		ip->ip_off &= IP_DF;
227		ip->ip_id = htons(ip_id++);
228		ipstat.ips_localout++;
229	} else {
230		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
231	}
232
233	dst = (struct sockaddr_in *)&ro->ro_dst;
234	/*
235	 * If there is a cached route,
236	 * check that it is to the same destination
237	 * and is still up.  If not, free it and try again.
238	 */
239	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
240	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
241		RTFREE(ro->ro_rt);
242		ro->ro_rt = (struct rtentry *)0;
243	}
244	if (ro->ro_rt == 0) {
245		dst->sin_family = AF_INET;
246		dst->sin_len = sizeof(*dst);
247		dst->sin_addr = ip->ip_dst;
248	}
249	/*
250	 * If routing to interface only,
251	 * short circuit routing lookup.
252	 */
253#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
254#define sintosa(sin)	((struct sockaddr *)(sin))
255	if (flags & IP_ROUTETOIF) {
256		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
257		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
258			ipstat.ips_noroute++;
259			error = ENETUNREACH;
260			goto bad;
261		}
262		ifp = ia->ia_ifp;
263		ip->ip_ttl = 1;
264		isbroadcast = in_broadcast(dst->sin_addr, ifp);
265	} else {
266		/*
267		 * If this is the case, we probably don't want to allocate
268		 * a protocol-cloned route since we didn't get one from the
269		 * ULP.  This lets TCP do its thing, while not burdening
270		 * forwarding or ICMP with the overhead of cloning a route.
271		 * Of course, we still want to do any cloning requested by
272		 * the link layer, as this is probably required in all cases
273		 * for correct operation (as it is for ARP).
274		 */
275		if (ro->ro_rt == 0)
276			rtalloc_ign(ro, RTF_PRCLONING);
277		if (ro->ro_rt == 0) {
278			ipstat.ips_noroute++;
279			error = EHOSTUNREACH;
280			goto bad;
281		}
282		ia = ifatoia(ro->ro_rt->rt_ifa);
283		ifp = ro->ro_rt->rt_ifp;
284		ro->ro_rt->rt_use++;
285		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
286			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
287		if (ro->ro_rt->rt_flags & RTF_HOST)
288			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
289		else
290			isbroadcast = in_broadcast(dst->sin_addr, ifp);
291	}
292	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
293		struct in_multi *inm;
294
295		m->m_flags |= M_MCAST;
296		/*
297		 * IP destination address is multicast.  Make sure "dst"
298		 * still points to the address in "ro".  (It may have been
299		 * changed to point to a gateway address, above.)
300		 */
301		dst = (struct sockaddr_in *)&ro->ro_dst;
302		/*
303		 * See if the caller provided any multicast options
304		 */
305		if (imo != NULL) {
306			ip->ip_ttl = imo->imo_multicast_ttl;
307			if (imo->imo_multicast_ifp != NULL)
308				ifp = imo->imo_multicast_ifp;
309			if (imo->imo_multicast_vif != -1)
310				ip->ip_src.s_addr =
311				    ip_mcast_src(imo->imo_multicast_vif);
312		} else
313			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
314		/*
315		 * Confirm that the outgoing interface supports multicast.
316		 */
317		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
318			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
319				ipstat.ips_noroute++;
320				error = ENETUNREACH;
321				goto bad;
322			}
323		}
324		/*
325		 * If source address not specified yet, use address
326		 * of outgoing interface.
327		 */
328		if (ip->ip_src.s_addr == INADDR_ANY) {
329			register struct in_ifaddr *ia1;
330
331			for (ia1 = in_ifaddrhead.tqh_first; ia1;
332			     ia1 = ia1->ia_link.tqe_next)
333				if (ia1->ia_ifp == ifp) {
334					ip->ip_src = IA_SIN(ia1)->sin_addr;
335					break;
336				}
337		}
338
339		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
340		if (inm != NULL &&
341		   (imo == NULL || imo->imo_multicast_loop)) {
342			/*
343			 * If we belong to the destination multicast group
344			 * on the outgoing interface, and the caller did not
345			 * forbid loopback, loop back a copy.
346			 */
347			ip_mloopback(ifp, m, dst, hlen);
348		}
349		else {
350			/*
351			 * If we are acting as a multicast router, perform
352			 * multicast forwarding as if the packet had just
353			 * arrived on the interface to which we are about
354			 * to send.  The multicast forwarding function
355			 * recursively calls this function, using the
356			 * IP_FORWARDING flag to prevent infinite recursion.
357			 *
358			 * Multicasts that are looped back by ip_mloopback(),
359			 * above, will be forwarded by the ip_input() routine,
360			 * if necessary.
361			 */
362			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
363				/*
364				 * Check if rsvp daemon is running. If not, don't
365				 * set ip_moptions. This ensures that the packet
366				 * is multicast and not just sent down one link
367				 * as prescribed by rsvpd.
368				 */
369				if (!rsvp_on)
370				  imo = NULL;
371				if (ip_mforward(ip, ifp, m, imo) != 0) {
372					m_freem(m);
373					goto done;
374				}
375			}
376		}
377
378		/*
379		 * Multicasts with a time-to-live of zero may be looped-
380		 * back, above, but must not be transmitted on a network.
381		 * Also, multicasts addressed to the loopback interface
382		 * are not sent -- the above call to ip_mloopback() will
383		 * loop back a copy if this host actually belongs to the
384		 * destination group on the loopback interface.
385		 */
386		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
387			m_freem(m);
388			goto done;
389		}
390
391		goto sendit;
392	}
393#ifndef notdef
394	/*
395	 * If source address not specified yet, use address
396	 * of outgoing interface.
397	 */
398	if (ip->ip_src.s_addr == INADDR_ANY) {
399		ip->ip_src = IA_SIN(ia)->sin_addr;
400#ifdef IPFIREWALL_FORWARD
401		/* Keep note that we did this - if the firewall changes
402		 * the next-hop, our interface may change, changing the
403		 * default source IP. It's a shame so much effort happens
404		 * twice. Oh well.
405		 */
406		fwd_rewrite_src++;
407#endif /* IPFIREWALL_FORWARD */
408	}
409#endif /* notdef */
410	/*
411	 * Verify that we have any chance at all of being able to queue
412	 *      the packet or packet fragments
413	 */
414	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
415		ifp->if_snd.ifq_maxlen) {
416			error = ENOBUFS;
417			goto bad;
418	}
419
420	/*
421	 * Look for broadcast address and
422	 * and verify user is allowed to send
423	 * such a packet.
424	 */
425	if (isbroadcast) {
426		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
427			error = EADDRNOTAVAIL;
428			goto bad;
429		}
430		if ((flags & IP_ALLOWBROADCAST) == 0) {
431			error = EACCES;
432			goto bad;
433		}
434		/* don't allow broadcast messages to be fragmented */
435		if ((u_short)ip->ip_len > ifp->if_mtu) {
436			error = EMSGSIZE;
437			goto bad;
438		}
439		m->m_flags |= M_BCAST;
440	} else {
441		m->m_flags &= ~M_BCAST;
442	}
443
444sendit:
445	/*
446	 * IpHack's section.
447	 * - Xlate: translate packet's addr/port (NAT).
448	 * - Firewall: deny/allow/etc.
449	 * - Wrap: fake packet's addr/port <unimpl.>
450	 * - Encapsulate: put it in another IP and send out. <unimp.>
451	 */
452#if defined(IPFILTER) || defined(IPFILTER_LKM)
453	if (fr_checkp) {
454		struct  mbuf    *m1 = m;
455
456		if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
457			goto done;
458		ip = mtod(m = m1, struct ip *);
459	}
460#endif
461
462	/*
463	 * Check with the firewall...
464	 */
465	if (ip_fw_chk_ptr) {
466		struct sockaddr_in *old = dst;
467
468		off = (*ip_fw_chk_ptr)(&ip,
469		    hlen, ifp, &divert_cookie, &m, &rule, &dst);
470                /*
471                 * On return we must do the following:
472                 * m == NULL         -> drop the pkt
473                 * 1<=off<= 0xffff   -> DIVERT
474                 * (off & 0x10000)   -> send to a DUMMYNET pipe
475                 * (off & 0x20000)   -> TEE the packet
476                 * dst != old        -> IPFIREWALL_FORWARD
477                 * off==0, dst==old  -> accept
478                 * If some of the above modules is not compiled in, then
479                 * we should't have to check the corresponding condition
480                 * (because the ipfw control socket should not accept
481                 * unsupported rules), but better play safe and drop
482                 * packets in case of doubt.
483                 */
484		if (!m) { /* firewall said to reject */
485			error = EACCES;
486			goto done;
487		}
488		if (off == 0 && dst == old) /* common case */
489			goto pass ;
490#ifdef DUMMYNET
491                if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
492                    /*
493                     * pass the pkt to dummynet. Need to include
494                     * pipe number, m, ifp, ro, dst because these are
495                     * not recomputed in the next pass.
496                     * All other parameters have been already used and
497                     * so they are not needed anymore.
498                     * XXX note: if the ifp or ro entry are deleted
499                     * while a pkt is in dummynet, we are in trouble!
500                     */
501                    dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,dst,rule,
502				flags);
503			goto done;
504		}
505#endif
506#ifdef IPDIVERT
507		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
508			struct mbuf *clone = NULL;
509
510			/* Clone packet if we're doing a 'tee' */
511			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
512				clone = m_dup(m, M_DONTWAIT);
513
514			/* Restore packet header fields to original values */
515			HTONS(ip->ip_len);
516			HTONS(ip->ip_off);
517
518			/* Deliver packet to divert input routine */
519			ip_divert_cookie = divert_cookie;
520			divert_packet(m, 0, off & 0xffff);
521
522			/* If 'tee', continue with original packet */
523			if (clone != NULL) {
524				m = clone;
525				ip = mtod(m, struct ip *);
526				goto pass;
527			}
528			goto done;
529		}
530#endif
531
532#ifdef IPFIREWALL_FORWARD
533		/* Here we check dst to make sure it's directly reachable on the
534		 * interface we previously thought it was.
535		 * If it isn't (which may be likely in some situations) we have
536		 * to re-route it (ie, find a route for the next-hop and the
537		 * associated interface) and set them here. This is nested
538		 * forwarding which in most cases is undesirable, except where
539		 * such control is nigh impossible. So we do it here.
540		 * And I'm babbling.
541		 */
542		if (off == 0 && old != dst) {
543			struct in_ifaddr *ia;
544
545			/* It's changed... */
546			/* There must be a better way to do this next line... */
547			static struct route sro_fwd, *ro_fwd = &sro_fwd;
548#ifdef IPFIREWALL_FORWARD_DEBUG
549			printf("IPFIREWALL_FORWARD: New dst ip: ");
550			print_ip(dst->sin_addr);
551			printf("\n");
552#endif
553			/*
554			 * We need to figure out if we have been forwarded
555			 * to a local socket. If so then we should somehow
556			 * "loop back" to ip_input, and get directed to the
557			 * PCB as if we had received this packet. This is
558			 * because it may be dificult to identify the packets
559			 * you want to forward until they are being output
560			 * and have selected an interface. (e.g. locally
561			 * initiated packets) If we used the loopback inteface,
562			 * we would not be able to control what happens
563			 * as the packet runs through ip_input() as
564			 * it is done through a ISR.
565			 */
566			for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
567					ia = TAILQ_NEXT(ia, ia_link)) {
568				/*
569				 * If the addr to forward to is one
570				 * of ours, we pretend to
571				 * be the destination for this packet.
572				 */
573				if (IA_SIN(ia)->sin_addr.s_addr ==
574						 dst->sin_addr.s_addr)
575					break;
576			}
577			if (ia) {
578				/* tell ip_input "dont filter" */
579				ip_fw_fwd_addr = dst;
580				if (m->m_pkthdr.rcvif == NULL)
581					m->m_pkthdr.rcvif = ifunit("lo0");
582				ip->ip_len = htons((u_short)ip->ip_len);
583				ip->ip_off = htons((u_short)ip->ip_off);
584				ip->ip_sum = 0;
585				if (ip->ip_vhl == IP_VHL_BORING) {
586					ip->ip_sum = in_cksum_hdr(ip);
587				} else {
588					ip->ip_sum = in_cksum(m, hlen);
589				}
590				ip_input(m);
591				goto done;
592			}
593			/* Some of the logic for this was
594			 * nicked from above.
595			 *
596			 * This rewrites the cached route in a local PCB.
597			 * Is this what we want to do?
598			 */
599			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
600
601			ro_fwd->ro_rt = 0;
602			rtalloc_ign(ro_fwd, RTF_PRCLONING);
603
604			if (ro_fwd->ro_rt == 0) {
605				ipstat.ips_noroute++;
606				error = EHOSTUNREACH;
607				goto bad;
608			}
609
610			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
611			ifp = ro_fwd->ro_rt->rt_ifp;
612			ro_fwd->ro_rt->rt_use++;
613			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
614				dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
615			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
616				isbroadcast =
617				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
618			else
619				isbroadcast = in_broadcast(dst->sin_addr, ifp);
620			RTFREE(ro->ro_rt);
621			ro->ro_rt = ro_fwd->ro_rt;
622			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
623
624			/*
625			 * If we added a default src ip earlier,
626			 * which would have been gotten from the-then
627			 * interface, do it again, from the new one.
628			 */
629			if (fwd_rewrite_src)
630				ip->ip_src = IA_SIN(ia)->sin_addr;
631			goto pass ;
632		}
633#endif /* IPFIREWALL_FORWARD */
634                /*
635                 * if we get here, none of the above matches, and
636                 * we have to drop the pkt
637                 */
638		m_freem(m);
639                error = EACCES; /* not sure this is the right error msg */
640                goto done;
641	}
642
643pass:
644#ifdef IPSEC
645	/* get SP for this packet */
646	if (so == NULL)
647		sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
648	else
649		sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
650
651	if (sp == NULL) {
652		ipsecstat.out_inval++;
653		goto bad;
654	}
655
656	error = 0;
657
658	/* check policy */
659	switch (sp->policy) {
660	case IPSEC_POLICY_DISCARD:
661		/*
662		 * This packet is just discarded.
663		 */
664		ipsecstat.out_polvio++;
665		goto bad;
666
667	case IPSEC_POLICY_BYPASS:
668	case IPSEC_POLICY_NONE:
669		/* no need to do IPsec. */
670		goto skip_ipsec;
671
672	case IPSEC_POLICY_IPSEC:
673		if (sp->req == NULL) {
674			/* XXX should be panic ? */
675			printf("ip_output: No IPsec request specified.\n");
676			error = EINVAL;
677			goto bad;
678		}
679		break;
680
681	case IPSEC_POLICY_ENTRUST:
682	default:
683		printf("ip_output: Invalid policy found. %d\n", sp->policy);
684	}
685
686	ip->ip_len = htons((u_short)ip->ip_len);
687	ip->ip_off = htons((u_short)ip->ip_off);
688	ip->ip_sum = 0;
689
690    {
691	struct ipsec_output_state state;
692	bzero(&state, sizeof(state));
693	state.m = m;
694	if (flags & IP_ROUTETOIF) {
695		state.ro = &iproute;
696		bzero(&iproute, sizeof(iproute));
697	} else
698		state.ro = ro;
699	state.dst = (struct sockaddr *)dst;
700
701	error = ipsec4_output(&state, sp, flags);
702
703	m = state.m;
704	if (flags & IP_ROUTETOIF) {
705		/*
706		 * if we have tunnel mode SA, we may need to ignore
707		 * IP_ROUTETOIF.
708		 */
709		if (state.ro != &iproute || state.ro->ro_rt != NULL) {
710			flags &= ~IP_ROUTETOIF;
711			ro = state.ro;
712		}
713	} else
714		ro = state.ro;
715	dst = (struct sockaddr_in *)state.dst;
716	if (error) {
717		/* mbuf is already reclaimed in ipsec4_output. */
718		m0 = NULL;
719		switch (error) {
720		case EHOSTUNREACH:
721		case ENETUNREACH:
722		case EMSGSIZE:
723		case ENOBUFS:
724		case ENOMEM:
725			break;
726		default:
727			printf("ip4_output (ipsec): error code %d\n", error);
728			/*fall through*/
729		case ENOENT:
730			/* don't show these error codes to the user */
731			error = 0;
732			break;
733		}
734		goto bad;
735	}
736    }
737
738	/* be sure to update variables that are affected by ipsec4_output() */
739	ip = mtod(m, struct ip *);
740#ifdef _IP_VHL
741	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
742#else
743	hlen = ip->ip_hl << 2;
744#endif
745	if (ro->ro_rt == NULL) {
746		if ((flags & IP_ROUTETOIF) == 0) {
747			printf("ip_output: "
748				"can't update route after IPsec processing\n");
749			error = EHOSTUNREACH;	/*XXX*/
750			goto bad;
751		}
752	} else {
753		/* nobody uses ia beyond here */
754		ifp = ro->ro_rt->rt_ifp;
755	}
756
757	/* make it flipped, again. */
758	ip->ip_len = ntohs((u_short)ip->ip_len);
759	ip->ip_off = ntohs((u_short)ip->ip_off);
760skip_ipsec:
761#endif /*IPSEC*/
762
763	/*
764	 * If small enough for interface, can just send directly.
765	 */
766	if ((u_short)ip->ip_len <= ifp->if_mtu) {
767		ip->ip_len = htons((u_short)ip->ip_len);
768		ip->ip_off = htons((u_short)ip->ip_off);
769		ip->ip_sum = 0;
770		if (ip->ip_vhl == IP_VHL_BORING) {
771			ip->ip_sum = in_cksum_hdr(ip);
772		} else {
773			ip->ip_sum = in_cksum(m, hlen);
774		}
775		error = (*ifp->if_output)(ifp, m,
776				(struct sockaddr *)dst, ro->ro_rt);
777		goto done;
778	}
779	/*
780	 * Too large for interface; fragment if possible.
781	 * Must be able to put at least 8 bytes per fragment.
782	 */
783	if (ip->ip_off & IP_DF) {
784		error = EMSGSIZE;
785		/*
786		 * This case can happen if the user changed the MTU
787		 * of an interface after enabling IP on it.  Because
788		 * most netifs don't keep track of routes pointing to
789		 * them, there is no way for one to update all its
790		 * routes when the MTU is changed.
791		 */
792		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
793		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
794		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
795			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
796		}
797		ipstat.ips_cantfrag++;
798		goto bad;
799	}
800	len = (ifp->if_mtu - hlen) &~ 7;
801	if (len < 8) {
802		error = EMSGSIZE;
803		goto bad;
804	}
805
806    {
807	int mhlen, firstlen = len;
808	struct mbuf **mnext = &m->m_nextpkt;
809
810	/*
811	 * Loop through length of segment after first fragment,
812	 * make new header and copy data of each part and link onto chain.
813	 */
814	m0 = m;
815	mhlen = sizeof (struct ip);
816	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
817		MGETHDR(m, M_DONTWAIT, MT_HEADER);
818		if (m == 0) {
819			error = ENOBUFS;
820			ipstat.ips_odropped++;
821			goto sendorfree;
822		}
823		m->m_flags |= (m0->m_flags & M_MCAST);
824		m->m_data += max_linkhdr;
825		mhip = mtod(m, struct ip *);
826		*mhip = *ip;
827		if (hlen > sizeof (struct ip)) {
828			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
829			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
830		}
831		m->m_len = mhlen;
832		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
833		if (ip->ip_off & IP_MF)
834			mhip->ip_off |= IP_MF;
835		if (off + len >= (u_short)ip->ip_len)
836			len = (u_short)ip->ip_len - off;
837		else
838			mhip->ip_off |= IP_MF;
839		mhip->ip_len = htons((u_short)(len + mhlen));
840		m->m_next = m_copy(m0, off, len);
841		if (m->m_next == 0) {
842			(void) m_free(m);
843			error = ENOBUFS;	/* ??? */
844			ipstat.ips_odropped++;
845			goto sendorfree;
846		}
847		m->m_pkthdr.len = mhlen + len;
848		m->m_pkthdr.rcvif = (struct ifnet *)0;
849		mhip->ip_off = htons((u_short)mhip->ip_off);
850		mhip->ip_sum = 0;
851		if (mhip->ip_vhl == IP_VHL_BORING) {
852			mhip->ip_sum = in_cksum_hdr(mhip);
853		} else {
854			mhip->ip_sum = in_cksum(m, mhlen);
855		}
856		*mnext = m;
857		mnext = &m->m_nextpkt;
858		ipstat.ips_ofragments++;
859	}
860	/*
861	 * Update first fragment by trimming what's been copied out
862	 * and updating header, then send each fragment (in order).
863	 */
864	m = m0;
865	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
866	m->m_pkthdr.len = hlen + firstlen;
867	ip->ip_len = htons((u_short)m->m_pkthdr.len);
868	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
869	ip->ip_sum = 0;
870	if (ip->ip_vhl == IP_VHL_BORING) {
871		ip->ip_sum = in_cksum_hdr(ip);
872	} else {
873		ip->ip_sum = in_cksum(m, hlen);
874	}
875sendorfree:
876	for (m = m0; m; m = m0) {
877		m0 = m->m_nextpkt;
878		m->m_nextpkt = 0;
879		if (error == 0)
880			error = (*ifp->if_output)(ifp, m,
881			    (struct sockaddr *)dst, ro->ro_rt);
882		else
883			m_freem(m);
884	}
885
886	if (error == 0)
887		ipstat.ips_fragmented++;
888    }
889done:
890#ifdef IPSEC
891	if (ro == &iproute && ro->ro_rt) {
892		RTFREE(ro->ro_rt);
893		ro->ro_rt = NULL;
894	}
895	if (sp != NULL) {
896		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
897			printf("DP ip_output call free SP:%p\n", sp));
898		key_freesp(sp);
899	}
900#endif /* IPSEC */
901	return (error);
902bad:
903	m_freem(m0);
904	goto done;
905}
906
907/*
908 * Insert IP options into preformed packet.
909 * Adjust IP destination as required for IP source routing,
910 * as indicated by a non-zero in_addr at the start of the options.
911 *
912 * XXX This routine assumes that the packet has no options in place.
913 */
914static struct mbuf *
915ip_insertoptions(m, opt, phlen)
916	register struct mbuf *m;
917	struct mbuf *opt;
918	int *phlen;
919{
920	register struct ipoption *p = mtod(opt, struct ipoption *);
921	struct mbuf *n;
922	register struct ip *ip = mtod(m, struct ip *);
923	unsigned optlen;
924
925	optlen = opt->m_len - sizeof(p->ipopt_dst);
926	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
927		return (m);		/* XXX should fail */
928	if (p->ipopt_dst.s_addr)
929		ip->ip_dst = p->ipopt_dst;
930	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
931		MGETHDR(n, M_DONTWAIT, MT_HEADER);
932		if (n == 0)
933			return (m);
934		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
935		m->m_len -= sizeof(struct ip);
936		m->m_data += sizeof(struct ip);
937		n->m_next = m;
938		m = n;
939		m->m_len = optlen + sizeof(struct ip);
940		m->m_data += max_linkhdr;
941		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
942	} else {
943		m->m_data -= optlen;
944		m->m_len += optlen;
945		m->m_pkthdr.len += optlen;
946		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
947	}
948	ip = mtod(m, struct ip *);
949	bcopy(p->ipopt_list, ip + 1, optlen);
950	*phlen = sizeof(struct ip) + optlen;
951	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
952	ip->ip_len += optlen;
953	return (m);
954}
955
956/*
957 * Copy options from ip to jp,
958 * omitting those not copied during fragmentation.
959 */
960#if !defined(IPFILTER) && !defined(IPFILTER_LKM)
961static
962#endif
963int
964ip_optcopy(ip, jp)
965	struct ip *ip, *jp;
966{
967	register u_char *cp, *dp;
968	int opt, optlen, cnt;
969
970	cp = (u_char *)(ip + 1);
971	dp = (u_char *)(jp + 1);
972	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
973	for (; cnt > 0; cnt -= optlen, cp += optlen) {
974		opt = cp[0];
975		if (opt == IPOPT_EOL)
976			break;
977		if (opt == IPOPT_NOP) {
978			/* Preserve for IP mcast tunnel's LSRR alignment. */
979			*dp++ = IPOPT_NOP;
980			optlen = 1;
981			continue;
982		} else
983			optlen = cp[IPOPT_OLEN];
984		/* bogus lengths should have been caught by ip_dooptions */
985		if (optlen > cnt)
986			optlen = cnt;
987		if (IPOPT_COPIED(opt)) {
988			bcopy(cp, dp, optlen);
989			dp += optlen;
990		}
991	}
992	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
993		*dp++ = IPOPT_EOL;
994	return (optlen);
995}
996
997/*
998 * IP socket option processing.
999 */
1000int
1001ip_ctloutput(so, sopt)
1002	struct socket *so;
1003	struct sockopt *sopt;
1004{
1005	struct	inpcb *inp = sotoinpcb(so);
1006	int	error, optval;
1007
1008	error = optval = 0;
1009	if (sopt->sopt_level != IPPROTO_IP) {
1010		return (EINVAL);
1011	}
1012
1013	switch (sopt->sopt_dir) {
1014	case SOPT_SET:
1015		switch (sopt->sopt_name) {
1016		case IP_OPTIONS:
1017#ifdef notyet
1018		case IP_RETOPTS:
1019#endif
1020		{
1021			struct mbuf *m;
1022			if (sopt->sopt_valsize > MLEN) {
1023				error = EMSGSIZE;
1024				break;
1025			}
1026			MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
1027			if (m == 0) {
1028				error = ENOBUFS;
1029				break;
1030			}
1031			m->m_len = sopt->sopt_valsize;
1032			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1033					    m->m_len);
1034
1035			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1036					   m));
1037		}
1038
1039		case IP_TOS:
1040		case IP_TTL:
1041		case IP_RECVOPTS:
1042		case IP_RECVRETOPTS:
1043		case IP_RECVDSTADDR:
1044		case IP_RECVIF:
1045#if defined(NFAITH) && NFAITH > 0
1046		case IP_FAITH:
1047#endif
1048			error = sooptcopyin(sopt, &optval, sizeof optval,
1049					    sizeof optval);
1050			if (error)
1051				break;
1052
1053			switch (sopt->sopt_name) {
1054			case IP_TOS:
1055				inp->inp_ip_tos = optval;
1056				break;
1057
1058			case IP_TTL:
1059				inp->inp_ip_ttl = optval;
1060				break;
1061#define	OPTSET(bit) \
1062	if (optval) \
1063		inp->inp_flags |= bit; \
1064	else \
1065		inp->inp_flags &= ~bit;
1066
1067			case IP_RECVOPTS:
1068				OPTSET(INP_RECVOPTS);
1069				break;
1070
1071			case IP_RECVRETOPTS:
1072				OPTSET(INP_RECVRETOPTS);
1073				break;
1074
1075			case IP_RECVDSTADDR:
1076				OPTSET(INP_RECVDSTADDR);
1077				break;
1078
1079			case IP_RECVIF:
1080				OPTSET(INP_RECVIF);
1081				break;
1082
1083#if defined(NFAITH) && NFAITH > 0
1084			case IP_FAITH:
1085				OPTSET(INP_FAITH);
1086				break;
1087#endif
1088			}
1089			break;
1090#undef OPTSET
1091
1092		case IP_MULTICAST_IF:
1093		case IP_MULTICAST_VIF:
1094		case IP_MULTICAST_TTL:
1095		case IP_MULTICAST_LOOP:
1096		case IP_ADD_MEMBERSHIP:
1097		case IP_DROP_MEMBERSHIP:
1098			error = ip_setmoptions(sopt, &inp->inp_moptions);
1099			break;
1100
1101		case IP_PORTRANGE:
1102			error = sooptcopyin(sopt, &optval, sizeof optval,
1103					    sizeof optval);
1104			if (error)
1105				break;
1106
1107			switch (optval) {
1108			case IP_PORTRANGE_DEFAULT:
1109				inp->inp_flags &= ~(INP_LOWPORT);
1110				inp->inp_flags &= ~(INP_HIGHPORT);
1111				break;
1112
1113			case IP_PORTRANGE_HIGH:
1114				inp->inp_flags &= ~(INP_LOWPORT);
1115				inp->inp_flags |= INP_HIGHPORT;
1116				break;
1117
1118			case IP_PORTRANGE_LOW:
1119				inp->inp_flags &= ~(INP_HIGHPORT);
1120				inp->inp_flags |= INP_LOWPORT;
1121				break;
1122
1123			default:
1124				error = EINVAL;
1125				break;
1126			}
1127			break;
1128
1129#ifdef IPSEC
1130		case IP_IPSEC_POLICY:
1131		{
1132			caddr_t req;
1133			int priv;
1134			struct mbuf *m;
1135			int optname;
1136
1137			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1138				break;
1139			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1140				break;
1141			priv = (sopt->sopt_p != NULL &&
1142				suser(sopt->sopt_p) != 0) ? 0 : 1;
1143			req = mtod(m, caddr_t);
1144			optname = sopt->sopt_name;
1145			error = ipsec4_set_policy(inp, optname, req, priv);
1146			m_freem(m);
1147			break;
1148		}
1149#endif /*IPSEC*/
1150
1151		default:
1152			error = ENOPROTOOPT;
1153			break;
1154		}
1155		break;
1156
1157	case SOPT_GET:
1158		switch (sopt->sopt_name) {
1159		case IP_OPTIONS:
1160		case IP_RETOPTS:
1161			if (inp->inp_options)
1162				error = sooptcopyout(sopt,
1163						     mtod(inp->inp_options,
1164							  char *),
1165						     inp->inp_options->m_len);
1166			else
1167				sopt->sopt_valsize = 0;
1168			break;
1169
1170		case IP_TOS:
1171		case IP_TTL:
1172		case IP_RECVOPTS:
1173		case IP_RECVRETOPTS:
1174		case IP_RECVDSTADDR:
1175		case IP_RECVIF:
1176		case IP_PORTRANGE:
1177#if defined(NFAITH) && NFAITH > 0
1178		case IP_FAITH:
1179#endif
1180			switch (sopt->sopt_name) {
1181
1182			case IP_TOS:
1183				optval = inp->inp_ip_tos;
1184				break;
1185
1186			case IP_TTL:
1187				optval = inp->inp_ip_ttl;
1188				break;
1189
1190#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1191
1192			case IP_RECVOPTS:
1193				optval = OPTBIT(INP_RECVOPTS);
1194				break;
1195
1196			case IP_RECVRETOPTS:
1197				optval = OPTBIT(INP_RECVRETOPTS);
1198				break;
1199
1200			case IP_RECVDSTADDR:
1201				optval = OPTBIT(INP_RECVDSTADDR);
1202				break;
1203
1204			case IP_RECVIF:
1205				optval = OPTBIT(INP_RECVIF);
1206				break;
1207
1208			case IP_PORTRANGE:
1209				if (inp->inp_flags & INP_HIGHPORT)
1210					optval = IP_PORTRANGE_HIGH;
1211				else if (inp->inp_flags & INP_LOWPORT)
1212					optval = IP_PORTRANGE_LOW;
1213				else
1214					optval = 0;
1215				break;
1216
1217#if defined(NFAITH) && NFAITH > 0
1218			case IP_FAITH:
1219				optval = OPTBIT(INP_FAITH);
1220				break;
1221#endif
1222			}
1223			error = sooptcopyout(sopt, &optval, sizeof optval);
1224			break;
1225
1226		case IP_MULTICAST_IF:
1227		case IP_MULTICAST_VIF:
1228		case IP_MULTICAST_TTL:
1229		case IP_MULTICAST_LOOP:
1230		case IP_ADD_MEMBERSHIP:
1231		case IP_DROP_MEMBERSHIP:
1232			error = ip_getmoptions(sopt, inp->inp_moptions);
1233			break;
1234
1235#ifdef IPSEC
1236		case IP_IPSEC_POLICY:
1237		{
1238			struct mbuf *m;
1239			caddr_t req = NULL;
1240
1241			if (m != 0)
1242				req = mtod(m, caddr_t);
1243			error = ipsec4_get_policy(sotoinpcb(so), req, &m);
1244			if (error == 0)
1245				error = soopt_mcopyout(sopt, m); /* XXX */
1246			m_freem(m);
1247			break;
1248		}
1249#endif /*IPSEC*/
1250
1251		default:
1252			error = ENOPROTOOPT;
1253			break;
1254		}
1255		break;
1256	}
1257	return (error);
1258}
1259
1260/*
1261 * Set up IP options in pcb for insertion in output packets.
1262 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1263 * with destination address if source routed.
1264 */
1265static int
1266ip_pcbopts(optname, pcbopt, m)
1267	int optname;
1268	struct mbuf **pcbopt;
1269	register struct mbuf *m;
1270{
1271	register int cnt, optlen;
1272	register u_char *cp;
1273	u_char opt;
1274
1275	/* turn off any old options */
1276	if (*pcbopt)
1277		(void)m_free(*pcbopt);
1278	*pcbopt = 0;
1279	if (m == (struct mbuf *)0 || m->m_len == 0) {
1280		/*
1281		 * Only turning off any previous options.
1282		 */
1283		if (m)
1284			(void)m_free(m);
1285		return (0);
1286	}
1287
1288#ifndef	vax
1289	if (m->m_len % sizeof(int32_t))
1290		goto bad;
1291#endif
1292	/*
1293	 * IP first-hop destination address will be stored before
1294	 * actual options; move other options back
1295	 * and clear it when none present.
1296	 */
1297	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1298		goto bad;
1299	cnt = m->m_len;
1300	m->m_len += sizeof(struct in_addr);
1301	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1302	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1303	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1304
1305	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1306		opt = cp[IPOPT_OPTVAL];
1307		if (opt == IPOPT_EOL)
1308			break;
1309		if (opt == IPOPT_NOP)
1310			optlen = 1;
1311		else {
1312			optlen = cp[IPOPT_OLEN];
1313			if (optlen <= IPOPT_OLEN || optlen > cnt)
1314				goto bad;
1315		}
1316		switch (opt) {
1317
1318		default:
1319			break;
1320
1321		case IPOPT_LSRR:
1322		case IPOPT_SSRR:
1323			/*
1324			 * user process specifies route as:
1325			 *	->A->B->C->D
1326			 * D must be our final destination (but we can't
1327			 * check that since we may not have connected yet).
1328			 * A is first hop destination, which doesn't appear in
1329			 * actual IP option, but is stored before the options.
1330			 */
1331			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1332				goto bad;
1333			m->m_len -= sizeof(struct in_addr);
1334			cnt -= sizeof(struct in_addr);
1335			optlen -= sizeof(struct in_addr);
1336			cp[IPOPT_OLEN] = optlen;
1337			/*
1338			 * Move first hop before start of options.
1339			 */
1340			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1341			    sizeof(struct in_addr));
1342			/*
1343			 * Then copy rest of options back
1344			 * to close up the deleted entry.
1345			 */
1346			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1347			    sizeof(struct in_addr)),
1348			    (caddr_t)&cp[IPOPT_OFFSET+1],
1349			    (unsigned)cnt + sizeof(struct in_addr));
1350			break;
1351		}
1352	}
1353	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1354		goto bad;
1355	*pcbopt = m;
1356	return (0);
1357
1358bad:
1359	(void)m_free(m);
1360	return (EINVAL);
1361}
1362
1363/*
1364 * XXX
1365 * The whole multicast option thing needs to be re-thought.
1366 * Several of these options are equally applicable to non-multicast
1367 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1368 * standard option (IP_TTL).
1369 */
1370/*
1371 * Set the IP multicast options in response to user setsockopt().
1372 */
1373static int
1374ip_setmoptions(sopt, imop)
1375	struct sockopt *sopt;
1376	struct ip_moptions **imop;
1377{
1378	int error = 0;
1379	int i;
1380	struct in_addr addr;
1381	struct ip_mreq mreq;
1382	struct ifnet *ifp;
1383	struct ip_moptions *imo = *imop;
1384	struct route ro;
1385	struct sockaddr_in *dst;
1386	int s;
1387
1388	if (imo == NULL) {
1389		/*
1390		 * No multicast option buffer attached to the pcb;
1391		 * allocate one and initialize to default values.
1392		 */
1393		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1394		    M_WAITOK);
1395
1396		if (imo == NULL)
1397			return (ENOBUFS);
1398		*imop = imo;
1399		imo->imo_multicast_ifp = NULL;
1400		imo->imo_multicast_vif = -1;
1401		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1402		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1403		imo->imo_num_memberships = 0;
1404	}
1405
1406	switch (sopt->sopt_name) {
1407	/* store an index number for the vif you wanna use in the send */
1408	case IP_MULTICAST_VIF:
1409		if (legal_vif_num == 0) {
1410			error = EOPNOTSUPP;
1411			break;
1412		}
1413		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1414		if (error)
1415			break;
1416		if (!legal_vif_num(i) && (i != -1)) {
1417			error = EINVAL;
1418			break;
1419		}
1420		imo->imo_multicast_vif = i;
1421		break;
1422
1423	case IP_MULTICAST_IF:
1424		/*
1425		 * Select the interface for outgoing multicast packets.
1426		 */
1427		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1428		if (error)
1429			break;
1430		/*
1431		 * INADDR_ANY is used to remove a previous selection.
1432		 * When no interface is selected, a default one is
1433		 * chosen every time a multicast packet is sent.
1434		 */
1435		if (addr.s_addr == INADDR_ANY) {
1436			imo->imo_multicast_ifp = NULL;
1437			break;
1438		}
1439		/*
1440		 * The selected interface is identified by its local
1441		 * IP address.  Find the interface and confirm that
1442		 * it supports multicasting.
1443		 */
1444		s = splimp();
1445		INADDR_TO_IFP(addr, ifp);
1446		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1447			splx(s);
1448			error = EADDRNOTAVAIL;
1449			break;
1450		}
1451		imo->imo_multicast_ifp = ifp;
1452		splx(s);
1453		break;
1454
1455	case IP_MULTICAST_TTL:
1456		/*
1457		 * Set the IP time-to-live for outgoing multicast packets.
1458		 * The original multicast API required a char argument,
1459		 * which is inconsistent with the rest of the socket API.
1460		 * We allow either a char or an int.
1461		 */
1462		if (sopt->sopt_valsize == 1) {
1463			u_char ttl;
1464			error = sooptcopyin(sopt, &ttl, 1, 1);
1465			if (error)
1466				break;
1467			imo->imo_multicast_ttl = ttl;
1468		} else {
1469			u_int ttl;
1470			error = sooptcopyin(sopt, &ttl, sizeof ttl,
1471					    sizeof ttl);
1472			if (error)
1473				break;
1474			if (ttl > 255)
1475				error = EINVAL;
1476			else
1477				imo->imo_multicast_ttl = ttl;
1478		}
1479		break;
1480
1481	case IP_MULTICAST_LOOP:
1482		/*
1483		 * Set the loopback flag for outgoing multicast packets.
1484		 * Must be zero or one.  The original multicast API required a
1485		 * char argument, which is inconsistent with the rest
1486		 * of the socket API.  We allow either a char or an int.
1487		 */
1488		if (sopt->sopt_valsize == 1) {
1489			u_char loop;
1490			error = sooptcopyin(sopt, &loop, 1, 1);
1491			if (error)
1492				break;
1493			imo->imo_multicast_loop = !!loop;
1494		} else {
1495			u_int loop;
1496			error = sooptcopyin(sopt, &loop, sizeof loop,
1497					    sizeof loop);
1498			if (error)
1499				break;
1500			imo->imo_multicast_loop = !!loop;
1501		}
1502		break;
1503
1504	case IP_ADD_MEMBERSHIP:
1505		/*
1506		 * Add a multicast group membership.
1507		 * Group must be a valid IP multicast address.
1508		 */
1509		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1510		if (error)
1511			break;
1512
1513		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1514			error = EINVAL;
1515			break;
1516		}
1517		s = splimp();
1518		/*
1519		 * If no interface address was provided, use the interface of
1520		 * the route to the given multicast address.
1521		 */
1522		if (mreq.imr_interface.s_addr == INADDR_ANY) {
1523			bzero((caddr_t)&ro, sizeof(ro));
1524			dst = (struct sockaddr_in *)&ro.ro_dst;
1525			dst->sin_len = sizeof(*dst);
1526			dst->sin_family = AF_INET;
1527			dst->sin_addr = mreq.imr_multiaddr;
1528			rtalloc(&ro);
1529			if (ro.ro_rt == NULL) {
1530				error = EADDRNOTAVAIL;
1531				splx(s);
1532				break;
1533			}
1534			ifp = ro.ro_rt->rt_ifp;
1535			rtfree(ro.ro_rt);
1536		}
1537		else {
1538			INADDR_TO_IFP(mreq.imr_interface, ifp);
1539		}
1540
1541		/*
1542		 * See if we found an interface, and confirm that it
1543		 * supports multicast.
1544		 */
1545		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1546			error = EADDRNOTAVAIL;
1547			splx(s);
1548			break;
1549		}
1550		/*
1551		 * See if the membership already exists or if all the
1552		 * membership slots are full.
1553		 */
1554		for (i = 0; i < imo->imo_num_memberships; ++i) {
1555			if (imo->imo_membership[i]->inm_ifp == ifp &&
1556			    imo->imo_membership[i]->inm_addr.s_addr
1557						== mreq.imr_multiaddr.s_addr)
1558				break;
1559		}
1560		if (i < imo->imo_num_memberships) {
1561			error = EADDRINUSE;
1562			splx(s);
1563			break;
1564		}
1565		if (i == IP_MAX_MEMBERSHIPS) {
1566			error = ETOOMANYREFS;
1567			splx(s);
1568			break;
1569		}
1570		/*
1571		 * Everything looks good; add a new record to the multicast
1572		 * address list for the given interface.
1573		 */
1574		if ((imo->imo_membership[i] =
1575		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1576			error = ENOBUFS;
1577			splx(s);
1578			break;
1579		}
1580		++imo->imo_num_memberships;
1581		splx(s);
1582		break;
1583
1584	case IP_DROP_MEMBERSHIP:
1585		/*
1586		 * Drop a multicast group membership.
1587		 * Group must be a valid IP multicast address.
1588		 */
1589		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1590		if (error)
1591			break;
1592
1593		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1594			error = EINVAL;
1595			break;
1596		}
1597
1598		s = splimp();
1599		/*
1600		 * If an interface address was specified, get a pointer
1601		 * to its ifnet structure.
1602		 */
1603		if (mreq.imr_interface.s_addr == INADDR_ANY)
1604			ifp = NULL;
1605		else {
1606			INADDR_TO_IFP(mreq.imr_interface, ifp);
1607			if (ifp == NULL) {
1608				error = EADDRNOTAVAIL;
1609				splx(s);
1610				break;
1611			}
1612		}
1613		/*
1614		 * Find the membership in the membership array.
1615		 */
1616		for (i = 0; i < imo->imo_num_memberships; ++i) {
1617			if ((ifp == NULL ||
1618			     imo->imo_membership[i]->inm_ifp == ifp) &&
1619			     imo->imo_membership[i]->inm_addr.s_addr ==
1620			     mreq.imr_multiaddr.s_addr)
1621				break;
1622		}
1623		if (i == imo->imo_num_memberships) {
1624			error = EADDRNOTAVAIL;
1625			splx(s);
1626			break;
1627		}
1628		/*
1629		 * Give up the multicast address record to which the
1630		 * membership points.
1631		 */
1632		in_delmulti(imo->imo_membership[i]);
1633		/*
1634		 * Remove the gap in the membership array.
1635		 */
1636		for (++i; i < imo->imo_num_memberships; ++i)
1637			imo->imo_membership[i-1] = imo->imo_membership[i];
1638		--imo->imo_num_memberships;
1639		splx(s);
1640		break;
1641
1642	default:
1643		error = EOPNOTSUPP;
1644		break;
1645	}
1646
1647	/*
1648	 * If all options have default values, no need to keep the mbuf.
1649	 */
1650	if (imo->imo_multicast_ifp == NULL &&
1651	    imo->imo_multicast_vif == -1 &&
1652	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1653	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1654	    imo->imo_num_memberships == 0) {
1655		free(*imop, M_IPMOPTS);
1656		*imop = NULL;
1657	}
1658
1659	return (error);
1660}
1661
1662/*
1663 * Return the IP multicast options in response to user getsockopt().
1664 */
1665static int
1666ip_getmoptions(sopt, imo)
1667	struct sockopt *sopt;
1668	register struct ip_moptions *imo;
1669{
1670	struct in_addr addr;
1671	struct in_ifaddr *ia;
1672	int error, optval;
1673	u_char coptval;
1674
1675	error = 0;
1676	switch (sopt->sopt_name) {
1677	case IP_MULTICAST_VIF:
1678		if (imo != NULL)
1679			optval = imo->imo_multicast_vif;
1680		else
1681			optval = -1;
1682		error = sooptcopyout(sopt, &optval, sizeof optval);
1683		break;
1684
1685	case IP_MULTICAST_IF:
1686		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1687			addr.s_addr = INADDR_ANY;
1688		else {
1689			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1690			addr.s_addr = (ia == NULL) ? INADDR_ANY
1691				: IA_SIN(ia)->sin_addr.s_addr;
1692		}
1693		error = sooptcopyout(sopt, &addr, sizeof addr);
1694		break;
1695
1696	case IP_MULTICAST_TTL:
1697		if (imo == 0)
1698			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1699		else
1700			optval = coptval = imo->imo_multicast_ttl;
1701		if (sopt->sopt_valsize == 1)
1702			error = sooptcopyout(sopt, &coptval, 1);
1703		else
1704			error = sooptcopyout(sopt, &optval, sizeof optval);
1705		break;
1706
1707	case IP_MULTICAST_LOOP:
1708		if (imo == 0)
1709			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1710		else
1711			optval = coptval = imo->imo_multicast_loop;
1712		if (sopt->sopt_valsize == 1)
1713			error = sooptcopyout(sopt, &coptval, 1);
1714		else
1715			error = sooptcopyout(sopt, &optval, sizeof optval);
1716		break;
1717
1718	default:
1719		error = ENOPROTOOPT;
1720		break;
1721	}
1722	return (error);
1723}
1724
1725/*
1726 * Discard the IP multicast options.
1727 */
1728void
1729ip_freemoptions(imo)
1730	register struct ip_moptions *imo;
1731{
1732	register int i;
1733
1734	if (imo != NULL) {
1735		for (i = 0; i < imo->imo_num_memberships; ++i)
1736			in_delmulti(imo->imo_membership[i]);
1737		free(imo, M_IPMOPTS);
1738	}
1739}
1740
1741/*
1742 * Routine called from ip_output() to loop back a copy of an IP multicast
1743 * packet to the input queue of a specified interface.  Note that this
1744 * calls the output routine of the loopback "driver", but with an interface
1745 * pointer that might NOT be a loopback interface -- evil, but easier than
1746 * replicating that code here.
1747 */
1748static void
1749ip_mloopback(ifp, m, dst, hlen)
1750	struct ifnet *ifp;
1751	register struct mbuf *m;
1752	register struct sockaddr_in *dst;
1753	int hlen;
1754{
1755	register struct ip *ip;
1756	struct mbuf *copym;
1757
1758	copym = m_copy(m, 0, M_COPYALL);
1759	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1760		copym = m_pullup(copym, hlen);
1761	if (copym != NULL) {
1762		/*
1763		 * We don't bother to fragment if the IP length is greater
1764		 * than the interface's MTU.  Can this possibly matter?
1765		 */
1766		ip = mtod(copym, struct ip *);
1767		ip->ip_len = htons((u_short)ip->ip_len);
1768		ip->ip_off = htons((u_short)ip->ip_off);
1769		ip->ip_sum = 0;
1770		if (ip->ip_vhl == IP_VHL_BORING) {
1771			ip->ip_sum = in_cksum_hdr(ip);
1772		} else {
1773			ip->ip_sum = in_cksum(copym, hlen);
1774		}
1775		/*
1776		 * NB:
1777		 * It's not clear whether there are any lingering
1778		 * reentrancy problems in other areas which might
1779		 * be exposed by using ip_input directly (in
1780		 * particular, everything which modifies the packet
1781		 * in-place).  Yet another option is using the
1782		 * protosw directly to deliver the looped back
1783		 * packet.  For the moment, we'll err on the side
1784		 * of safety by using if_simloop().
1785		 */
1786#if 1 /* XXX */
1787		if (dst->sin_family != AF_INET) {
1788			printf("ip_mloopback: bad address family %d\n",
1789						dst->sin_family);
1790			dst->sin_family = AF_INET;
1791		}
1792#endif
1793
1794#ifdef notdef
1795		copym->m_pkthdr.rcvif = ifp;
1796		ip_input(copym);
1797#else
1798		if_simloop(ifp, copym, (struct sockaddr *)dst, 0);
1799#endif
1800	}
1801}
1802