ip_output.c revision 54175
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 * $FreeBSD: head/sys/netinet/ip_output.c 54175 1999-12-06 00:43:07Z archie $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdn.h"
41#include "opt_ipdivert.h"
42#include "opt_ipfilter.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/malloc.h>
48#include <sys/mbuf.h>
49#include <sys/protosw.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52
53#include <net/if.h>
54#include <net/route.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#include <netinet/in_pcb.h>
60#include <netinet/in_var.h>
61#include <netinet/ip_var.h>
62
63#ifdef vax
64#include <machine/mtpr.h>
65#endif
66#include <machine/in_cksum.h>
67
68static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
69
70#include <netinet/ip_fw.h>
71
72#ifdef DUMMYNET
73#include <netinet/ip_dummynet.h>
74#endif
75
#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address stored (in network byte order)
 * in the struct in_addr `a' as a dotted quad.
 *
 * The argument is parenthesized so that any expression yielding a
 * struct in_addr may be passed, and each octet is cast to u_long so it
 * matches the %lu conversion regardless of the type ntohl() returns.
 * The trailing semicolon is kept on purpose: existing uses are written
 * as statements and rely on the macro being a complete statement.
 */
#define print_ip(a)	 printf("%lu.%lu.%lu.%lu",			\
			    (u_long)((ntohl((a).s_addr) >> 24) & 0xFF),	\
			    (u_long)((ntohl((a).s_addr) >> 16) & 0xFF),	\
			    (u_long)((ntohl((a).s_addr) >> 8) & 0xFF),	\
			    (u_long)(ntohl((a).s_addr) & 0xFF));
#endif
82
83u_short ip_id;
84
85static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
86static void	ip_mloopback
87	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
88static int	ip_getmoptions
89	__P((struct sockopt *, struct ip_moptions *));
90static int	ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
91static int	ip_setmoptions
92	__P((struct sockopt *, struct ip_moptions **));
93
94#if defined(IPFILTER_LKM) || defined(IPFILTER)
95int	ip_optcopy __P((struct ip *, struct ip *));
96extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
97#else
98static int	ip_optcopy __P((struct ip *, struct ip *));
99#endif
100
101
102extern	struct protosw inetsw[];
103
104/*
105 * IP output.  The packet in mbuf chain m contains a skeletal IP
106 * header (with len, off, ttl, proto, tos, src, dst).
107 * The mbuf chain containing the packet will be freed.
108 * The mbuf opt, if present, will not be freed.
109 */
110int
111ip_output(m0, opt, ro, flags, imo)
112	struct mbuf *m0;
113	struct mbuf *opt;
114	struct route *ro;
115	int flags;
116	struct ip_moptions *imo;
117{
118	struct ip *ip, *mhip;
119	struct ifnet *ifp;
120	struct mbuf *m = m0;
121	int hlen = sizeof (struct ip);
122	int len, off, error = 0;
123	struct sockaddr_in *dst;
124	struct in_ifaddr *ia;
125	int isbroadcast;
126	u_int16_t divert_cookie;		/* firewall cookie */
127#ifdef IPFIREWALL_FORWARD
128	int fwd_rewrite_src = 0;
129#endif
130	struct ip_fw_chain *rule = NULL;
131
132#ifdef IPDIVERT
133	/* Get and reset firewall cookie */
134	divert_cookie = ip_divert_cookie;
135	ip_divert_cookie = 0;
136#else
137	divert_cookie = 0;
138#endif
139
140#if defined(IPFIREWALL) && defined(DUMMYNET)
141        /*
142         * dummynet packet are prepended a vestigial mbuf with
143         * m_type = MT_DUMMYNET and m_data pointing to the matching
144         * rule.
145         */
146        if (m->m_type == MT_DUMMYNET) {
147            /*
148             * the packet was already tagged, so part of the
149             * processing was already done, and we need to go down.
150             * opt, flags and imo have already been used, and now
151             * they are used to hold ifp, dst and NULL, respectively.
152             */
153            rule = (struct ip_fw_chain *)(m->m_data) ;
154            m0 = m = m->m_next ;
155            ip = mtod(m, struct ip *);
156            dst = (struct sockaddr_in *)flags ;
157            ifp = (struct ifnet *)opt;
158            hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
159            opt = NULL ;
160            flags = 0 ; /* XXX is this correct ? */
161            goto sendit;
162        } else
163            rule = NULL ;
164#endif
165
166#ifdef	DIAGNOSTIC
167	if ((m->m_flags & M_PKTHDR) == 0)
168		panic("ip_output no HDR");
169	if (!ro)
170		panic("ip_output no route, proto = %d",
171		      mtod(m, struct ip *)->ip_p);
172#endif
173	if (opt) {
174		m = ip_insertoptions(m, opt, &len);
175		hlen = len;
176	}
177	ip = mtod(m, struct ip *);
178	/*
179	 * Fill in IP header.
180	 */
181	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
182		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
183		ip->ip_off &= IP_DF;
184		ip->ip_id = htons(ip_id++);
185		ipstat.ips_localout++;
186	} else {
187		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
188	}
189
190	dst = (struct sockaddr_in *)&ro->ro_dst;
191	/*
192	 * If there is a cached route,
193	 * check that it is to the same destination
194	 * and is still up.  If not, free it and try again.
195	 */
196	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
197	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
198		RTFREE(ro->ro_rt);
199		ro->ro_rt = (struct rtentry *)0;
200	}
201	if (ro->ro_rt == 0) {
202		dst->sin_family = AF_INET;
203		dst->sin_len = sizeof(*dst);
204		dst->sin_addr = ip->ip_dst;
205	}
206	/*
207	 * If routing to interface only,
208	 * short circuit routing lookup.
209	 */
210#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
211#define sintosa(sin)	((struct sockaddr *)(sin))
212	if (flags & IP_ROUTETOIF) {
213		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
214		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
215			ipstat.ips_noroute++;
216			error = ENETUNREACH;
217			goto bad;
218		}
219		ifp = ia->ia_ifp;
220		ip->ip_ttl = 1;
221		isbroadcast = in_broadcast(dst->sin_addr, ifp);
222	} else {
223		/*
224		 * If this is the case, we probably don't want to allocate
225		 * a protocol-cloned route since we didn't get one from the
226		 * ULP.  This lets TCP do its thing, while not burdening
227		 * forwarding or ICMP with the overhead of cloning a route.
228		 * Of course, we still want to do any cloning requested by
229		 * the link layer, as this is probably required in all cases
230		 * for correct operation (as it is for ARP).
231		 */
232		if (ro->ro_rt == 0)
233			rtalloc_ign(ro, RTF_PRCLONING);
234		if (ro->ro_rt == 0) {
235			ipstat.ips_noroute++;
236			error = EHOSTUNREACH;
237			goto bad;
238		}
239		ia = ifatoia(ro->ro_rt->rt_ifa);
240		ifp = ro->ro_rt->rt_ifp;
241		ro->ro_rt->rt_use++;
242		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
243			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
244		if (ro->ro_rt->rt_flags & RTF_HOST)
245			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
246		else
247			isbroadcast = in_broadcast(dst->sin_addr, ifp);
248	}
249	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
250		struct in_multi *inm;
251
252		m->m_flags |= M_MCAST;
253		/*
254		 * IP destination address is multicast.  Make sure "dst"
255		 * still points to the address in "ro".  (It may have been
256		 * changed to point to a gateway address, above.)
257		 */
258		dst = (struct sockaddr_in *)&ro->ro_dst;
259		/*
260		 * See if the caller provided any multicast options
261		 */
262		if (imo != NULL) {
263			ip->ip_ttl = imo->imo_multicast_ttl;
264			if (imo->imo_multicast_ifp != NULL)
265				ifp = imo->imo_multicast_ifp;
266			if (imo->imo_multicast_vif != -1)
267				ip->ip_src.s_addr =
268				    ip_mcast_src(imo->imo_multicast_vif);
269		} else
270			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
271		/*
272		 * Confirm that the outgoing interface supports multicast.
273		 */
274		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
275			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
276				ipstat.ips_noroute++;
277				error = ENETUNREACH;
278				goto bad;
279			}
280		}
281		/*
282		 * If source address not specified yet, use address
283		 * of outgoing interface.
284		 */
285		if (ip->ip_src.s_addr == INADDR_ANY) {
286			register struct in_ifaddr *ia1;
287
288			for (ia1 = in_ifaddrhead.tqh_first; ia1;
289			     ia1 = ia1->ia_link.tqe_next)
290				if (ia1->ia_ifp == ifp) {
291					ip->ip_src = IA_SIN(ia1)->sin_addr;
292					break;
293				}
294		}
295
296		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
297		if (inm != NULL &&
298		   (imo == NULL || imo->imo_multicast_loop)) {
299			/*
300			 * If we belong to the destination multicast group
301			 * on the outgoing interface, and the caller did not
302			 * forbid loopback, loop back a copy.
303			 */
304			ip_mloopback(ifp, m, dst, hlen);
305		}
306		else {
307			/*
308			 * If we are acting as a multicast router, perform
309			 * multicast forwarding as if the packet had just
310			 * arrived on the interface to which we are about
311			 * to send.  The multicast forwarding function
312			 * recursively calls this function, using the
313			 * IP_FORWARDING flag to prevent infinite recursion.
314			 *
315			 * Multicasts that are looped back by ip_mloopback(),
316			 * above, will be forwarded by the ip_input() routine,
317			 * if necessary.
318			 */
319			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
320				/*
321				 * Check if rsvp daemon is running. If not, don't
322				 * set ip_moptions. This ensures that the packet
323				 * is multicast and not just sent down one link
324				 * as prescribed by rsvpd.
325				 */
326				if (!rsvp_on)
327				  imo = NULL;
328				if (ip_mforward(ip, ifp, m, imo) != 0) {
329					m_freem(m);
330					goto done;
331				}
332			}
333		}
334
335		/*
336		 * Multicasts with a time-to-live of zero may be looped-
337		 * back, above, but must not be transmitted on a network.
338		 * Also, multicasts addressed to the loopback interface
339		 * are not sent -- the above call to ip_mloopback() will
340		 * loop back a copy if this host actually belongs to the
341		 * destination group on the loopback interface.
342		 */
343		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
344			m_freem(m);
345			goto done;
346		}
347
348		goto sendit;
349	}
350#ifndef notdef
351	/*
352	 * If source address not specified yet, use address
353	 * of outgoing interface.
354	 */
355	if (ip->ip_src.s_addr == INADDR_ANY) {
356		ip->ip_src = IA_SIN(ia)->sin_addr;
357#ifdef IPFIREWALL_FORWARD
358		/* Keep note that we did this - if the firewall changes
359		 * the next-hop, our interface may change, changing the
360		 * default source IP. It's a shame so much effort happens
361		 * twice. Oh well.
362		 */
363		fwd_rewrite_src++;
364#endif /* IPFIREWALL_FORWARD */
365	}
366#endif /* notdef */
367	/*
368	 * Verify that we have any chance at all of being able to queue
369	 *      the packet or packet fragments
370	 */
371	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
372		ifp->if_snd.ifq_maxlen) {
373			error = ENOBUFS;
374			goto bad;
375	}
376
377	/*
378	 * Look for broadcast address and
379	 * and verify user is allowed to send
380	 * such a packet.
381	 */
382	if (isbroadcast) {
383		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
384			error = EADDRNOTAVAIL;
385			goto bad;
386		}
387		if ((flags & IP_ALLOWBROADCAST) == 0) {
388			error = EACCES;
389			goto bad;
390		}
391		/* don't allow broadcast messages to be fragmented */
392		if ((u_short)ip->ip_len > ifp->if_mtu) {
393			error = EMSGSIZE;
394			goto bad;
395		}
396		m->m_flags |= M_BCAST;
397	} else {
398		m->m_flags &= ~M_BCAST;
399	}
400
401sendit:
402	/*
403	 * IpHack's section.
404	 * - Xlate: translate packet's addr/port (NAT).
405	 * - Firewall: deny/allow/etc.
406	 * - Wrap: fake packet's addr/port <unimpl.>
407	 * - Encapsulate: put it in another IP and send out. <unimp.>
408	 */
409#if defined(IPFILTER) || defined(IPFILTER_LKM)
410	if (fr_checkp) {
411		struct  mbuf    *m1 = m;
412
413		if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
414			goto done;
415		ip = mtod(m = m1, struct ip *);
416	}
417#endif
418
419	/*
420	 * Check with the firewall...
421	 */
422	if (ip_fw_chk_ptr) {
423		struct sockaddr_in *old = dst;
424
425		off = (*ip_fw_chk_ptr)(&ip,
426		    hlen, ifp, &divert_cookie, &m, &rule, &dst);
427                /*
428                 * On return we must do the following:
429                 * m == NULL         -> drop the pkt
430                 * 1<=off<= 0xffff   -> DIVERT
431                 * (off & 0x10000)   -> send to a DUMMYNET pipe
432                 * (off & 0x20000)   -> TEE the packet
433                 * dst != old        -> IPFIREWALL_FORWARD
434                 * off==0, dst==old  -> accept
435                 * If some of the above modules is not compiled in, then
436                 * we should't have to check the corresponding condition
437                 * (because the ipfw control socket should not accept
438                 * unsupported rules), but better play safe and drop
439                 * packets in case of doubt.
440                 */
441		if (!m) { /* firewall said to reject */
442			error = EACCES;
443			goto done;
444		}
445		if (off == 0 && dst == old) /* common case */
446			goto pass ;
447#ifdef DUMMYNET
448                if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
449                    /*
450                     * pass the pkt to dummynet. Need to include
451                     * pipe number, m, ifp, ro, dst because these are
452                     * not recomputed in the next pass.
453                     * All other parameters have been already used and
454                     * so they are not needed anymore.
455                     * XXX note: if the ifp or ro entry are deleted
456                     * while a pkt is in dummynet, we are in trouble!
457                     */
458                    dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,dst,rule);
459			goto done;
460		}
461#endif
462#ifdef IPDIVERT
463		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
464			struct mbuf *clone = NULL;
465
466			/* Clone packet if we're doing a 'tee' */
467			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
468				clone = m_dup(m, M_DONTWAIT);
469
470			/* Restore packet header fields to original values */
471			HTONS(ip->ip_len);
472			HTONS(ip->ip_off);
473
474			/* Deliver packet to divert input routine */
475			ip_divert_cookie = divert_cookie;
476			divert_packet(m, 0, off & 0xffff);
477
478			/* If 'tee', continue with original packet */
479			if (clone != NULL) {
480				m = clone;
481				ip = mtod(m, struct ip *);
482				goto pass;
483			}
484			goto done;
485		}
486#endif
487
488#ifdef IPFIREWALL_FORWARD
489		/* Here we check dst to make sure it's directly reachable on the
490		 * interface we previously thought it was.
491		 * If it isn't (which may be likely in some situations) we have
492		 * to re-route it (ie, find a route for the next-hop and the
493		 * associated interface) and set them here. This is nested
494		 * forwarding which in most cases is undesirable, except where
495		 * such control is nigh impossible. So we do it here.
496		 * And I'm babbling.
497		 */
498		if (off == 0 && old != dst) {
499			struct in_ifaddr *ia;
500
501			/* It's changed... */
502			/* There must be a better way to do this next line... */
503			static struct route sro_fwd, *ro_fwd = &sro_fwd;
504#ifdef IPFIREWALL_FORWARD_DEBUG
505			printf("IPFIREWALL_FORWARD: New dst ip: ");
506			print_ip(dst->sin_addr);
507			printf("\n");
508#endif
509			/*
510			 * We need to figure out if we have been forwarded
511			 * to a local socket. If so then we should somehow
512			 * "loop back" to ip_input, and get directed to the
513			 * PCB as if we had received this packet. This is
514			 * because it may be dificult to identify the packets
515			 * you want to forward until they are being output
516			 * and have selected an interface. (e.g. locally
517			 * initiated packets) If we used the loopback inteface,
518			 * we would not be able to control what happens
519			 * as the packet runs through ip_input() as
520			 * it is done through a ISR.
521			 */
522			for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
523					ia = TAILQ_NEXT(ia, ia_link)) {
524				/*
525				 * If the addr to forward to is one
526				 * of ours, we pretend to
527				 * be the destination for this packet.
528				 */
529				if (IA_SIN(ia)->sin_addr.s_addr ==
530						 dst->sin_addr.s_addr)
531					break;
532			}
533			if (ia) {
534				/* tell ip_input "dont filter" */
535				ip_fw_fwd_addr = dst;
536				if (m->m_pkthdr.rcvif == NULL)
537					m->m_pkthdr.rcvif = ifunit("lo0");
538				ip->ip_len = htons((u_short)ip->ip_len);
539				ip->ip_off = htons((u_short)ip->ip_off);
540				ip->ip_sum = 0;
541				if (ip->ip_vhl == IP_VHL_BORING) {
542					ip->ip_sum = in_cksum_hdr(ip);
543				} else {
544					ip->ip_sum = in_cksum(m, hlen);
545				}
546				ip_input(m);
547				goto done;
548			}
549			/* Some of the logic for this was
550			 * nicked from above.
551			 *
552			 * This rewrites the cached route in a local PCB.
553			 * Is this what we want to do?
554			 */
555			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
556
557			ro_fwd->ro_rt = 0;
558			rtalloc_ign(ro_fwd, RTF_PRCLONING);
559
560			if (ro_fwd->ro_rt == 0) {
561				ipstat.ips_noroute++;
562				error = EHOSTUNREACH;
563				goto bad;
564			}
565
566			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
567			ifp = ro_fwd->ro_rt->rt_ifp;
568			ro_fwd->ro_rt->rt_use++;
569			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
570				dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
571			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
572				isbroadcast =
573				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
574			else
575				isbroadcast = in_broadcast(dst->sin_addr, ifp);
576			RTFREE(ro->ro_rt);
577			ro->ro_rt = ro_fwd->ro_rt;
578			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
579
580			/*
581			 * If we added a default src ip earlier,
582			 * which would have been gotten from the-then
583			 * interface, do it again, from the new one.
584			 */
585			if (fwd_rewrite_src)
586				ip->ip_src = IA_SIN(ia)->sin_addr;
587			goto pass ;
588		}
589#endif /* IPFIREWALL_FORWARD */
590                /*
591                 * if we get here, none of the above matches, and
592                 * we have to drop the pkt
593                 */
594		m_freem(m);
595                error = EACCES; /* not sure this is the right error msg */
596                goto done;
597	}
598
599pass:
600	/*
601	 * If small enough for interface, can just send directly.
602	 */
603	if ((u_short)ip->ip_len <= ifp->if_mtu) {
604		ip->ip_len = htons((u_short)ip->ip_len);
605		ip->ip_off = htons((u_short)ip->ip_off);
606		ip->ip_sum = 0;
607		if (ip->ip_vhl == IP_VHL_BORING) {
608			ip->ip_sum = in_cksum_hdr(ip);
609		} else {
610			ip->ip_sum = in_cksum(m, hlen);
611		}
612		error = (*ifp->if_output)(ifp, m,
613				(struct sockaddr *)dst, ro->ro_rt);
614		goto done;
615	}
616	/*
617	 * Too large for interface; fragment if possible.
618	 * Must be able to put at least 8 bytes per fragment.
619	 */
620	if (ip->ip_off & IP_DF) {
621		error = EMSGSIZE;
622		/*
623		 * This case can happen if the user changed the MTU
624		 * of an interface after enabling IP on it.  Because
625		 * most netifs don't keep track of routes pointing to
626		 * them, there is no way for one to update all its
627		 * routes when the MTU is changed.
628		 */
629		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
630		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
631		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
632			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
633		}
634		ipstat.ips_cantfrag++;
635		goto bad;
636	}
637	len = (ifp->if_mtu - hlen) &~ 7;
638	if (len < 8) {
639		error = EMSGSIZE;
640		goto bad;
641	}
642
643    {
644	int mhlen, firstlen = len;
645	struct mbuf **mnext = &m->m_nextpkt;
646
647	/*
648	 * Loop through length of segment after first fragment,
649	 * make new header and copy data of each part and link onto chain.
650	 */
651	m0 = m;
652	mhlen = sizeof (struct ip);
653	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
654		MGETHDR(m, M_DONTWAIT, MT_HEADER);
655		if (m == 0) {
656			error = ENOBUFS;
657			ipstat.ips_odropped++;
658			goto sendorfree;
659		}
660		m->m_flags |= (m0->m_flags & M_MCAST);
661		m->m_data += max_linkhdr;
662		mhip = mtod(m, struct ip *);
663		*mhip = *ip;
664		if (hlen > sizeof (struct ip)) {
665			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
666			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
667		}
668		m->m_len = mhlen;
669		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
670		if (ip->ip_off & IP_MF)
671			mhip->ip_off |= IP_MF;
672		if (off + len >= (u_short)ip->ip_len)
673			len = (u_short)ip->ip_len - off;
674		else
675			mhip->ip_off |= IP_MF;
676		mhip->ip_len = htons((u_short)(len + mhlen));
677		m->m_next = m_copy(m0, off, len);
678		if (m->m_next == 0) {
679			(void) m_free(m);
680			error = ENOBUFS;	/* ??? */
681			ipstat.ips_odropped++;
682			goto sendorfree;
683		}
684		m->m_pkthdr.len = mhlen + len;
685		m->m_pkthdr.rcvif = (struct ifnet *)0;
686		mhip->ip_off = htons((u_short)mhip->ip_off);
687		mhip->ip_sum = 0;
688		if (mhip->ip_vhl == IP_VHL_BORING) {
689			mhip->ip_sum = in_cksum_hdr(mhip);
690		} else {
691			mhip->ip_sum = in_cksum(m, mhlen);
692		}
693		*mnext = m;
694		mnext = &m->m_nextpkt;
695		ipstat.ips_ofragments++;
696	}
697	/*
698	 * Update first fragment by trimming what's been copied out
699	 * and updating header, then send each fragment (in order).
700	 */
701	m = m0;
702	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
703	m->m_pkthdr.len = hlen + firstlen;
704	ip->ip_len = htons((u_short)m->m_pkthdr.len);
705	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
706	ip->ip_sum = 0;
707	if (ip->ip_vhl == IP_VHL_BORING) {
708		ip->ip_sum = in_cksum_hdr(ip);
709	} else {
710		ip->ip_sum = in_cksum(m, hlen);
711	}
712sendorfree:
713	for (m = m0; m; m = m0) {
714		m0 = m->m_nextpkt;
715		m->m_nextpkt = 0;
716		if (error == 0)
717			error = (*ifp->if_output)(ifp, m,
718			    (struct sockaddr *)dst, ro->ro_rt);
719		else
720			m_freem(m);
721	}
722
723	if (error == 0)
724		ipstat.ips_fragmented++;
725    }
726done:
727	return (error);
728bad:
729	m_freem(m0);
730	goto done;
731}
732
733/*
734 * Insert IP options into preformed packet.
735 * Adjust IP destination as required for IP source routing,
736 * as indicated by a non-zero in_addr at the start of the options.
737 *
738 * XXX This routine assumes that the packet has no options in place.
739 */
740static struct mbuf *
741ip_insertoptions(m, opt, phlen)
742	register struct mbuf *m;
743	struct mbuf *opt;
744	int *phlen;
745{
746	register struct ipoption *p = mtod(opt, struct ipoption *);
747	struct mbuf *n;
748	register struct ip *ip = mtod(m, struct ip *);
749	unsigned optlen;
750
751	optlen = opt->m_len - sizeof(p->ipopt_dst);
752	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
753		return (m);		/* XXX should fail */
754	if (p->ipopt_dst.s_addr)
755		ip->ip_dst = p->ipopt_dst;
756	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
757		MGETHDR(n, M_DONTWAIT, MT_HEADER);
758		if (n == 0)
759			return (m);
760		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
761		m->m_len -= sizeof(struct ip);
762		m->m_data += sizeof(struct ip);
763		n->m_next = m;
764		m = n;
765		m->m_len = optlen + sizeof(struct ip);
766		m->m_data += max_linkhdr;
767		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
768	} else {
769		m->m_data -= optlen;
770		m->m_len += optlen;
771		m->m_pkthdr.len += optlen;
772		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
773	}
774	ip = mtod(m, struct ip *);
775	bcopy(p->ipopt_list, ip + 1, optlen);
776	*phlen = sizeof(struct ip) + optlen;
777	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
778	ip->ip_len += optlen;
779	return (m);
780}
781
782/*
783 * Copy options from ip to jp,
784 * omitting those not copied during fragmentation.
785 */
786#if !defined(IPFILTER) && !defined(IPFILTER_LKM)
787static
788#endif
789int
790ip_optcopy(ip, jp)
791	struct ip *ip, *jp;
792{
793	register u_char *cp, *dp;
794	int opt, optlen, cnt;
795
796	cp = (u_char *)(ip + 1);
797	dp = (u_char *)(jp + 1);
798	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
799	for (; cnt > 0; cnt -= optlen, cp += optlen) {
800		opt = cp[0];
801		if (opt == IPOPT_EOL)
802			break;
803		if (opt == IPOPT_NOP) {
804			/* Preserve for IP mcast tunnel's LSRR alignment. */
805			*dp++ = IPOPT_NOP;
806			optlen = 1;
807			continue;
808		} else
809			optlen = cp[IPOPT_OLEN];
810		/* bogus lengths should have been caught by ip_dooptions */
811		if (optlen > cnt)
812			optlen = cnt;
813		if (IPOPT_COPIED(opt)) {
814			bcopy(cp, dp, optlen);
815			dp += optlen;
816		}
817	}
818	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
819		*dp++ = IPOPT_EOL;
820	return (optlen);
821}
822
823/*
824 * IP socket option processing.
825 */
826int
827ip_ctloutput(so, sopt)
828	struct socket *so;
829	struct sockopt *sopt;
830{
831	struct	inpcb *inp = sotoinpcb(so);
832	int	error, optval;
833
834	error = optval = 0;
835	if (sopt->sopt_level != IPPROTO_IP) {
836		return (EINVAL);
837	}
838
839	switch (sopt->sopt_dir) {
840	case SOPT_SET:
841		switch (sopt->sopt_name) {
842		case IP_OPTIONS:
843#ifdef notyet
844		case IP_RETOPTS:
845#endif
846		{
847			struct mbuf *m;
848			if (sopt->sopt_valsize > MLEN) {
849				error = EMSGSIZE;
850				break;
851			}
852			MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
853			if (m == 0) {
854				error = ENOBUFS;
855				break;
856			}
857			m->m_len = sopt->sopt_valsize;
858			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
859					    m->m_len);
860
861			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
862					   m));
863		}
864
865		case IP_TOS:
866		case IP_TTL:
867		case IP_RECVOPTS:
868		case IP_RECVRETOPTS:
869		case IP_RECVDSTADDR:
870		case IP_RECVIF:
871			error = sooptcopyin(sopt, &optval, sizeof optval,
872					    sizeof optval);
873			if (error)
874				break;
875
876			switch (sopt->sopt_name) {
877			case IP_TOS:
878				inp->inp_ip_tos = optval;
879				break;
880
881			case IP_TTL:
882				inp->inp_ip_ttl = optval;
883				break;
884#define	OPTSET(bit) \
885	if (optval) \
886		inp->inp_flags |= bit; \
887	else \
888		inp->inp_flags &= ~bit;
889
890			case IP_RECVOPTS:
891				OPTSET(INP_RECVOPTS);
892				break;
893
894			case IP_RECVRETOPTS:
895				OPTSET(INP_RECVRETOPTS);
896				break;
897
898			case IP_RECVDSTADDR:
899				OPTSET(INP_RECVDSTADDR);
900				break;
901
902			case IP_RECVIF:
903				OPTSET(INP_RECVIF);
904				break;
905			}
906			break;
907#undef OPTSET
908
909		case IP_MULTICAST_IF:
910		case IP_MULTICAST_VIF:
911		case IP_MULTICAST_TTL:
912		case IP_MULTICAST_LOOP:
913		case IP_ADD_MEMBERSHIP:
914		case IP_DROP_MEMBERSHIP:
915			error = ip_setmoptions(sopt, &inp->inp_moptions);
916			break;
917
918		case IP_PORTRANGE:
919			error = sooptcopyin(sopt, &optval, sizeof optval,
920					    sizeof optval);
921			if (error)
922				break;
923
924			switch (optval) {
925			case IP_PORTRANGE_DEFAULT:
926				inp->inp_flags &= ~(INP_LOWPORT);
927				inp->inp_flags &= ~(INP_HIGHPORT);
928				break;
929
930			case IP_PORTRANGE_HIGH:
931				inp->inp_flags &= ~(INP_LOWPORT);
932				inp->inp_flags |= INP_HIGHPORT;
933				break;
934
935			case IP_PORTRANGE_LOW:
936				inp->inp_flags &= ~(INP_HIGHPORT);
937				inp->inp_flags |= INP_LOWPORT;
938				break;
939
940			default:
941				error = EINVAL;
942				break;
943			}
944			break;
945
946		default:
947			error = ENOPROTOOPT;
948			break;
949		}
950		break;
951
952	case SOPT_GET:
953		switch (sopt->sopt_name) {
954		case IP_OPTIONS:
955		case IP_RETOPTS:
956			if (inp->inp_options)
957				error = sooptcopyout(sopt,
958						     mtod(inp->inp_options,
959							  char *),
960						     inp->inp_options->m_len);
961			else
962				sopt->sopt_valsize = 0;
963			break;
964
965		case IP_TOS:
966		case IP_TTL:
967		case IP_RECVOPTS:
968		case IP_RECVRETOPTS:
969		case IP_RECVDSTADDR:
970		case IP_RECVIF:
971		case IP_PORTRANGE:
972			switch (sopt->sopt_name) {
973
974			case IP_TOS:
975				optval = inp->inp_ip_tos;
976				break;
977
978			case IP_TTL:
979				optval = inp->inp_ip_ttl;
980				break;
981
982#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
983
984			case IP_RECVOPTS:
985				optval = OPTBIT(INP_RECVOPTS);
986				break;
987
988			case IP_RECVRETOPTS:
989				optval = OPTBIT(INP_RECVRETOPTS);
990				break;
991
992			case IP_RECVDSTADDR:
993				optval = OPTBIT(INP_RECVDSTADDR);
994				break;
995
996			case IP_RECVIF:
997				optval = OPTBIT(INP_RECVIF);
998				break;
999
1000			case IP_PORTRANGE:
1001				if (inp->inp_flags & INP_HIGHPORT)
1002					optval = IP_PORTRANGE_HIGH;
1003				else if (inp->inp_flags & INP_LOWPORT)
1004					optval = IP_PORTRANGE_LOW;
1005				else
1006					optval = 0;
1007				break;
1008			}
1009			error = sooptcopyout(sopt, &optval, sizeof optval);
1010			break;
1011
1012		case IP_MULTICAST_IF:
1013		case IP_MULTICAST_VIF:
1014		case IP_MULTICAST_TTL:
1015		case IP_MULTICAST_LOOP:
1016		case IP_ADD_MEMBERSHIP:
1017		case IP_DROP_MEMBERSHIP:
1018			error = ip_getmoptions(sopt, inp->inp_moptions);
1019			break;
1020
1021		default:
1022			error = ENOPROTOOPT;
1023			break;
1024		}
1025		break;
1026	}
1027	return (error);
1028}
1029
1030/*
1031 * Set up IP options in pcb for insertion in output packets.
1032 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1033 * with destination address if source routed.
1034 */
1035static int
1036ip_pcbopts(optname, pcbopt, m)
1037	int optname;
1038	struct mbuf **pcbopt;
1039	register struct mbuf *m;
1040{
1041	register int cnt, optlen;
1042	register u_char *cp;
1043	u_char opt;
1044
1045	/* turn off any old options */
1046	if (*pcbopt)
1047		(void)m_free(*pcbopt);
1048	*pcbopt = 0;
1049	if (m == (struct mbuf *)0 || m->m_len == 0) {
1050		/*
1051		 * Only turning off any previous options.
1052		 */
1053		if (m)
1054			(void)m_free(m);
1055		return (0);
1056	}
1057
1058#ifndef	vax
1059	if (m->m_len % sizeof(int32_t))
1060		goto bad;
1061#endif
1062	/*
1063	 * IP first-hop destination address will be stored before
1064	 * actual options; move other options back
1065	 * and clear it when none present.
1066	 */
1067	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1068		goto bad;
1069	cnt = m->m_len;
1070	m->m_len += sizeof(struct in_addr);
1071	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1072	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1073	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1074
1075	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1076		opt = cp[IPOPT_OPTVAL];
1077		if (opt == IPOPT_EOL)
1078			break;
1079		if (opt == IPOPT_NOP)
1080			optlen = 1;
1081		else {
1082			optlen = cp[IPOPT_OLEN];
1083			if (optlen <= IPOPT_OLEN || optlen > cnt)
1084				goto bad;
1085		}
1086		switch (opt) {
1087
1088		default:
1089			break;
1090
1091		case IPOPT_LSRR:
1092		case IPOPT_SSRR:
1093			/*
1094			 * user process specifies route as:
1095			 *	->A->B->C->D
1096			 * D must be our final destination (but we can't
1097			 * check that since we may not have connected yet).
1098			 * A is first hop destination, which doesn't appear in
1099			 * actual IP option, but is stored before the options.
1100			 */
1101			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1102				goto bad;
1103			m->m_len -= sizeof(struct in_addr);
1104			cnt -= sizeof(struct in_addr);
1105			optlen -= sizeof(struct in_addr);
1106			cp[IPOPT_OLEN] = optlen;
1107			/*
1108			 * Move first hop before start of options.
1109			 */
1110			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1111			    sizeof(struct in_addr));
1112			/*
1113			 * Then copy rest of options back
1114			 * to close up the deleted entry.
1115			 */
1116			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1117			    sizeof(struct in_addr)),
1118			    (caddr_t)&cp[IPOPT_OFFSET+1],
1119			    (unsigned)cnt + sizeof(struct in_addr));
1120			break;
1121		}
1122	}
1123	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1124		goto bad;
1125	*pcbopt = m;
1126	return (0);
1127
1128bad:
1129	(void)m_free(m);
1130	return (EINVAL);
1131}
1132
1133/*
1134 * XXX
1135 * The whole multicast option thing needs to be re-thought.
1136 * Several of these options are equally applicable to non-multicast
1137 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1138 * standard option (IP_TTL).
1139 */
1140/*
1141 * Set the IP multicast options in response to user setsockopt().
1142 */
1143static int
1144ip_setmoptions(sopt, imop)
1145	struct sockopt *sopt;
1146	struct ip_moptions **imop;
1147{
1148	int error = 0;
1149	int i;
1150	struct in_addr addr;
1151	struct ip_mreq mreq;
1152	struct ifnet *ifp;
1153	struct ip_moptions *imo = *imop;
1154	struct route ro;
1155	struct sockaddr_in *dst;
1156	int s;
1157
1158	if (imo == NULL) {
1159		/*
1160		 * No multicast option buffer attached to the pcb;
1161		 * allocate one and initialize to default values.
1162		 */
1163		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1164		    M_WAITOK);
1165
1166		if (imo == NULL)
1167			return (ENOBUFS);
1168		*imop = imo;
1169		imo->imo_multicast_ifp = NULL;
1170		imo->imo_multicast_vif = -1;
1171		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1172		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1173		imo->imo_num_memberships = 0;
1174	}
1175
1176	switch (sopt->sopt_name) {
1177	/* store an index number for the vif you wanna use in the send */
1178	case IP_MULTICAST_VIF:
1179		if (legal_vif_num == 0) {
1180			error = EOPNOTSUPP;
1181			break;
1182		}
1183		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1184		if (error)
1185			break;
1186		if (!legal_vif_num(i) && (i != -1)) {
1187			error = EINVAL;
1188			break;
1189		}
1190		imo->imo_multicast_vif = i;
1191		break;
1192
1193	case IP_MULTICAST_IF:
1194		/*
1195		 * Select the interface for outgoing multicast packets.
1196		 */
1197		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1198		if (error)
1199			break;
1200		/*
1201		 * INADDR_ANY is used to remove a previous selection.
1202		 * When no interface is selected, a default one is
1203		 * chosen every time a multicast packet is sent.
1204		 */
1205		if (addr.s_addr == INADDR_ANY) {
1206			imo->imo_multicast_ifp = NULL;
1207			break;
1208		}
1209		/*
1210		 * The selected interface is identified by its local
1211		 * IP address.  Find the interface and confirm that
1212		 * it supports multicasting.
1213		 */
1214		s = splimp();
1215		INADDR_TO_IFP(addr, ifp);
1216		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1217			splx(s);
1218			error = EADDRNOTAVAIL;
1219			break;
1220		}
1221		imo->imo_multicast_ifp = ifp;
1222		splx(s);
1223		break;
1224
1225	case IP_MULTICAST_TTL:
1226		/*
1227		 * Set the IP time-to-live for outgoing multicast packets.
1228		 * The original multicast API required a char argument,
1229		 * which is inconsistent with the rest of the socket API.
1230		 * We allow either a char or an int.
1231		 */
1232		if (sopt->sopt_valsize == 1) {
1233			u_char ttl;
1234			error = sooptcopyin(sopt, &ttl, 1, 1);
1235			if (error)
1236				break;
1237			imo->imo_multicast_ttl = ttl;
1238		} else {
1239			u_int ttl;
1240			error = sooptcopyin(sopt, &ttl, sizeof ttl,
1241					    sizeof ttl);
1242			if (error)
1243				break;
1244			if (ttl > 255)
1245				error = EINVAL;
1246			else
1247				imo->imo_multicast_ttl = ttl;
1248		}
1249		break;
1250
1251	case IP_MULTICAST_LOOP:
1252		/*
1253		 * Set the loopback flag for outgoing multicast packets.
1254		 * Must be zero or one.  The original multicast API required a
1255		 * char argument, which is inconsistent with the rest
1256		 * of the socket API.  We allow either a char or an int.
1257		 */
1258		if (sopt->sopt_valsize == 1) {
1259			u_char loop;
1260			error = sooptcopyin(sopt, &loop, 1, 1);
1261			if (error)
1262				break;
1263			imo->imo_multicast_loop = !!loop;
1264		} else {
1265			u_int loop;
1266			error = sooptcopyin(sopt, &loop, sizeof loop,
1267					    sizeof loop);
1268			if (error)
1269				break;
1270			imo->imo_multicast_loop = !!loop;
1271		}
1272		break;
1273
1274	case IP_ADD_MEMBERSHIP:
1275		/*
1276		 * Add a multicast group membership.
1277		 * Group must be a valid IP multicast address.
1278		 */
1279		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1280		if (error)
1281			break;
1282
1283		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1284			error = EINVAL;
1285			break;
1286		}
1287		s = splimp();
1288		/*
1289		 * If no interface address was provided, use the interface of
1290		 * the route to the given multicast address.
1291		 */
1292		if (mreq.imr_interface.s_addr == INADDR_ANY) {
1293			bzero((caddr_t)&ro, sizeof(ro));
1294			dst = (struct sockaddr_in *)&ro.ro_dst;
1295			dst->sin_len = sizeof(*dst);
1296			dst->sin_family = AF_INET;
1297			dst->sin_addr = mreq.imr_multiaddr;
1298			rtalloc(&ro);
1299			if (ro.ro_rt == NULL) {
1300				error = EADDRNOTAVAIL;
1301				splx(s);
1302				break;
1303			}
1304			ifp = ro.ro_rt->rt_ifp;
1305			rtfree(ro.ro_rt);
1306		}
1307		else {
1308			INADDR_TO_IFP(mreq.imr_interface, ifp);
1309		}
1310
1311		/*
1312		 * See if we found an interface, and confirm that it
1313		 * supports multicast.
1314		 */
1315		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1316			error = EADDRNOTAVAIL;
1317			splx(s);
1318			break;
1319		}
1320		/*
1321		 * See if the membership already exists or if all the
1322		 * membership slots are full.
1323		 */
1324		for (i = 0; i < imo->imo_num_memberships; ++i) {
1325			if (imo->imo_membership[i]->inm_ifp == ifp &&
1326			    imo->imo_membership[i]->inm_addr.s_addr
1327						== mreq.imr_multiaddr.s_addr)
1328				break;
1329		}
1330		if (i < imo->imo_num_memberships) {
1331			error = EADDRINUSE;
1332			splx(s);
1333			break;
1334		}
1335		if (i == IP_MAX_MEMBERSHIPS) {
1336			error = ETOOMANYREFS;
1337			splx(s);
1338			break;
1339		}
1340		/*
1341		 * Everything looks good; add a new record to the multicast
1342		 * address list for the given interface.
1343		 */
1344		if ((imo->imo_membership[i] =
1345		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1346			error = ENOBUFS;
1347			splx(s);
1348			break;
1349		}
1350		++imo->imo_num_memberships;
1351		splx(s);
1352		break;
1353
1354	case IP_DROP_MEMBERSHIP:
1355		/*
1356		 * Drop a multicast group membership.
1357		 * Group must be a valid IP multicast address.
1358		 */
1359		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1360		if (error)
1361			break;
1362
1363		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1364			error = EINVAL;
1365			break;
1366		}
1367
1368		s = splimp();
1369		/*
1370		 * If an interface address was specified, get a pointer
1371		 * to its ifnet structure.
1372		 */
1373		if (mreq.imr_interface.s_addr == INADDR_ANY)
1374			ifp = NULL;
1375		else {
1376			INADDR_TO_IFP(mreq.imr_interface, ifp);
1377			if (ifp == NULL) {
1378				error = EADDRNOTAVAIL;
1379				splx(s);
1380				break;
1381			}
1382		}
1383		/*
1384		 * Find the membership in the membership array.
1385		 */
1386		for (i = 0; i < imo->imo_num_memberships; ++i) {
1387			if ((ifp == NULL ||
1388			     imo->imo_membership[i]->inm_ifp == ifp) &&
1389			     imo->imo_membership[i]->inm_addr.s_addr ==
1390			     mreq.imr_multiaddr.s_addr)
1391				break;
1392		}
1393		if (i == imo->imo_num_memberships) {
1394			error = EADDRNOTAVAIL;
1395			splx(s);
1396			break;
1397		}
1398		/*
1399		 * Give up the multicast address record to which the
1400		 * membership points.
1401		 */
1402		in_delmulti(imo->imo_membership[i]);
1403		/*
1404		 * Remove the gap in the membership array.
1405		 */
1406		for (++i; i < imo->imo_num_memberships; ++i)
1407			imo->imo_membership[i-1] = imo->imo_membership[i];
1408		--imo->imo_num_memberships;
1409		splx(s);
1410		break;
1411
1412	default:
1413		error = EOPNOTSUPP;
1414		break;
1415	}
1416
1417	/*
1418	 * If all options have default values, no need to keep the mbuf.
1419	 */
1420	if (imo->imo_multicast_ifp == NULL &&
1421	    imo->imo_multicast_vif == -1 &&
1422	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1423	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1424	    imo->imo_num_memberships == 0) {
1425		free(*imop, M_IPMOPTS);
1426		*imop = NULL;
1427	}
1428
1429	return (error);
1430}
1431
1432/*
1433 * Return the IP multicast options in response to user getsockopt().
1434 */
1435static int
1436ip_getmoptions(sopt, imo)
1437	struct sockopt *sopt;
1438	register struct ip_moptions *imo;
1439{
1440	struct in_addr addr;
1441	struct in_ifaddr *ia;
1442	int error, optval;
1443	u_char coptval;
1444
1445	error = 0;
1446	switch (sopt->sopt_name) {
1447	case IP_MULTICAST_VIF:
1448		if (imo != NULL)
1449			optval = imo->imo_multicast_vif;
1450		else
1451			optval = -1;
1452		error = sooptcopyout(sopt, &optval, sizeof optval);
1453		break;
1454
1455	case IP_MULTICAST_IF:
1456		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1457			addr.s_addr = INADDR_ANY;
1458		else {
1459			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1460			addr.s_addr = (ia == NULL) ? INADDR_ANY
1461				: IA_SIN(ia)->sin_addr.s_addr;
1462		}
1463		error = sooptcopyout(sopt, &addr, sizeof addr);
1464		break;
1465
1466	case IP_MULTICAST_TTL:
1467		if (imo == 0)
1468			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1469		else
1470			optval = coptval = imo->imo_multicast_ttl;
1471		if (sopt->sopt_valsize == 1)
1472			error = sooptcopyout(sopt, &coptval, 1);
1473		else
1474			error = sooptcopyout(sopt, &optval, sizeof optval);
1475		break;
1476
1477	case IP_MULTICAST_LOOP:
1478		if (imo == 0)
1479			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1480		else
1481			optval = coptval = imo->imo_multicast_loop;
1482		if (sopt->sopt_valsize == 1)
1483			error = sooptcopyout(sopt, &coptval, 1);
1484		else
1485			error = sooptcopyout(sopt, &optval, sizeof optval);
1486		break;
1487
1488	default:
1489		error = ENOPROTOOPT;
1490		break;
1491	}
1492	return (error);
1493}
1494
1495/*
1496 * Discard the IP multicast options.
1497 */
1498void
1499ip_freemoptions(imo)
1500	register struct ip_moptions *imo;
1501{
1502	register int i;
1503
1504	if (imo != NULL) {
1505		for (i = 0; i < imo->imo_num_memberships; ++i)
1506			in_delmulti(imo->imo_membership[i]);
1507		free(imo, M_IPMOPTS);
1508	}
1509}
1510
1511/*
1512 * Routine called from ip_output() to loop back a copy of an IP multicast
1513 * packet to the input queue of a specified interface.  Note that this
1514 * calls the output routine of the loopback "driver", but with an interface
1515 * pointer that might NOT be a loopback interface -- evil, but easier than
1516 * replicating that code here.
1517 */
1518static void
1519ip_mloopback(ifp, m, dst, hlen)
1520	struct ifnet *ifp;
1521	register struct mbuf *m;
1522	register struct sockaddr_in *dst;
1523	int hlen;
1524{
1525	register struct ip *ip;
1526	struct mbuf *copym;
1527
1528	copym = m_copy(m, 0, M_COPYALL);
1529	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1530		copym = m_pullup(copym, hlen);
1531	if (copym != NULL) {
1532		/*
1533		 * We don't bother to fragment if the IP length is greater
1534		 * than the interface's MTU.  Can this possibly matter?
1535		 */
1536		ip = mtod(copym, struct ip *);
1537		ip->ip_len = htons((u_short)ip->ip_len);
1538		ip->ip_off = htons((u_short)ip->ip_off);
1539		ip->ip_sum = 0;
1540		if (ip->ip_vhl == IP_VHL_BORING) {
1541			ip->ip_sum = in_cksum_hdr(ip);
1542		} else {
1543			ip->ip_sum = in_cksum(copym, hlen);
1544		}
1545		/*
1546		 * NB:
1547		 * It's not clear whether there are any lingering
1548		 * reentrancy problems in other areas which might
1549		 * be exposed by using ip_input directly (in
1550		 * particular, everything which modifies the packet
1551		 * in-place).  Yet another option is using the
1552		 * protosw directly to deliver the looped back
1553		 * packet.  For the moment, we'll err on the side
1554		 * of safety by using if_simloop().
1555		 */
1556#if 1 /* XXX */
1557		if (dst->sin_family != AF_INET) {
1558			printf("ip_mloopback: bad address family %d\n",
1559						dst->sin_family);
1560			dst->sin_family = AF_INET;
1561		}
1562#endif
1563
1564#ifdef notdef
1565		copym->m_pkthdr.rcvif = ifp;
1566		ip_input(copym);
1567#else
1568		if_simloop(ifp, copym, (struct sockaddr *)dst, 0);
1569#endif
1570	}
1571}
1572