ip_output.c revision 36369
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 *	$Id: ip_output.c,v 1.66 1998/03/21 11:34:20 peter Exp $
35 */
36
37#define _IP_VHL
38
39#include "opt_ipfw.h"
40#include "opt_ipdivert.h"
41#include "opt_ipfilter.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/protosw.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50
51#include <net/if.h>
52#include <net/route.h>
53
54#include <netinet/in.h>
55#include <netinet/in_systm.h>
56#include <netinet/ip.h>
57#include <netinet/in_pcb.h>
58#include <netinet/in_var.h>
59#include <netinet/ip_var.h>
60
61#ifdef vax
62#include <machine/mtpr.h>
63#endif
64#include <machine/in_cksum.h>
65
/* Malloc type under which ip_setmoptions() allocates struct ip_moptions. */
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");

/*
 * Normalize COMPAT_IPFW: if it is undefined or defined as 1 it ends up
 * defined as exactly 1; any other value leaves it undefined, which
 * compiles out the "#ifdef COMPAT_IPFW" NAT/firewall hooks in ip_output().
 */
#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
#undef COMPAT_IPFW
#define COMPAT_IPFW 1
#else
#undef COMPAT_IPFW
#endif

/*
 * Source of the IP identification field: ip_output() stores htons(ip_id++)
 * in every datagram that is neither forwarded nor raw output.
 */
u_short ip_id;

/* Forward declarations of the helpers defined later in this file. */
static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
static void	ip_mloopback
	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
static int	ip_getmoptions
	__P((int, struct ip_moptions *, struct mbuf **));
static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
static int	ip_setmoptions
	__P((int, struct ip_moptions **, struct mbuf *));

/*
 * ip_optcopy() is given external linkage when IP Filter is configured and
 * is file-local otherwise.  fr_checkp is the IP Filter hook that
 * ip_output() calls when it is non-null.
 */
#if defined(IPFILTER_LKM) || defined(IPFILTER)
int	ip_optcopy __P((struct ip *, struct ip *));
extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
#else
static int	ip_optcopy __P((struct ip *, struct ip *));
#endif


/* Protocol switch table; ip_output() indexes it through ip_protox[] to divert. */
extern	struct protosw inetsw[];
95
96/*
97 * IP output.  The packet in mbuf chain m contains a skeletal IP
98 * header (with len, off, ttl, proto, tos, src, dst).
99 * The mbuf chain containing the packet will be freed.
100 * The mbuf opt, if present, will not be freed.
101 */
102int
103ip_output(m0, opt, ro, flags, imo)
104	struct mbuf *m0;
105	struct mbuf *opt;
106	struct route *ro;
107	int flags;
108	struct ip_moptions *imo;
109{
110	struct ip *ip, *mhip;
111	struct ifnet *ifp;
112	struct mbuf *m = m0;
113	int hlen = sizeof (struct ip);
114	int len, off, error = 0;
115	struct sockaddr_in *dst;
116	struct in_ifaddr *ia;
117	int isbroadcast;
118
119#ifdef	DIAGNOSTIC
120	if ((m->m_flags & M_PKTHDR) == 0)
121		panic("ip_output no HDR");
122	if (!ro)
123		panic("ip_output no route, proto = %d",
124		      mtod(m, struct ip *)->ip_p);
125#endif
126	if (opt) {
127		m = ip_insertoptions(m, opt, &len);
128		hlen = len;
129	}
130	ip = mtod(m, struct ip *);
131	/*
132	 * Fill in IP header.
133	 */
134	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
135		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
136		ip->ip_off &= IP_DF;
137		ip->ip_id = htons(ip_id++);
138		ipstat.ips_localout++;
139	} else {
140		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
141	}
142
143	dst = (struct sockaddr_in *)&ro->ro_dst;
144	/*
145	 * If there is a cached route,
146	 * check that it is to the same destination
147	 * and is still up.  If not, free it and try again.
148	 */
149	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
150	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
151		RTFREE(ro->ro_rt);
152		ro->ro_rt = (struct rtentry *)0;
153	}
154	if (ro->ro_rt == 0) {
155		dst->sin_family = AF_INET;
156		dst->sin_len = sizeof(*dst);
157		dst->sin_addr = ip->ip_dst;
158	}
159	/*
160	 * If routing to interface only,
161	 * short circuit routing lookup.
162	 */
163#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
164#define sintosa(sin)	((struct sockaddr *)(sin))
165	if (flags & IP_ROUTETOIF) {
166		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
167		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
168			ipstat.ips_noroute++;
169			error = ENETUNREACH;
170			goto bad;
171		}
172		ifp = ia->ia_ifp;
173		ip->ip_ttl = 1;
174		isbroadcast = in_broadcast(dst->sin_addr, ifp);
175	} else {
176		/*
177		 * If this is the case, we probably don't want to allocate
178		 * a protocol-cloned route since we didn't get one from the
179		 * ULP.  This lets TCP do its thing, while not burdening
180		 * forwarding or ICMP with the overhead of cloning a route.
181		 * Of course, we still want to do any cloning requested by
182		 * the link layer, as this is probably required in all cases
183		 * for correct operation (as it is for ARP).
184		 */
185		if (ro->ro_rt == 0)
186			rtalloc_ign(ro, RTF_PRCLONING);
187		if (ro->ro_rt == 0) {
188			ipstat.ips_noroute++;
189			error = EHOSTUNREACH;
190			goto bad;
191		}
192		ia = ifatoia(ro->ro_rt->rt_ifa);
193		ifp = ro->ro_rt->rt_ifp;
194		ro->ro_rt->rt_use++;
195		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
196			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
197		if (ro->ro_rt->rt_flags & RTF_HOST)
198			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
199		else
200			isbroadcast = in_broadcast(dst->sin_addr, ifp);
201	}
202	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
203		struct in_multi *inm;
204
205		m->m_flags |= M_MCAST;
206		/*
207		 * IP destination address is multicast.  Make sure "dst"
208		 * still points to the address in "ro".  (It may have been
209		 * changed to point to a gateway address, above.)
210		 */
211		dst = (struct sockaddr_in *)&ro->ro_dst;
212		/*
213		 * See if the caller provided any multicast options
214		 */
215		if (imo != NULL) {
216			ip->ip_ttl = imo->imo_multicast_ttl;
217			if (imo->imo_multicast_ifp != NULL)
218				ifp = imo->imo_multicast_ifp;
219			if (imo->imo_multicast_vif != -1)
220				ip->ip_src.s_addr =
221				    ip_mcast_src(imo->imo_multicast_vif);
222		} else
223			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
224		/*
225		 * Confirm that the outgoing interface supports multicast.
226		 */
227		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
228			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
229				ipstat.ips_noroute++;
230				error = ENETUNREACH;
231				goto bad;
232			}
233		}
234		/*
235		 * If source address not specified yet, use address
236		 * of outgoing interface.
237		 */
238		if (ip->ip_src.s_addr == INADDR_ANY) {
239			register struct in_ifaddr *ia1;
240
241			for (ia1 = in_ifaddrhead.tqh_first; ia1;
242			     ia1 = ia1->ia_link.tqe_next)
243				if (ia1->ia_ifp == ifp) {
244					ip->ip_src = IA_SIN(ia1)->sin_addr;
245					break;
246				}
247		}
248
249		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
250		if (inm != NULL &&
251		   (imo == NULL || imo->imo_multicast_loop)) {
252			/*
253			 * If we belong to the destination multicast group
254			 * on the outgoing interface, and the caller did not
255			 * forbid loopback, loop back a copy.
256			 */
257			ip_mloopback(ifp, m, dst, hlen);
258		}
259		else {
260			/*
261			 * If we are acting as a multicast router, perform
262			 * multicast forwarding as if the packet had just
263			 * arrived on the interface to which we are about
264			 * to send.  The multicast forwarding function
265			 * recursively calls this function, using the
266			 * IP_FORWARDING flag to prevent infinite recursion.
267			 *
268			 * Multicasts that are looped back by ip_mloopback(),
269			 * above, will be forwarded by the ip_input() routine,
270			 * if necessary.
271			 */
272			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
273				/*
274				 * Check if rsvp daemon is running. If not, don't
275				 * set ip_moptions. This ensures that the packet
276				 * is multicast and not just sent down one link
277				 * as prescribed by rsvpd.
278				 */
279				if (!rsvp_on)
280				  imo = NULL;
281				if (ip_mforward(ip, ifp, m, imo) != 0) {
282					m_freem(m);
283					goto done;
284				}
285			}
286		}
287
288		/*
289		 * Multicasts with a time-to-live of zero may be looped-
290		 * back, above, but must not be transmitted on a network.
291		 * Also, multicasts addressed to the loopback interface
292		 * are not sent -- the above call to ip_mloopback() will
293		 * loop back a copy if this host actually belongs to the
294		 * destination group on the loopback interface.
295		 */
296		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
297			m_freem(m);
298			goto done;
299		}
300
301		goto sendit;
302	}
303#ifndef notdef
304	/*
305	 * If source address not specified yet, use address
306	 * of outgoing interface.
307	 */
308	if (ip->ip_src.s_addr == INADDR_ANY)
309		ip->ip_src = IA_SIN(ia)->sin_addr;
310#endif
311	/*
312	 * Verify that we have any chance at all of being able to queue
313	 *      the packet or packet fragments
314	 */
315	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
316		ifp->if_snd.ifq_maxlen) {
317			error = ENOBUFS;
318			goto bad;
319	}
320
321	/*
322	 * Look for broadcast address and
323	 * and verify user is allowed to send
324	 * such a packet.
325	 */
326	if (isbroadcast) {
327		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
328			error = EADDRNOTAVAIL;
329			goto bad;
330		}
331		if ((flags & IP_ALLOWBROADCAST) == 0) {
332			error = EACCES;
333			goto bad;
334		}
335		/* don't allow broadcast messages to be fragmented */
336		if ((u_short)ip->ip_len > ifp->if_mtu) {
337			error = EMSGSIZE;
338			goto bad;
339		}
340		m->m_flags |= M_BCAST;
341	} else {
342		m->m_flags &= ~M_BCAST;
343	}
344
345sendit:
346	/*
347	 * IpHack's section.
348	 * - Xlate: translate packet's addr/port (NAT).
349	 * - Firewall: deny/allow/etc.
350	 * - Wrap: fake packet's addr/port <unimpl.>
351	 * - Encapsulate: put it in another IP and send out. <unimp.>
352	 */
353#if defined(IPFILTER) || defined(IPFILTER_LKM)
354	if (fr_checkp) {
355		struct  mbuf    *m1 = m;
356
357		if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
358			goto done;
359		ip = mtod(m = m1, struct ip *);
360	}
361#endif
362
363#ifdef COMPAT_IPFW
364        if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
365		error = EACCES;
366		goto done;
367	}
368
369	/*
370	 * Check with the firewall...
371	 */
372	if (ip_fw_chk_ptr) {
373#ifdef IPDIVERT
374#ifndef IPFW_DIVERT_RESTART
375		ip_divert_port = (*ip_fw_chk_ptr)(&ip,
376		    hlen, ifp, ip_divert_ignore, &m);
377		ip_divert_ignore = 0;
378#else
379		ip_divert_in_cookie = 0;
380		ip_divert_port = (*ip_fw_chk_ptr)(&ip,
381		    hlen, ifp, ip_divert_out_cookie, &m);
382		ip_divert_out_cookie = 0;
383#endif /* IPFW_DIVERT_RESTART */
384		if (ip_divert_port) {		/* Divert packet */
385			(*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, 0);
386			goto done;
387		}
388#else
389		/* If ipfw says divert, we have to just drop packet */
390		if ((*ip_fw_chk_ptr)(&ip, hlen, ifp, 0, &m)) {
391			m_freem(m);
392			goto done;
393		}
394#endif
395		if (!m) {
396			error = EACCES;
397			goto done;
398		}
399	}
400#endif /* COMPAT_IPFW */
401
402	/*
403	 * If small enough for interface, can just send directly.
404	 */
405	if ((u_short)ip->ip_len <= ifp->if_mtu) {
406		ip->ip_len = htons((u_short)ip->ip_len);
407		ip->ip_off = htons((u_short)ip->ip_off);
408		ip->ip_sum = 0;
409		if (ip->ip_vhl == IP_VHL_BORING) {
410			ip->ip_sum = in_cksum_hdr(ip);
411		} else {
412			ip->ip_sum = in_cksum(m, hlen);
413		}
414		error = (*ifp->if_output)(ifp, m,
415				(struct sockaddr *)dst, ro->ro_rt);
416		goto done;
417	}
418	/*
419	 * Too large for interface; fragment if possible.
420	 * Must be able to put at least 8 bytes per fragment.
421	 */
422	if (ip->ip_off & IP_DF) {
423		error = EMSGSIZE;
424		/*
425		 * This case can happen if the user changed the MTU
426		 * of an interface after enabling IP on it.  Because
427		 * most netifs don't keep track of routes pointing to
428		 * them, there is no way for one to update all its
429		 * routes when the MTU is changed.
430		 */
431		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
432		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
433		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
434			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
435		}
436		ipstat.ips_cantfrag++;
437		goto bad;
438	}
439	len = (ifp->if_mtu - hlen) &~ 7;
440	if (len < 8) {
441		error = EMSGSIZE;
442		goto bad;
443	}
444
445    {
446	int mhlen, firstlen = len;
447	struct mbuf **mnext = &m->m_nextpkt;
448
449	/*
450	 * Loop through length of segment after first fragment,
451	 * make new header and copy data of each part and link onto chain.
452	 */
453	m0 = m;
454	mhlen = sizeof (struct ip);
455	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
456		MGETHDR(m, M_DONTWAIT, MT_HEADER);
457		if (m == 0) {
458			error = ENOBUFS;
459			ipstat.ips_odropped++;
460			goto sendorfree;
461		}
462		m->m_data += max_linkhdr;
463		mhip = mtod(m, struct ip *);
464		*mhip = *ip;
465		if (hlen > sizeof (struct ip)) {
466			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
467			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
468		}
469		m->m_len = mhlen;
470		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
471		if (ip->ip_off & IP_MF)
472			mhip->ip_off |= IP_MF;
473		if (off + len >= (u_short)ip->ip_len)
474			len = (u_short)ip->ip_len - off;
475		else
476			mhip->ip_off |= IP_MF;
477		mhip->ip_len = htons((u_short)(len + mhlen));
478		m->m_next = m_copy(m0, off, len);
479		if (m->m_next == 0) {
480			(void) m_free(m);
481			error = ENOBUFS;	/* ??? */
482			ipstat.ips_odropped++;
483			goto sendorfree;
484		}
485		m->m_pkthdr.len = mhlen + len;
486		m->m_pkthdr.rcvif = (struct ifnet *)0;
487		mhip->ip_off = htons((u_short)mhip->ip_off);
488		mhip->ip_sum = 0;
489		if (mhip->ip_vhl == IP_VHL_BORING) {
490			mhip->ip_sum = in_cksum_hdr(mhip);
491		} else {
492			mhip->ip_sum = in_cksum(m, mhlen);
493		}
494		*mnext = m;
495		mnext = &m->m_nextpkt;
496		ipstat.ips_ofragments++;
497	}
498	/*
499	 * Update first fragment by trimming what's been copied out
500	 * and updating header, then send each fragment (in order).
501	 */
502	m = m0;
503	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
504	m->m_pkthdr.len = hlen + firstlen;
505	ip->ip_len = htons((u_short)m->m_pkthdr.len);
506	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
507	ip->ip_sum = 0;
508	if (ip->ip_vhl == IP_VHL_BORING) {
509		ip->ip_sum = in_cksum_hdr(ip);
510	} else {
511		ip->ip_sum = in_cksum(m, hlen);
512	}
513sendorfree:
514	for (m = m0; m; m = m0) {
515		m0 = m->m_nextpkt;
516		m->m_nextpkt = 0;
517		if (error == 0)
518			error = (*ifp->if_output)(ifp, m,
519			    (struct sockaddr *)dst, ro->ro_rt);
520		else
521			m_freem(m);
522	}
523
524	if (error == 0)
525		ipstat.ips_fragmented++;
526    }
527done:
528	return (error);
529bad:
530	m_freem(m0);
531	goto done;
532}
533
534/*
535 * Insert IP options into preformed packet.
536 * Adjust IP destination as required for IP source routing,
537 * as indicated by a non-zero in_addr at the start of the options.
538 *
539 * XXX This routine assumes that the packet has no options in place.
540 */
541static struct mbuf *
542ip_insertoptions(m, opt, phlen)
543	register struct mbuf *m;
544	struct mbuf *opt;
545	int *phlen;
546{
547	register struct ipoption *p = mtod(opt, struct ipoption *);
548	struct mbuf *n;
549	register struct ip *ip = mtod(m, struct ip *);
550	unsigned optlen;
551
552	optlen = opt->m_len - sizeof(p->ipopt_dst);
553	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
554		return (m);		/* XXX should fail */
555	if (p->ipopt_dst.s_addr)
556		ip->ip_dst = p->ipopt_dst;
557	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
558		MGETHDR(n, M_DONTWAIT, MT_HEADER);
559		if (n == 0)
560			return (m);
561		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
562		m->m_len -= sizeof(struct ip);
563		m->m_data += sizeof(struct ip);
564		n->m_next = m;
565		m = n;
566		m->m_len = optlen + sizeof(struct ip);
567		m->m_data += max_linkhdr;
568		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
569	} else {
570		m->m_data -= optlen;
571		m->m_len += optlen;
572		m->m_pkthdr.len += optlen;
573		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
574	}
575	ip = mtod(m, struct ip *);
576	bcopy(p->ipopt_list, ip + 1, optlen);
577	*phlen = sizeof(struct ip) + optlen;
578	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
579	ip->ip_len += optlen;
580	return (m);
581}
582
583/*
584 * Copy options from ip to jp,
585 * omitting those not copied during fragmentation.
586 */
587#if !defined(IPFILTER) && !defined(IPFILTER_LKM)
588static
589#endif
590int
591ip_optcopy(ip, jp)
592	struct ip *ip, *jp;
593{
594	register u_char *cp, *dp;
595	int opt, optlen, cnt;
596
597	cp = (u_char *)(ip + 1);
598	dp = (u_char *)(jp + 1);
599	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
600	for (; cnt > 0; cnt -= optlen, cp += optlen) {
601		opt = cp[0];
602		if (opt == IPOPT_EOL)
603			break;
604		if (opt == IPOPT_NOP) {
605			/* Preserve for IP mcast tunnel's LSRR alignment. */
606			*dp++ = IPOPT_NOP;
607			optlen = 1;
608			continue;
609		} else
610			optlen = cp[IPOPT_OLEN];
611		/* bogus lengths should have been caught by ip_dooptions */
612		if (optlen > cnt)
613			optlen = cnt;
614		if (IPOPT_COPIED(opt)) {
615			bcopy(cp, dp, optlen);
616			dp += optlen;
617		}
618	}
619	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
620		*dp++ = IPOPT_EOL;
621	return (optlen);
622}
623
624/*
625 * IP socket option processing.
626 */
627int
628ip_ctloutput(op, so, level, optname, mp, p)
629	int op;
630	struct socket *so;
631	int level, optname;
632	struct mbuf **mp;
633	struct proc *p;
634{
635	register struct inpcb *inp = sotoinpcb(so);
636	register struct mbuf *m = *mp;
637	register int optval = 0;
638	int error = 0;
639
640	if (level != IPPROTO_IP) {
641		error = EINVAL;
642		if (op == PRCO_SETOPT && *mp)
643			(void) m_free(*mp);
644	} else switch (op) {
645
646	case PRCO_SETOPT:
647		switch (optname) {
648		case IP_OPTIONS:
649#ifdef notyet
650		case IP_RETOPTS:
651			return (ip_pcbopts(optname, &inp->inp_options, m));
652#else
653			return (ip_pcbopts(&inp->inp_options, m));
654#endif
655
656		case IP_TOS:
657		case IP_TTL:
658		case IP_RECVOPTS:
659		case IP_RECVRETOPTS:
660		case IP_RECVDSTADDR:
661		case IP_RECVIF:
662			if (m == 0 || m->m_len != sizeof(int))
663				error = EINVAL;
664			else {
665				optval = *mtod(m, int *);
666				switch (optname) {
667
668				case IP_TOS:
669					inp->inp_ip_tos = optval;
670					break;
671
672				case IP_TTL:
673					inp->inp_ip_ttl = optval;
674					break;
675#define	OPTSET(bit) \
676	if (optval) \
677		inp->inp_flags |= bit; \
678	else \
679		inp->inp_flags &= ~bit;
680
681				case IP_RECVOPTS:
682					OPTSET(INP_RECVOPTS);
683					break;
684
685				case IP_RECVRETOPTS:
686					OPTSET(INP_RECVRETOPTS);
687					break;
688
689				case IP_RECVDSTADDR:
690					OPTSET(INP_RECVDSTADDR);
691					break;
692
693				case IP_RECVIF:
694					OPTSET(INP_RECVIF);
695					break;
696				}
697			}
698			break;
699#undef OPTSET
700
701		case IP_MULTICAST_IF:
702		case IP_MULTICAST_VIF:
703		case IP_MULTICAST_TTL:
704		case IP_MULTICAST_LOOP:
705		case IP_ADD_MEMBERSHIP:
706		case IP_DROP_MEMBERSHIP:
707			error = ip_setmoptions(optname, &inp->inp_moptions, m);
708			break;
709
710		case IP_PORTRANGE:
711			if (m == 0 || m->m_len != sizeof(int))
712				error = EINVAL;
713			else {
714				optval = *mtod(m, int *);
715
716				switch (optval) {
717
718				case IP_PORTRANGE_DEFAULT:
719					inp->inp_flags &= ~(INP_LOWPORT);
720					inp->inp_flags &= ~(INP_HIGHPORT);
721					break;
722
723				case IP_PORTRANGE_HIGH:
724					inp->inp_flags &= ~(INP_LOWPORT);
725					inp->inp_flags |= INP_HIGHPORT;
726					break;
727
728				case IP_PORTRANGE_LOW:
729					inp->inp_flags &= ~(INP_HIGHPORT);
730					inp->inp_flags |= INP_LOWPORT;
731					break;
732
733				default:
734					error = EINVAL;
735					break;
736				}
737			}
738			break;
739
740		default:
741			error = ENOPROTOOPT;
742			break;
743		}
744		if (m)
745			(void)m_free(m);
746		break;
747
748	case PRCO_GETOPT:
749		switch (optname) {
750		case IP_OPTIONS:
751		case IP_RETOPTS:
752			*mp = m = m_get(M_WAIT, MT_SOOPTS);
753			if (inp->inp_options) {
754				m->m_len = inp->inp_options->m_len;
755				bcopy(mtod(inp->inp_options, void *),
756				    mtod(m, void *), m->m_len);
757			} else
758				m->m_len = 0;
759			break;
760
761		case IP_TOS:
762		case IP_TTL:
763		case IP_RECVOPTS:
764		case IP_RECVRETOPTS:
765		case IP_RECVDSTADDR:
766		case IP_RECVIF:
767			*mp = m = m_get(M_WAIT, MT_SOOPTS);
768			m->m_len = sizeof(int);
769			switch (optname) {
770
771			case IP_TOS:
772				optval = inp->inp_ip_tos;
773				break;
774
775			case IP_TTL:
776				optval = inp->inp_ip_ttl;
777				break;
778
779#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
780
781			case IP_RECVOPTS:
782				optval = OPTBIT(INP_RECVOPTS);
783				break;
784
785			case IP_RECVRETOPTS:
786				optval = OPTBIT(INP_RECVRETOPTS);
787				break;
788
789			case IP_RECVDSTADDR:
790				optval = OPTBIT(INP_RECVDSTADDR);
791				break;
792
793			case IP_RECVIF:
794				optval = OPTBIT(INP_RECVIF);
795				break;
796			}
797			*mtod(m, int *) = optval;
798			break;
799
800		case IP_MULTICAST_IF:
801		case IP_MULTICAST_VIF:
802		case IP_MULTICAST_TTL:
803		case IP_MULTICAST_LOOP:
804		case IP_ADD_MEMBERSHIP:
805		case IP_DROP_MEMBERSHIP:
806			error = ip_getmoptions(optname, inp->inp_moptions, mp);
807			break;
808
809		case IP_PORTRANGE:
810			*mp = m = m_get(M_WAIT, MT_SOOPTS);
811			m->m_len = sizeof(int);
812
813			if (inp->inp_flags & INP_HIGHPORT)
814				optval = IP_PORTRANGE_HIGH;
815			else if (inp->inp_flags & INP_LOWPORT)
816				optval = IP_PORTRANGE_LOW;
817			else
818				optval = 0;
819
820			*mtod(m, int *) = optval;
821			break;
822
823		default:
824			error = ENOPROTOOPT;
825			break;
826		}
827		break;
828	}
829	return (error);
830}
831
832/*
833 * Set up IP options in pcb for insertion in output packets.
834 * Store in mbuf with pointer in pcbopt, adding pseudo-option
835 * with destination address if source routed.
836 */
837static int
838#ifdef notyet
839ip_pcbopts(optname, pcbopt, m)
840	int optname;
841#else
842ip_pcbopts(pcbopt, m)
843#endif
844	struct mbuf **pcbopt;
845	register struct mbuf *m;
846{
847	register int cnt, optlen;
848	register u_char *cp;
849	u_char opt;
850
851	/* turn off any old options */
852	if (*pcbopt)
853		(void)m_free(*pcbopt);
854	*pcbopt = 0;
855	if (m == (struct mbuf *)0 || m->m_len == 0) {
856		/*
857		 * Only turning off any previous options.
858		 */
859		if (m)
860			(void)m_free(m);
861		return (0);
862	}
863
864#ifndef	vax
865	if (m->m_len % sizeof(long))
866		goto bad;
867#endif
868	/*
869	 * IP first-hop destination address will be stored before
870	 * actual options; move other options back
871	 * and clear it when none present.
872	 */
873	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
874		goto bad;
875	cnt = m->m_len;
876	m->m_len += sizeof(struct in_addr);
877	cp = mtod(m, u_char *) + sizeof(struct in_addr);
878	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
879	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
880
881	for (; cnt > 0; cnt -= optlen, cp += optlen) {
882		opt = cp[IPOPT_OPTVAL];
883		if (opt == IPOPT_EOL)
884			break;
885		if (opt == IPOPT_NOP)
886			optlen = 1;
887		else {
888			optlen = cp[IPOPT_OLEN];
889			if (optlen <= IPOPT_OLEN || optlen > cnt)
890				goto bad;
891		}
892		switch (opt) {
893
894		default:
895			break;
896
897		case IPOPT_LSRR:
898		case IPOPT_SSRR:
899			/*
900			 * user process specifies route as:
901			 *	->A->B->C->D
902			 * D must be our final destination (but we can't
903			 * check that since we may not have connected yet).
904			 * A is first hop destination, which doesn't appear in
905			 * actual IP option, but is stored before the options.
906			 */
907			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
908				goto bad;
909			m->m_len -= sizeof(struct in_addr);
910			cnt -= sizeof(struct in_addr);
911			optlen -= sizeof(struct in_addr);
912			cp[IPOPT_OLEN] = optlen;
913			/*
914			 * Move first hop before start of options.
915			 */
916			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
917			    sizeof(struct in_addr));
918			/*
919			 * Then copy rest of options back
920			 * to close up the deleted entry.
921			 */
922			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
923			    sizeof(struct in_addr)),
924			    (caddr_t)&cp[IPOPT_OFFSET+1],
925			    (unsigned)cnt + sizeof(struct in_addr));
926			break;
927		}
928	}
929	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
930		goto bad;
931	*pcbopt = m;
932	return (0);
933
934bad:
935	(void)m_free(m);
936	return (EINVAL);
937}
938
939/*
940 * Set the IP multicast options in response to user setsockopt().
941 */
942static int
943ip_setmoptions(optname, imop, m)
944	int optname;
945	struct ip_moptions **imop;
946	struct mbuf *m;
947{
948	register int error = 0;
949	u_char loop;
950	register int i;
951	struct in_addr addr;
952	register struct ip_mreq *mreq;
953	register struct ifnet *ifp;
954	register struct ip_moptions *imo = *imop;
955	struct route ro;
956	register struct sockaddr_in *dst;
957	int s;
958
959	if (imo == NULL) {
960		/*
961		 * No multicast option buffer attached to the pcb;
962		 * allocate one and initialize to default values.
963		 */
964		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
965		    M_WAITOK);
966
967		if (imo == NULL)
968			return (ENOBUFS);
969		*imop = imo;
970		imo->imo_multicast_ifp = NULL;
971		imo->imo_multicast_vif = -1;
972		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
973		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
974		imo->imo_num_memberships = 0;
975	}
976
977	switch (optname) {
978	/* store an index number for the vif you wanna use in the send */
979	case IP_MULTICAST_VIF:
980		if (!legal_vif_num) {
981			error = EOPNOTSUPP;
982			break;
983		}
984		if (m == NULL || m->m_len != sizeof(int)) {
985			error = EINVAL;
986			break;
987		}
988		i = *(mtod(m, int *));
989		if (!legal_vif_num(i) && (i != -1)) {
990			error = EINVAL;
991			break;
992		}
993		imo->imo_multicast_vif = i;
994		break;
995
996	case IP_MULTICAST_IF:
997		/*
998		 * Select the interface for outgoing multicast packets.
999		 */
1000		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1001			error = EINVAL;
1002			break;
1003		}
1004		addr = *(mtod(m, struct in_addr *));
1005		/*
1006		 * INADDR_ANY is used to remove a previous selection.
1007		 * When no interface is selected, a default one is
1008		 * chosen every time a multicast packet is sent.
1009		 */
1010		if (addr.s_addr == INADDR_ANY) {
1011			imo->imo_multicast_ifp = NULL;
1012			break;
1013		}
1014		/*
1015		 * The selected interface is identified by its local
1016		 * IP address.  Find the interface and confirm that
1017		 * it supports multicasting.
1018		 */
1019		s = splimp();
1020		INADDR_TO_IFP(addr, ifp);
1021		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1022			splx(s);
1023			error = EADDRNOTAVAIL;
1024			break;
1025		}
1026		imo->imo_multicast_ifp = ifp;
1027		splx(s);
1028		break;
1029
1030	case IP_MULTICAST_TTL:
1031		/*
1032		 * Set the IP time-to-live for outgoing multicast packets.
1033		 */
1034		if (m == NULL || m->m_len != 1) {
1035			error = EINVAL;
1036			break;
1037		}
1038		imo->imo_multicast_ttl = *(mtod(m, u_char *));
1039		break;
1040
1041	case IP_MULTICAST_LOOP:
1042		/*
1043		 * Set the loopback flag for outgoing multicast packets.
1044		 * Must be zero or one.
1045		 */
1046		if (m == NULL || m->m_len != 1 ||
1047		   (loop = *(mtod(m, u_char *))) > 1) {
1048			error = EINVAL;
1049			break;
1050		}
1051		imo->imo_multicast_loop = loop;
1052		break;
1053
1054	case IP_ADD_MEMBERSHIP:
1055		/*
1056		 * Add a multicast group membership.
1057		 * Group must be a valid IP multicast address.
1058		 */
1059		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1060			error = EINVAL;
1061			break;
1062		}
1063		mreq = mtod(m, struct ip_mreq *);
1064		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1065			error = EINVAL;
1066			break;
1067		}
1068		s = splimp();
1069		/*
1070		 * If no interface address was provided, use the interface of
1071		 * the route to the given multicast address.
1072		 */
1073		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1074			bzero((caddr_t)&ro, sizeof(ro));
1075			dst = (struct sockaddr_in *)&ro.ro_dst;
1076			dst->sin_len = sizeof(*dst);
1077			dst->sin_family = AF_INET;
1078			dst->sin_addr = mreq->imr_multiaddr;
1079			rtalloc(&ro);
1080			if (ro.ro_rt == NULL) {
1081				error = EADDRNOTAVAIL;
1082				splx(s);
1083				break;
1084			}
1085			ifp = ro.ro_rt->rt_ifp;
1086			rtfree(ro.ro_rt);
1087		}
1088		else {
1089			INADDR_TO_IFP(mreq->imr_interface, ifp);
1090		}
1091
1092		/*
1093		 * See if we found an interface, and confirm that it
1094		 * supports multicast.
1095		 */
1096		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1097			error = EADDRNOTAVAIL;
1098			splx(s);
1099			break;
1100		}
1101		/*
1102		 * See if the membership already exists or if all the
1103		 * membership slots are full.
1104		 */
1105		for (i = 0; i < imo->imo_num_memberships; ++i) {
1106			if (imo->imo_membership[i]->inm_ifp == ifp &&
1107			    imo->imo_membership[i]->inm_addr.s_addr
1108						== mreq->imr_multiaddr.s_addr)
1109				break;
1110		}
1111		if (i < imo->imo_num_memberships) {
1112			error = EADDRINUSE;
1113			splx(s);
1114			break;
1115		}
1116		if (i == IP_MAX_MEMBERSHIPS) {
1117			error = ETOOMANYREFS;
1118			splx(s);
1119			break;
1120		}
1121		/*
1122		 * Everything looks good; add a new record to the multicast
1123		 * address list for the given interface.
1124		 */
1125		if ((imo->imo_membership[i] =
1126		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1127			error = ENOBUFS;
1128			splx(s);
1129			break;
1130		}
1131		++imo->imo_num_memberships;
1132		splx(s);
1133		break;
1134
1135	case IP_DROP_MEMBERSHIP:
1136		/*
1137		 * Drop a multicast group membership.
1138		 * Group must be a valid IP multicast address.
1139		 */
1140		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1141			error = EINVAL;
1142			break;
1143		}
1144		mreq = mtod(m, struct ip_mreq *);
1145		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1146			error = EINVAL;
1147			break;
1148		}
1149
1150		s = splimp();
1151		/*
1152		 * If an interface address was specified, get a pointer
1153		 * to its ifnet structure.
1154		 */
1155		if (mreq->imr_interface.s_addr == INADDR_ANY)
1156			ifp = NULL;
1157		else {
1158			INADDR_TO_IFP(mreq->imr_interface, ifp);
1159			if (ifp == NULL) {
1160				error = EADDRNOTAVAIL;
1161				splx(s);
1162				break;
1163			}
1164		}
1165		/*
1166		 * Find the membership in the membership array.
1167		 */
1168		for (i = 0; i < imo->imo_num_memberships; ++i) {
1169			if ((ifp == NULL ||
1170			     imo->imo_membership[i]->inm_ifp == ifp) &&
1171			     imo->imo_membership[i]->inm_addr.s_addr ==
1172			     mreq->imr_multiaddr.s_addr)
1173				break;
1174		}
1175		if (i == imo->imo_num_memberships) {
1176			error = EADDRNOTAVAIL;
1177			splx(s);
1178			break;
1179		}
1180		/*
1181		 * Give up the multicast address record to which the
1182		 * membership points.
1183		 */
1184		in_delmulti(imo->imo_membership[i]);
1185		/*
1186		 * Remove the gap in the membership array.
1187		 */
1188		for (++i; i < imo->imo_num_memberships; ++i)
1189			imo->imo_membership[i-1] = imo->imo_membership[i];
1190		--imo->imo_num_memberships;
1191		splx(s);
1192		break;
1193
1194	default:
1195		error = EOPNOTSUPP;
1196		break;
1197	}
1198
1199	/*
1200	 * If all options have default values, no need to keep the mbuf.
1201	 */
1202	if (imo->imo_multicast_ifp == NULL &&
1203	    imo->imo_multicast_vif == -1 &&
1204	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1205	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1206	    imo->imo_num_memberships == 0) {
1207		free(*imop, M_IPMOPTS);
1208		*imop = NULL;
1209	}
1210
1211	return (error);
1212}
1213
1214/*
1215 * Return the IP multicast options in response to user getsockopt().
1216 */
1217static int
1218ip_getmoptions(optname, imo, mp)
1219	int optname;
1220	register struct ip_moptions *imo;
1221	register struct mbuf **mp;
1222{
1223	u_char *ttl;
1224	u_char *loop;
1225	struct in_addr *addr;
1226	struct in_ifaddr *ia;
1227
1228	*mp = m_get(M_WAIT, MT_SOOPTS);
1229
1230	switch (optname) {
1231
1232	case IP_MULTICAST_VIF:
1233		if (imo != NULL)
1234			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1235		else
1236			*(mtod(*mp, int *)) = -1;
1237		(*mp)->m_len = sizeof(int);
1238		return(0);
1239
1240	case IP_MULTICAST_IF:
1241		addr = mtod(*mp, struct in_addr *);
1242		(*mp)->m_len = sizeof(struct in_addr);
1243		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1244			addr->s_addr = INADDR_ANY;
1245		else {
1246			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1247			addr->s_addr = (ia == NULL) ? INADDR_ANY
1248					: IA_SIN(ia)->sin_addr.s_addr;
1249		}
1250		return (0);
1251
1252	case IP_MULTICAST_TTL:
1253		ttl = mtod(*mp, u_char *);
1254		(*mp)->m_len = 1;
1255		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1256				     : imo->imo_multicast_ttl;
1257		return (0);
1258
1259	case IP_MULTICAST_LOOP:
1260		loop = mtod(*mp, u_char *);
1261		(*mp)->m_len = 1;
1262		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1263				      : imo->imo_multicast_loop;
1264		return (0);
1265
1266	default:
1267		return (EOPNOTSUPP);
1268	}
1269}
1270
1271/*
1272 * Discard the IP multicast options.
1273 */
1274void
1275ip_freemoptions(imo)
1276	register struct ip_moptions *imo;
1277{
1278	register int i;
1279
1280	if (imo != NULL) {
1281		for (i = 0; i < imo->imo_num_memberships; ++i)
1282			in_delmulti(imo->imo_membership[i]);
1283		free(imo, M_IPMOPTS);
1284	}
1285}
1286
1287/*
1288 * Routine called from ip_output() to loop back a copy of an IP multicast
1289 * packet to the input queue of a specified interface.  Note that this
1290 * calls the output routine of the loopback "driver", but with an interface
1291 * pointer that might NOT be a loopback interface -- evil, but easier than
1292 * replicating that code here.
1293 */
1294static void
1295ip_mloopback(ifp, m, dst, hlen)
1296	struct ifnet *ifp;
1297	register struct mbuf *m;
1298	register struct sockaddr_in *dst;
1299	int hlen;
1300{
1301	register struct ip *ip;
1302	struct mbuf *copym;
1303
1304	copym = m_copy(m, 0, M_COPYALL);
1305	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1306		copym = m_pullup(copym, hlen);
1307	if (copym != NULL) {
1308		/*
1309		 * We don't bother to fragment if the IP length is greater
1310		 * than the interface's MTU.  Can this possibly matter?
1311		 */
1312		ip = mtod(copym, struct ip *);
1313		ip->ip_len = htons((u_short)ip->ip_len);
1314		ip->ip_off = htons((u_short)ip->ip_off);
1315		ip->ip_sum = 0;
1316		if (ip->ip_vhl == IP_VHL_BORING) {
1317			ip->ip_sum = in_cksum_hdr(ip);
1318		} else {
1319			ip->ip_sum = in_cksum(copym, hlen);
1320		}
1321		/*
1322		 * NB:
1323		 * It's not clear whether there are any lingering
1324		 * reentrancy problems in other areas which might
1325		 * be exposed by using ip_input directly (in
1326		 * particular, everything which modifies the packet
1327		 * in-place).  Yet another option is using the
1328		 * protosw directly to deliver the looped back
1329		 * packet.  For the moment, we'll err on the side
1330		 * of safety by continuing to abuse looutput().
1331		 */
1332#ifdef notdef
1333		copym->m_pkthdr.rcvif = ifp;
1334		ip_input(copym)
1335#else
1336		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1337#endif
1338	}
1339}
1340