ip_output.c revision 15295
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 *	$Id: ip_output.c,v 1.34 1996/04/03 13:52:20 phk Exp $
35 */
36
37#define _IP_VHL
38
39#include <sys/param.h>
40#include <sys/queue.h>
41#include <sys/systm.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/errno.h>
45#include <sys/protosw.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48
49#include <net/if.h>
50#include <net/route.h>
51
52#include <netinet/in.h>
53#include <netinet/in_systm.h>
54#include <netinet/ip.h>
55#include <netinet/in_pcb.h>
56#include <netinet/in_var.h>
57#include <netinet/ip_var.h>
58
59#ifdef vax
60#include <machine/mtpr.h>
61#endif
62#include <machine/in_cksum.h>
63
/*
 * Identification counter for outgoing datagrams: ip_output() stores the
 * current value (in network byte order) in the header of each packet it
 * builds itself and then increments the counter.
 */
u_short ip_id;

/* Prototypes for the file-local helpers defined further down. */
static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
static void	ip_mloopback
	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
static int	ip_getmoptions
	__P((int, struct ip_moptions *, struct mbuf **));
static int	ip_optcopy __P((struct ip *, struct ip *));
static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
static int	ip_setmoptions
	__P((int, struct ip_moptions **, struct mbuf *));
75
76/*
77 * IP output.  The packet in mbuf chain m contains a skeletal IP
78 * header (with len, off, ttl, proto, tos, src, dst).
79 * The mbuf chain containing the packet will be freed.
80 * The mbuf opt, if present, will not be freed.
81 */
82int
83ip_output(m0, opt, ro, flags, imo)
84	struct mbuf *m0;
85	struct mbuf *opt;
86	struct route *ro;
87	int flags;
88	struct ip_moptions *imo;
89{
90	struct ip *ip, *mhip;
91	struct ifnet *ifp;
92	struct mbuf *m = m0;
93	int hlen = sizeof (struct ip);
94	int len, off, error = 0;
95	struct sockaddr_in *dst;
96	struct in_ifaddr *ia;
97
98#ifdef	DIAGNOSTIC
99	if ((m->m_flags & M_PKTHDR) == 0)
100		panic("ip_output no HDR");
101	if (!ro)
102		panic("ip_output no route, proto = %d",
103		      mtod(m, struct ip *)->ip_p);
104#endif
105	if (opt) {
106		m = ip_insertoptions(m, opt, &len);
107		hlen = len;
108	}
109	ip = mtod(m, struct ip *);
110	/*
111	 * Fill in IP header.
112	 */
113	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
114		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
115		ip->ip_off &= IP_DF;
116		ip->ip_id = htons(ip_id++);
117		ipstat.ips_localout++;
118	} else {
119		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
120	}
121
122	dst = (struct sockaddr_in *)&ro->ro_dst;
123	/*
124	 * If there is a cached route,
125	 * check that it is to the same destination
126	 * and is still up.  If not, free it and try again.
127	 */
128	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
129	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
130		RTFREE(ro->ro_rt);
131		ro->ro_rt = (struct rtentry *)0;
132	}
133	if (ro->ro_rt == 0) {
134		dst->sin_family = AF_INET;
135		dst->sin_len = sizeof(*dst);
136		dst->sin_addr = ip->ip_dst;
137	}
138	/*
139	 * If routing to interface only,
140	 * short circuit routing lookup.
141	 */
142#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
143#define sintosa(sin)	((struct sockaddr *)(sin))
144	if (flags & IP_ROUTETOIF) {
145		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
146		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
147			ipstat.ips_noroute++;
148			error = ENETUNREACH;
149			goto bad;
150		}
151		ifp = ia->ia_ifp;
152		ip->ip_ttl = 1;
153	} else {
154		/*
155		 * If this is the case, we probably don't want to allocate
156		 * a protocol-cloned route since we didn't get one from the
157		 * ULP.  This lets TCP do its thing, while not burdening
158		 * forwarding or ICMP with the overhead of cloning a route.
159		 * Of course, we still want to do any cloning requested by
160		 * the link layer, as this is probably required in all cases
161		 * for correct operation (as it is for ARP).
162		 */
163		if (ro->ro_rt == 0)
164			rtalloc_ign(ro, RTF_PRCLONING);
165		if (ro->ro_rt == 0) {
166			ipstat.ips_noroute++;
167			error = EHOSTUNREACH;
168			goto bad;
169		}
170		ia = ifatoia(ro->ro_rt->rt_ifa);
171		ifp = ro->ro_rt->rt_ifp;
172		ro->ro_rt->rt_use++;
173		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
174			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
175	}
176	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
177		struct in_multi *inm;
178
179		m->m_flags |= M_MCAST;
180		/*
181		 * IP destination address is multicast.  Make sure "dst"
182		 * still points to the address in "ro".  (It may have been
183		 * changed to point to a gateway address, above.)
184		 */
185		dst = (struct sockaddr_in *)&ro->ro_dst;
186		/*
187		 * See if the caller provided any multicast options
188		 */
189		if (imo != NULL) {
190			ip->ip_ttl = imo->imo_multicast_ttl;
191			if (imo->imo_multicast_ifp != NULL)
192				ifp = imo->imo_multicast_ifp;
193			if (imo->imo_multicast_vif != -1)
194				ip->ip_src.s_addr =
195				    ip_mcast_src(imo->imo_multicast_vif);
196		} else
197			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
198		/*
199		 * Confirm that the outgoing interface supports multicast.
200		 */
201		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
202			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
203				ipstat.ips_noroute++;
204				error = ENETUNREACH;
205				goto bad;
206			}
207		}
208		/*
209		 * If source address not specified yet, use address
210		 * of outgoing interface.
211		 */
212		if (ip->ip_src.s_addr == INADDR_ANY) {
213			register struct in_ifaddr *ia;
214
215			for (ia = in_ifaddr; ia; ia = ia->ia_next)
216				if (ia->ia_ifp == ifp) {
217					ip->ip_src = IA_SIN(ia)->sin_addr;
218					break;
219				}
220		}
221
222		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
223		if (inm != NULL &&
224		   (imo == NULL || imo->imo_multicast_loop)) {
225			/*
226			 * If we belong to the destination multicast group
227			 * on the outgoing interface, and the caller did not
228			 * forbid loopback, loop back a copy.
229			 */
230			ip_mloopback(ifp, m, dst);
231		}
232		else {
233			/*
234			 * If we are acting as a multicast router, perform
235			 * multicast forwarding as if the packet had just
236			 * arrived on the interface to which we are about
237			 * to send.  The multicast forwarding function
238			 * recursively calls this function, using the
239			 * IP_FORWARDING flag to prevent infinite recursion.
240			 *
241			 * Multicasts that are looped back by ip_mloopback(),
242			 * above, will be forwarded by the ip_input() routine,
243			 * if necessary.
244			 */
245			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
246				/*
247				 * Check if rsvp daemon is running. If not, don't
248				 * set ip_moptions. This ensures that the packet
249				 * is multicast and not just sent down one link
250				 * as prescribed by rsvpd.
251				 */
252				if (!rsvp_on)
253				  imo = NULL;
254				if (ip_mforward(ip, ifp, m, imo) != 0) {
255					m_freem(m);
256					goto done;
257				}
258			}
259		}
260
261		/*
262		 * Multicasts with a time-to-live of zero may be looped-
263		 * back, above, but must not be transmitted on a network.
264		 * Also, multicasts addressed to the loopback interface
265		 * are not sent -- the above call to ip_mloopback() will
266		 * loop back a copy if this host actually belongs to the
267		 * destination group on the loopback interface.
268		 */
269		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
270			m_freem(m);
271			goto done;
272		}
273
274		goto sendit;
275	}
276#ifndef notdef
277	/*
278	 * If source address not specified yet, use address
279	 * of outgoing interface.
280	 */
281	if (ip->ip_src.s_addr == INADDR_ANY)
282		ip->ip_src = IA_SIN(ia)->sin_addr;
283#endif
284	/*
285	 * Verify that we have any chance at all of being able to queue
286	 *      the packet or packet fragments
287	 */
288	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
289		ifp->if_snd.ifq_maxlen) {
290			error = ENOBUFS;
291			goto bad;
292	}
293
294	/*
295	 * Look for broadcast address and
296	 * and verify user is allowed to send
297	 * such a packet.
298	 */
299	if (in_broadcast(dst->sin_addr, ifp)) {
300		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
301			error = EADDRNOTAVAIL;
302			goto bad;
303		}
304		if ((flags & IP_ALLOWBROADCAST) == 0) {
305			error = EACCES;
306			goto bad;
307		}
308		/* don't allow broadcast messages to be fragmented */
309		if ((u_short)ip->ip_len > ifp->if_mtu) {
310			error = EMSGSIZE;
311			goto bad;
312		}
313		m->m_flags |= M_BCAST;
314	} else
315		m->m_flags &= ~M_BCAST;
316
317sendit:
318	/*
319	 * Check with the firewall...
320	 */
321	if (ip_fw_chk_ptr && !(*ip_fw_chk_ptr)(&ip, hlen, ifp, 1, &m)) {
322		error = EACCES;
323		goto done;
324	}
325
326	/*
327	 * If small enough for interface, can just send directly.
328	 */
329	if ((u_short)ip->ip_len <= ifp->if_mtu) {
330		ip->ip_len = htons((u_short)ip->ip_len);
331		ip->ip_off = htons((u_short)ip->ip_off);
332		ip->ip_sum = 0;
333		if (ip->ip_vhl == IP_VHL_BORING) {
334			ip->ip_sum = in_cksum_hdr(ip);
335		} else {
336			ip->ip_sum = in_cksum(m, hlen);
337		}
338		error = (*ifp->if_output)(ifp, m,
339				(struct sockaddr *)dst, ro->ro_rt);
340		goto done;
341	}
342	/*
343	 * Too large for interface; fragment if possible.
344	 * Must be able to put at least 8 bytes per fragment.
345	 */
346	if (ip->ip_off & IP_DF) {
347		error = EMSGSIZE;
348#if 1
349		/*
350		 * This case can happen if the user changed the MTU
351		 * of an interface after enabling IP on it.  Because
352		 * most netifs don't keep track of routes pointing to
353		 * them, there is no way for one to update all its
354		 * routes when the MTU is changed.
355		 */
356		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
357		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
358		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
359			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
360		}
361#endif
362		ipstat.ips_cantfrag++;
363		goto bad;
364	}
365	len = (ifp->if_mtu - hlen) &~ 7;
366	if (len < 8) {
367		error = EMSGSIZE;
368		goto bad;
369	}
370
371    {
372	int mhlen, firstlen = len;
373	struct mbuf **mnext = &m->m_nextpkt;
374
375	/*
376	 * Loop through length of segment after first fragment,
377	 * make new header and copy data of each part and link onto chain.
378	 */
379	m0 = m;
380	mhlen = sizeof (struct ip);
381	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
382		MGETHDR(m, M_DONTWAIT, MT_HEADER);
383		if (m == 0) {
384			error = ENOBUFS;
385			ipstat.ips_odropped++;
386			goto sendorfree;
387		}
388		m->m_data += max_linkhdr;
389		mhip = mtod(m, struct ip *);
390		*mhip = *ip;
391		if (hlen > sizeof (struct ip)) {
392			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
393			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
394		}
395		m->m_len = mhlen;
396		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
397		if (ip->ip_off & IP_MF)
398			mhip->ip_off |= IP_MF;
399		if (off + len >= (u_short)ip->ip_len)
400			len = (u_short)ip->ip_len - off;
401		else
402			mhip->ip_off |= IP_MF;
403		mhip->ip_len = htons((u_short)(len + mhlen));
404		m->m_next = m_copy(m0, off, len);
405		if (m->m_next == 0) {
406			(void) m_free(m);
407			error = ENOBUFS;	/* ??? */
408			ipstat.ips_odropped++;
409			goto sendorfree;
410		}
411		m->m_pkthdr.len = mhlen + len;
412		m->m_pkthdr.rcvif = (struct ifnet *)0;
413		mhip->ip_off = htons((u_short)mhip->ip_off);
414		mhip->ip_sum = 0;
415		if (mhip->ip_vhl == IP_VHL_BORING) {
416			mhip->ip_sum = in_cksum_hdr(ip);
417		} else {
418			mhip->ip_sum = in_cksum(m, mhlen);
419		}
420		*mnext = m;
421		mnext = &m->m_nextpkt;
422		ipstat.ips_ofragments++;
423	}
424	/*
425	 * Update first fragment by trimming what's been copied out
426	 * and updating header, then send each fragment (in order).
427	 */
428	m = m0;
429	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
430	m->m_pkthdr.len = hlen + firstlen;
431	ip->ip_len = htons((u_short)m->m_pkthdr.len);
432	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
433	ip->ip_sum = 0;
434	if (ip->ip_vhl == IP_VHL_BORING) {
435		ip->ip_sum = in_cksum_hdr(ip);
436	} else {
437		ip->ip_sum = in_cksum(m, hlen);
438	}
439sendorfree:
440	for (m = m0; m; m = m0) {
441		m0 = m->m_nextpkt;
442		m->m_nextpkt = 0;
443		if (error == 0)
444			error = (*ifp->if_output)(ifp, m,
445			    (struct sockaddr *)dst, ro->ro_rt);
446		else
447			m_freem(m);
448	}
449
450	if (error == 0)
451		ipstat.ips_fragmented++;
452    }
453done:
454	return (error);
455bad:
456	m_freem(m0);
457	goto done;
458}
459
460/*
461 * Insert IP options into preformed packet.
462 * Adjust IP destination as required for IP source routing,
463 * as indicated by a non-zero in_addr at the start of the options.
464 *
465 * XXX This routine assumes that the packet has no options in place.
466 */
467static struct mbuf *
468ip_insertoptions(m, opt, phlen)
469	register struct mbuf *m;
470	struct mbuf *opt;
471	int *phlen;
472{
473	register struct ipoption *p = mtod(opt, struct ipoption *);
474	struct mbuf *n;
475	register struct ip *ip = mtod(m, struct ip *);
476	unsigned optlen;
477
478	optlen = opt->m_len - sizeof(p->ipopt_dst);
479	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
480		return (m);		/* XXX should fail */
481	if (p->ipopt_dst.s_addr)
482		ip->ip_dst = p->ipopt_dst;
483	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
484		MGETHDR(n, M_DONTWAIT, MT_HEADER);
485		if (n == 0)
486			return (m);
487		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
488		m->m_len -= sizeof(struct ip);
489		m->m_data += sizeof(struct ip);
490		n->m_next = m;
491		m = n;
492		m->m_len = optlen + sizeof(struct ip);
493		m->m_data += max_linkhdr;
494		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
495	} else {
496		m->m_data -= optlen;
497		m->m_len += optlen;
498		m->m_pkthdr.len += optlen;
499		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
500	}
501	ip = mtod(m, struct ip *);
502	(void)memcpy(ip + 1, p->ipopt_list, (unsigned)optlen);
503	*phlen = sizeof(struct ip) + optlen;
504	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
505	ip->ip_len += optlen;
506	return (m);
507}
508
509/*
510 * Copy options from ip to jp,
511 * omitting those not copied during fragmentation.
512 */
513static int
514ip_optcopy(ip, jp)
515	struct ip *ip, *jp;
516{
517	register u_char *cp, *dp;
518	int opt, optlen, cnt;
519
520	cp = (u_char *)(ip + 1);
521	dp = (u_char *)(jp + 1);
522	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
523	for (; cnt > 0; cnt -= optlen, cp += optlen) {
524		opt = cp[0];
525		if (opt == IPOPT_EOL)
526			break;
527		if (opt == IPOPT_NOP) {
528			/* Preserve for IP mcast tunnel's LSRR alignment. */
529			*dp++ = IPOPT_NOP;
530			optlen = 1;
531			continue;
532		} else
533			optlen = cp[IPOPT_OLEN];
534		/* bogus lengths should have been caught by ip_dooptions */
535		if (optlen > cnt)
536			optlen = cnt;
537		if (IPOPT_COPIED(opt)) {
538			(void)memcpy(dp, cp, (unsigned)optlen);
539			dp += optlen;
540		}
541	}
542	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
543		*dp++ = IPOPT_EOL;
544	return (optlen);
545}
546
547/*
548 * IP socket option processing.
549 */
550int
551ip_ctloutput(op, so, level, optname, mp)
552	int op;
553	struct socket *so;
554	int level, optname;
555	struct mbuf **mp;
556{
557	register struct inpcb *inp = sotoinpcb(so);
558	register struct mbuf *m = *mp;
559	register int optval = 0;
560	int error = 0;
561
562	if (level != IPPROTO_IP) {
563		error = EINVAL;
564		if (op == PRCO_SETOPT && *mp)
565			(void) m_free(*mp);
566	} else switch (op) {
567
568	case PRCO_SETOPT:
569		switch (optname) {
570		case IP_OPTIONS:
571#ifdef notyet
572		case IP_RETOPTS:
573			return (ip_pcbopts(optname, &inp->inp_options, m));
574#else
575			return (ip_pcbopts(&inp->inp_options, m));
576#endif
577
578		case IP_TOS:
579		case IP_TTL:
580		case IP_RECVOPTS:
581		case IP_RECVRETOPTS:
582		case IP_RECVDSTADDR:
583			if (m == 0 || m->m_len != sizeof(int))
584				error = EINVAL;
585			else {
586				optval = *mtod(m, int *);
587				switch (optname) {
588
589				case IP_TOS:
590					inp->inp_ip.ip_tos = optval;
591					break;
592
593				case IP_TTL:
594					inp->inp_ip.ip_ttl = optval;
595					break;
596#define	OPTSET(bit) \
597	if (optval) \
598		inp->inp_flags |= bit; \
599	else \
600		inp->inp_flags &= ~bit;
601
602				case IP_RECVOPTS:
603					OPTSET(INP_RECVOPTS);
604					break;
605
606				case IP_RECVRETOPTS:
607					OPTSET(INP_RECVRETOPTS);
608					break;
609
610				case IP_RECVDSTADDR:
611					OPTSET(INP_RECVDSTADDR);
612					break;
613				}
614			}
615			break;
616#undef OPTSET
617
618		case IP_MULTICAST_IF:
619		case IP_MULTICAST_VIF:
620		case IP_MULTICAST_TTL:
621		case IP_MULTICAST_LOOP:
622		case IP_ADD_MEMBERSHIP:
623		case IP_DROP_MEMBERSHIP:
624			error = ip_setmoptions(optname, &inp->inp_moptions, m);
625			break;
626
627		case IP_PORTRANGE:
628			if (m == 0 || m->m_len != sizeof(int))
629				error = EINVAL;
630			else {
631				optval = *mtod(m, int *);
632
633				switch (optval) {
634
635				case IP_PORTRANGE_DEFAULT:
636					inp->inp_flags &= ~(INP_LOWPORT);
637					inp->inp_flags &= ~(INP_HIGHPORT);
638					break;
639
640				case IP_PORTRANGE_HIGH:
641					inp->inp_flags &= ~(INP_LOWPORT);
642					inp->inp_flags |= INP_HIGHPORT;
643					break;
644
645				case IP_PORTRANGE_LOW:
646					inp->inp_flags &= ~(INP_HIGHPORT);
647					inp->inp_flags |= INP_LOWPORT;
648					break;
649
650				default:
651					error = EINVAL;
652					break;
653				}
654			}
655
656		default:
657			error = ENOPROTOOPT;
658			break;
659		}
660		if (m)
661			(void)m_free(m);
662		break;
663
664	case PRCO_GETOPT:
665		switch (optname) {
666		case IP_OPTIONS:
667		case IP_RETOPTS:
668			*mp = m = m_get(M_WAIT, MT_SOOPTS);
669			if (inp->inp_options) {
670				m->m_len = inp->inp_options->m_len;
671				(void)memcpy(mtod(m, void *),
672				    mtod(inp->inp_options, void *), (unsigned)m->m_len);
673			} else
674				m->m_len = 0;
675			break;
676
677		case IP_TOS:
678		case IP_TTL:
679		case IP_RECVOPTS:
680		case IP_RECVRETOPTS:
681		case IP_RECVDSTADDR:
682			*mp = m = m_get(M_WAIT, MT_SOOPTS);
683			m->m_len = sizeof(int);
684			switch (optname) {
685
686			case IP_TOS:
687				optval = inp->inp_ip.ip_tos;
688				break;
689
690			case IP_TTL:
691				optval = inp->inp_ip.ip_ttl;
692				break;
693
694#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
695
696			case IP_RECVOPTS:
697				optval = OPTBIT(INP_RECVOPTS);
698				break;
699
700			case IP_RECVRETOPTS:
701				optval = OPTBIT(INP_RECVRETOPTS);
702				break;
703
704			case IP_RECVDSTADDR:
705				optval = OPTBIT(INP_RECVDSTADDR);
706				break;
707			}
708			*mtod(m, int *) = optval;
709			break;
710
711		case IP_MULTICAST_IF:
712		case IP_MULTICAST_VIF:
713		case IP_MULTICAST_TTL:
714		case IP_MULTICAST_LOOP:
715		case IP_ADD_MEMBERSHIP:
716		case IP_DROP_MEMBERSHIP:
717			error = ip_getmoptions(optname, inp->inp_moptions, mp);
718			break;
719
720		case IP_PORTRANGE:
721			*mp = m = m_get(M_WAIT, MT_SOOPTS);
722			m->m_len = sizeof(int);
723
724			if (inp->inp_flags & INP_HIGHPORT)
725				optval = IP_PORTRANGE_HIGH;
726			else if (inp->inp_flags & INP_LOWPORT)
727				optval = IP_PORTRANGE_LOW;
728			else
729				optval = 0;
730
731			*mtod(m, int *) = optval;
732			break;
733
734		default:
735			error = ENOPROTOOPT;
736			break;
737		}
738		break;
739	}
740	return (error);
741}
742
/*
 * Set up IP options in pcb for insertion in output packets.
 * Store in mbuf with pointer in pcbopt, adding pseudo-option
 * with destination address if source routed.
 *
 * Any options previously stored in *pcbopt are freed first.  On success
 * the mbuf m is kept in *pcbopt and 0 is returned; a null or empty m
 * just clears the stored options.  On a malformed option list m is freed
 * and EINVAL is returned, leaving *pcbopt null.
 */
static int
#ifdef notyet
ip_pcbopts(optname, pcbopt, m)
	int optname;
#else
ip_pcbopts(pcbopt, m)
#endif
	struct mbuf **pcbopt;
	register struct mbuf *m;
{
	register cnt, optlen;
	register u_char *cp;
	u_char opt;

	/* turn off any old options */
	if (*pcbopt)
		(void)m_free(*pcbopt);
	*pcbopt = 0;
	if (m == (struct mbuf *)0 || m->m_len == 0) {
		/*
		 * Only turning off any previous options.
		 */
		if (m)
			(void)m_free(m);
		return (0);
	}

#ifndef	vax
	/* Reject option lists whose length is not a multiple of a long. */
	if (m->m_len % sizeof(long))
		goto bad;
#endif
	/*
	 * IP first-hop destination address will be stored before
	 * actual options; move other options back
	 * and clear it when none present.
	 */
	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
		goto bad;
	cnt = m->m_len;
	m->m_len += sizeof(struct in_addr);
	cp = mtod(m, u_char *) + sizeof(struct in_addr);
	/* Source and destination overlap, hence ovbcopy. */
	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
	bzero(mtod(m, caddr_t), sizeof(struct in_addr));

	/* Walk the options; cnt is the number of option bytes left at cp. */
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			optlen = cp[IPOPT_OLEN];
			/* Length must cover type+length and fit in what is left. */
			if (optlen <= IPOPT_OLEN || optlen > cnt)
				goto bad;
		}
		switch (opt) {

		default:
			break;

		case IPOPT_LSRR:
		case IPOPT_SSRR:
			/*
			 * user process specifies route as:
			 *	->A->B->C->D
			 * D must be our final destination (but we can't
			 * check that since we may not have connected yet).
			 * A is first hop destination, which doesn't appear in
			 * actual IP option, but is stored before the options.
			 */
			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
				goto bad;
			/* The option shrinks by the one address removed from it. */
			m->m_len -= sizeof(struct in_addr);
			cnt -= sizeof(struct in_addr);
			optlen -= sizeof(struct in_addr);
			cp[IPOPT_OLEN] = optlen;
			/*
			 * Move first hop before start of options.
			 */
			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
			    sizeof(struct in_addr));
			/*
			 * Then copy rest of options back
			 * to close up the deleted entry.
			 */
			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
			    sizeof(struct in_addr)),
			    (caddr_t)&cp[IPOPT_OFFSET+1],
			    (unsigned)cnt + sizeof(struct in_addr));
			break;
		}
	}
	/* Stored data is the first-hop address plus at most MAX_IPOPTLEN. */
	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
		goto bad;
	*pcbopt = m;
	return (0);

bad:
	(void)m_free(m);
	return (EINVAL);
}
849
/*
 * Set the IP multicast options in response to user setsockopt().
 *
 *	optname	one of IP_MULTICAST_VIF, IP_MULTICAST_IF, IP_MULTICAST_TTL,
 *		IP_MULTICAST_LOOP, IP_ADD_MEMBERSHIP, IP_DROP_MEMBERSHIP;
 *		anything else yields EOPNOTSUPP.
 *	imop	address of the pcb's multicast option pointer.  A structure
 *		is allocated on demand and released again (with *imop reset
 *		to NULL) once every field is back at its default value.
 *	m	mbuf holding the option value; it is not freed here.
 *
 * Returns 0 on success or an errno value.
 */
static int
ip_setmoptions(optname, imop, m)
	int optname;
	struct ip_moptions **imop;
	struct mbuf *m;
{
	register int error = 0;
	u_char loop;
	register int i;
	struct in_addr addr;
	register struct ip_mreq *mreq;
	register struct ifnet *ifp;
	register struct ip_moptions *imo = *imop;
	struct route ro;
	register struct sockaddr_in *dst;
	int s;

	if (imo == NULL) {
		/*
		 * No multicast option buffer attached to the pcb;
		 * allocate one and initialize to default values.
		 */
		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
		    M_WAITOK);

		if (imo == NULL)
			return (ENOBUFS);
		*imop = imo;
		imo->imo_multicast_ifp = NULL;
		imo->imo_multicast_vif = -1;
		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
		imo->imo_num_memberships = 0;
	}

	switch (optname) {
	/* store an index number for the vif you wanna use in the send */
	case IP_MULTICAST_VIF:
		/* legal_vif_num is a function pointer; null means unsupported. */
		if (!legal_vif_num) {
			error = EOPNOTSUPP;
			break;
		}
		if (m == NULL || m->m_len != sizeof(int)) {
			error = EINVAL;
			break;
		}
		i = *(mtod(m, int *));
		/* -1 is accepted as "no vif selected". */
		if (!legal_vif_num(i) && (i != -1)) {
			error = EINVAL;
			break;
		}
		imo->imo_multicast_vif = i;
		break;

	case IP_MULTICAST_IF:
		/*
		 * Select the interface for outgoing multicast packets.
		 */
		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
			error = EINVAL;
			break;
		}
		addr = *(mtod(m, struct in_addr *));
		/*
		 * INADDR_ANY is used to remove a previous selection.
		 * When no interface is selected, a default one is
		 * chosen every time a multicast packet is sent.
		 */
		if (addr.s_addr == INADDR_ANY) {
			imo->imo_multicast_ifp = NULL;
			break;
		}
		/*
		 * The selected interface is identified by its local
		 * IP address.  Find the interface and confirm that
		 * it supports multicasting.
		 */
		s = splimp();
		INADDR_TO_IFP(addr, ifp);
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			splx(s);
			error = EADDRNOTAVAIL;
			break;
		}
		imo->imo_multicast_ifp = ifp;
		splx(s);
		break;

	case IP_MULTICAST_TTL:
		/*
		 * Set the IP time-to-live for outgoing multicast packets.
		 */
		if (m == NULL || m->m_len != 1) {
			error = EINVAL;
			break;
		}
		imo->imo_multicast_ttl = *(mtod(m, u_char *));
		break;

	case IP_MULTICAST_LOOP:
		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one.
		 */
		if (m == NULL || m->m_len != 1 ||
		   (loop = *(mtod(m, u_char *))) > 1) {
			error = EINVAL;
			break;
		}
		imo->imo_multicast_loop = loop;
		break;

	case IP_ADD_MEMBERSHIP:
		/*
		 * Add a multicast group membership.
		 * Group must be a valid IP multicast address.
		 */
		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
			error = EINVAL;
			break;
		}
		mreq = mtod(m, struct ip_mreq *);
		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
			error = EINVAL;
			break;
		}
		/* Every exit from here to the end of the case must splx(s). */
		s = splimp();
		/*
		 * If no interface address was provided, use the interface of
		 * the route to the given multicast address.
		 */
		if (mreq->imr_interface.s_addr == INADDR_ANY) {
			bzero((caddr_t)&ro, sizeof(ro));
			dst = (struct sockaddr_in *)&ro.ro_dst;
			dst->sin_len = sizeof(*dst);
			dst->sin_family = AF_INET;
			dst->sin_addr = mreq->imr_multiaddr;
			rtalloc(&ro);
			if (ro.ro_rt == NULL) {
				error = EADDRNOTAVAIL;
				splx(s);
				break;
			}
			/* Only the interface is needed; drop the route again. */
			ifp = ro.ro_rt->rt_ifp;
			rtfree(ro.ro_rt);
		}
		else {
			INADDR_TO_IFP(mreq->imr_interface, ifp);
		}

		/*
		 * See if we found an interface, and confirm that it
		 * supports multicast.
		 */
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			splx(s);
			break;
		}
		/*
		 * See if the membership already exists or if all the
		 * membership slots are full.
		 */
		for (i = 0; i < imo->imo_num_memberships; ++i) {
			if (imo->imo_membership[i]->inm_ifp == ifp &&
			    imo->imo_membership[i]->inm_addr.s_addr
						== mreq->imr_multiaddr.s_addr)
				break;
		}
		if (i < imo->imo_num_memberships) {
			error = EADDRINUSE;
			splx(s);
			break;
		}
		/* Here i == imo_num_memberships, the first unused slot. */
		if (i == IP_MAX_MEMBERSHIPS) {
			error = ETOOMANYREFS;
			splx(s);
			break;
		}
		/*
		 * Everything looks good; add a new record to the multicast
		 * address list for the given interface.
		 */
		if ((imo->imo_membership[i] =
		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
			error = ENOBUFS;
			splx(s);
			break;
		}
		++imo->imo_num_memberships;
		splx(s);
		break;

	case IP_DROP_MEMBERSHIP:
		/*
		 * Drop a multicast group membership.
		 * Group must be a valid IP multicast address.
		 */
		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
			error = EINVAL;
			break;
		}
		mreq = mtod(m, struct ip_mreq *);
		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
			error = EINVAL;
			break;
		}

		/* Every exit from here to the end of the case must splx(s). */
		s = splimp();
		/*
		 * If an interface address was specified, get a pointer
		 * to its ifnet structure.
		 */
		if (mreq->imr_interface.s_addr == INADDR_ANY)
			ifp = NULL;
		else {
			INADDR_TO_IFP(mreq->imr_interface, ifp);
			if (ifp == NULL) {
				error = EADDRNOTAVAIL;
				splx(s);
				break;
			}
		}
		/*
		 * Find the membership in the membership array.
		 * A null ifp matches the group on any interface.
		 */
		for (i = 0; i < imo->imo_num_memberships; ++i) {
			if ((ifp == NULL ||
			     imo->imo_membership[i]->inm_ifp == ifp) &&
			     imo->imo_membership[i]->inm_addr.s_addr ==
			     mreq->imr_multiaddr.s_addr)
				break;
		}
		if (i == imo->imo_num_memberships) {
			error = EADDRNOTAVAIL;
			splx(s);
			break;
		}
		/*
		 * Give up the multicast address record to which the
		 * membership points.
		 */
		in_delmulti(imo->imo_membership[i]);
		/*
		 * Remove the gap in the membership array.
		 */
		for (++i; i < imo->imo_num_memberships; ++i)
			imo->imo_membership[i-1] = imo->imo_membership[i];
		--imo->imo_num_memberships;
		splx(s);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/*
	 * If all options have default values, no need to keep the
	 * option structure; release it and clear the pcb's pointer.
	 * This also covers a structure allocated above for a request
	 * that then failed.
	 */
	if (imo->imo_multicast_ifp == NULL &&
	    imo->imo_multicast_vif == -1 &&
	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
	    imo->imo_num_memberships == 0) {
		free(*imop, M_IPMOPTS);
		*imop = NULL;
	}

	return (error);
}
1124
1125/*
1126 * Return the IP multicast options in response to user getsockopt().
1127 */
1128static int
1129ip_getmoptions(optname, imo, mp)
1130	int optname;
1131	register struct ip_moptions *imo;
1132	register struct mbuf **mp;
1133{
1134	u_char *ttl;
1135	u_char *loop;
1136	struct in_addr *addr;
1137	struct in_ifaddr *ia;
1138
1139	*mp = m_get(M_WAIT, MT_SOOPTS);
1140
1141	switch (optname) {
1142
1143	case IP_MULTICAST_VIF:
1144		if (imo != NULL)
1145			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1146		else
1147			*(mtod(*mp, int *)) = -1;
1148		(*mp)->m_len = sizeof(int);
1149		return(0);
1150
1151	case IP_MULTICAST_IF:
1152		addr = mtod(*mp, struct in_addr *);
1153		(*mp)->m_len = sizeof(struct in_addr);
1154		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1155			addr->s_addr = INADDR_ANY;
1156		else {
1157			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1158			addr->s_addr = (ia == NULL) ? INADDR_ANY
1159					: IA_SIN(ia)->sin_addr.s_addr;
1160		}
1161		return (0);
1162
1163	case IP_MULTICAST_TTL:
1164		ttl = mtod(*mp, u_char *);
1165		(*mp)->m_len = 1;
1166		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1167				     : imo->imo_multicast_ttl;
1168		return (0);
1169
1170	case IP_MULTICAST_LOOP:
1171		loop = mtod(*mp, u_char *);
1172		(*mp)->m_len = 1;
1173		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1174				      : imo->imo_multicast_loop;
1175		return (0);
1176
1177	default:
1178		return (EOPNOTSUPP);
1179	}
1180}
1181
1182/*
1183 * Discard the IP multicast options.
1184 */
1185void
1186ip_freemoptions(imo)
1187	register struct ip_moptions *imo;
1188{
1189	register int i;
1190
1191	if (imo != NULL) {
1192		for (i = 0; i < imo->imo_num_memberships; ++i)
1193			in_delmulti(imo->imo_membership[i]);
1194		free(imo, M_IPMOPTS);
1195	}
1196}
1197
1198/*
1199 * Routine called from ip_output() to loop back a copy of an IP multicast
1200 * packet to the input queue of a specified interface.  Note that this
1201 * calls the output routine of the loopback "driver", but with an interface
1202 * pointer that might NOT be a loopback interface -- evil, but easier than
1203 * replicating that code here.
1204 */
1205static void
1206ip_mloopback(ifp, m, dst)
1207	struct ifnet *ifp;
1208	register struct mbuf *m;
1209	register struct sockaddr_in *dst;
1210{
1211	register struct ip *ip;
1212	struct mbuf *copym;
1213
1214	copym = m_copy(m, 0, M_COPYALL);
1215	if (copym != NULL) {
1216		/*
1217		 * We don't bother to fragment if the IP length is greater
1218		 * than the interface's MTU.  Can this possibly matter?
1219		 */
1220		ip = mtod(copym, struct ip *);
1221		ip->ip_len = htons((u_short)ip->ip_len);
1222		ip->ip_off = htons((u_short)ip->ip_off);
1223		ip->ip_sum = 0;
1224		if (ip->ip_vhl == IP_VHL_BORING) {
1225			ip->ip_sum = in_cksum_hdr(ip);
1226		} else {
1227			ip->ip_sum = in_cksum(copym,
1228					      IP_VHL_HL(ip->ip_vhl) << 2);
1229		}
1230		/*
1231		 * NB:
1232		 * We can't simply call ip_input() directly because
1233		 * the ip_mforward() depends on the `input interface'
1234		 * being set to something unreasonable so that we don't
1235		 * attempt to forward the looped-back copy.
1236		 * It's also not clear whether there are any lingering
1237		 * reentrancy problems in other areas which might be
1238		 * exposed by this code.  For the moment, we'll err
1239		 * on the side of safety by continuing to abuse
1240		 * loinput().
1241		 */
1242#ifdef notdef
1243		copym->m_pkthdr.rcvif = &loif[0];
1244		ip_input(copym)
1245#else
1246		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1247#endif
1248	}
1249}
1250