/* ip_output.c revision 12296 */
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 *	$Id: ip_output.c,v 1.24 1995/10/16 18:21:09 wollman Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/errno.h>
42#include <sys/protosw.h>
43#include <sys/socket.h>
44#include <sys/socketvar.h>
45#include <sys/queue.h>
46
47#include <net/if.h>
48#include <net/route.h>
49
50#include <netinet/in.h>
51#include <netinet/in_systm.h>
52#include <netinet/ip.h>
53#include <netinet/in_pcb.h>
54#include <netinet/in_var.h>
55#include <netinet/ip_var.h>
56
57#include <netinet/ip_fw.h>
58
59#ifdef vax
60#include <machine/mtpr.h>
61#endif
62
/*
 * Identification counter for outgoing datagrams: ip_output() stores
 * htons(ip_id++) in the header of each packet it originates.
 */
u_short ip_id;

/* Prepend the options in an mbuf to a packet's IP header. */
static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
/* Hand a copy of an outgoing multicast packet to the loopback output routine. */
static void	ip_mloopback
	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
/* getsockopt() back end for the IP_MULTICAST_* options. */
static int	ip_getmoptions
	__P((int, struct ip_moptions *, struct mbuf **));
/* Copy the options that must be replicated into every fragment. */
static int	ip_optcopy __P((struct ip *, struct ip *));
/* Store user-supplied IP options in a pcb for later insertion. */
static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
/* setsockopt() back end for the IP_MULTICAST_* and membership options. */
static int	ip_setmoptions
	__P((int, struct ip_moptions **, struct mbuf *));
74
75/*
76 * IP output.  The packet in mbuf chain m contains a skeletal IP
77 * header (with len, off, ttl, proto, tos, src, dst).
78 * The mbuf chain containing the packet will be freed.
79 * The mbuf opt, if present, will not be freed.
80 */
81int
82ip_output(m0, opt, ro, flags, imo)
83	struct mbuf *m0;
84	struct mbuf *opt;
85	struct route *ro;
86	int flags;
87	struct ip_moptions *imo;
88{
89	register struct ip *ip, *mhip;
90	register struct ifnet *ifp;
91	register struct mbuf *m = m0;
92	register int hlen = sizeof (struct ip);
93	int len, off, error = 0;
94	struct route iproute;
95	struct sockaddr_in *dst;
96	struct in_ifaddr *ia;
97
98#ifdef	DIAGNOSTIC
99	if ((m->m_flags & M_PKTHDR) == 0)
100		panic("ip_output no HDR");
101#endif
102	if (opt) {
103		m = ip_insertoptions(m, opt, &len);
104		hlen = len;
105	}
106	ip = mtod(m, struct ip *);
107	/*
108	 * Fill in IP header.
109	 */
110	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
111		ip->ip_v = IPVERSION;
112		ip->ip_off &= IP_DF;
113		ip->ip_id = htons(ip_id++);
114		ip->ip_hl = hlen >> 2;
115		ipstat.ips_localout++;
116	} else {
117		hlen = ip->ip_hl << 2;
118	}
119	/*
120	 * Route packet.
121	 */
122	if (ro == 0) {
123		ro = &iproute;
124		bzero((caddr_t)ro, sizeof (*ro));
125	}
126	dst = (struct sockaddr_in *)&ro->ro_dst;
127	/*
128	 * If there is a cached route,
129	 * check that it is to the same destination
130	 * and is still up.  If not, free it and try again.
131	 */
132	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
133	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
134		RTFREE(ro->ro_rt);
135		ro->ro_rt = (struct rtentry *)0;
136	}
137	if (ro->ro_rt == 0) {
138		dst->sin_family = AF_INET;
139		dst->sin_len = sizeof(*dst);
140		dst->sin_addr = ip->ip_dst;
141	}
142	/*
143	 * If routing to interface only,
144	 * short circuit routing lookup.
145	 */
146#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
147#define sintosa(sin)	((struct sockaddr *)(sin))
148	if (flags & IP_ROUTETOIF) {
149		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
150		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
151			ipstat.ips_noroute++;
152			error = ENETUNREACH;
153			goto bad;
154		}
155		ifp = ia->ia_ifp;
156		ip->ip_ttl = 1;
157	} else {
158		/*
159		 * If this is the case, we probably don't want to allocate
160		 * a protocol-cloned route since we didn't get one from the
161		 * ULP.  This lets TCP do its thing, while not burdening
162		 * forwarding or ICMP with the overhead of cloning a route.
163		 * Of course, we still want to do any cloning requested by
164		 * the link layer, as this is probably required in all cases
165		 * for correct operation (as it is for ARP).
166		 */
167		if (ro->ro_rt == 0)
168			rtalloc_ign(ro, RTF_PRCLONING);
169		if (ro->ro_rt == 0) {
170			ipstat.ips_noroute++;
171			error = EHOSTUNREACH;
172			goto bad;
173		}
174		ia = ifatoia(ro->ro_rt->rt_ifa);
175		ifp = ro->ro_rt->rt_ifp;
176		ro->ro_rt->rt_use++;
177		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
178			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
179	}
180	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
181		struct in_multi *inm;
182
183		m->m_flags |= M_MCAST;
184		/*
185		 * IP destination address is multicast.  Make sure "dst"
186		 * still points to the address in "ro".  (It may have been
187		 * changed to point to a gateway address, above.)
188		 */
189		dst = (struct sockaddr_in *)&ro->ro_dst;
190		/*
191		 * See if the caller provided any multicast options
192		 */
193		if (imo != NULL) {
194			ip->ip_ttl = imo->imo_multicast_ttl;
195			if (imo->imo_multicast_ifp != NULL)
196				ifp = imo->imo_multicast_ifp;
197			if (imo->imo_multicast_vif != -1)
198				ip->ip_src.s_addr =
199				    ip_mcast_src(imo->imo_multicast_vif);
200		} else
201			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
202		/*
203		 * Confirm that the outgoing interface supports multicast.
204		 */
205		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
206			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
207				ipstat.ips_noroute++;
208				error = ENETUNREACH;
209				goto bad;
210			}
211		}
212		/*
213		 * If source address not specified yet, use address
214		 * of outgoing interface.
215		 */
216		if (ip->ip_src.s_addr == INADDR_ANY) {
217			register struct in_ifaddr *ia;
218
219			for (ia = in_ifaddr; ia; ia = ia->ia_next)
220				if (ia->ia_ifp == ifp) {
221					ip->ip_src = IA_SIN(ia)->sin_addr;
222					break;
223				}
224		}
225
226		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
227		if (inm != NULL &&
228		   (imo == NULL || imo->imo_multicast_loop)) {
229			/*
230			 * If we belong to the destination multicast group
231			 * on the outgoing interface, and the caller did not
232			 * forbid loopback, loop back a copy.
233			 */
234			ip_mloopback(ifp, m, dst);
235		}
236		else {
237			/*
238			 * If we are acting as a multicast router, perform
239			 * multicast forwarding as if the packet had just
240			 * arrived on the interface to which we are about
241			 * to send.  The multicast forwarding function
242			 * recursively calls this function, using the
243			 * IP_FORWARDING flag to prevent infinite recursion.
244			 *
245			 * Multicasts that are looped back by ip_mloopback(),
246			 * above, will be forwarded by the ip_input() routine,
247			 * if necessary.
248			 */
249			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
250				/*
251				 * Check if rsvp daemon is running. If not, don't
252				 * set ip_moptions. This ensures that the packet
253				 * is multicast and not just sent down one link
254				 * as prescribed by rsvpd.
255				 */
256				if (!rsvp_on)
257				  imo = NULL;
258				if (ip_mforward(ip, ifp, m, imo) != 0) {
259					m_freem(m);
260					goto done;
261				}
262			}
263		}
264
265		/*
266		 * Multicasts with a time-to-live of zero may be looped-
267		 * back, above, but must not be transmitted on a network.
268		 * Also, multicasts addressed to the loopback interface
269		 * are not sent -- the above call to ip_mloopback() will
270		 * loop back a copy if this host actually belongs to the
271		 * destination group on the loopback interface.
272		 */
273		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
274			m_freem(m);
275			goto done;
276		}
277
278		goto sendit;
279	}
280#ifndef notdef
281	/*
282	 * If source address not specified yet, use address
283	 * of outgoing interface.
284	 */
285	if (ip->ip_src.s_addr == INADDR_ANY)
286		ip->ip_src = IA_SIN(ia)->sin_addr;
287#endif
288	/*
289	 * Verify that we have any chance at all of being able to queue
290	 *      the packet or packet fragments
291	 */
292	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
293		ifp->if_snd.ifq_maxlen) {
294			error = ENOBUFS;
295			goto bad;
296	}
297
298	/*
299	 * Look for broadcast address and
300	 * and verify user is allowed to send
301	 * such a packet.
302	 */
303	if (in_broadcast(dst->sin_addr, ifp)) {
304		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
305			error = EADDRNOTAVAIL;
306			goto bad;
307		}
308		if ((flags & IP_ALLOWBROADCAST) == 0) {
309			error = EACCES;
310			goto bad;
311		}
312		/* don't allow broadcast messages to be fragmented */
313		if ((u_short)ip->ip_len > ifp->if_mtu) {
314			error = EMSGSIZE;
315			goto bad;
316		}
317		m->m_flags |= M_BCAST;
318	} else
319		m->m_flags &= ~M_BCAST;
320
321sendit:
322	/*
323	 * If small enough for interface, can just send directly.
324	 */
325	if ((u_short)ip->ip_len <= ifp->if_mtu) {
326		ip->ip_len = htons((u_short)ip->ip_len);
327		ip->ip_off = htons((u_short)ip->ip_off);
328		ip->ip_sum = 0;
329		ip->ip_sum = in_cksum(m, hlen);
330		error = (*ifp->if_output)(ifp, m,
331				(struct sockaddr *)dst, ro->ro_rt);
332		goto done;
333	}
334	/*
335	 * Too large for interface; fragment if possible.
336	 * Must be able to put at least 8 bytes per fragment.
337	 */
338	if (ip->ip_off & IP_DF) {
339		error = EMSGSIZE;
340#ifdef MTUDISC
341		/*
342		 * This case can happen if the user changed the MTU
343		 * of an interface after enabling IP on it.  Because
344		 * most netifs don't keep track of routes pointing to
345		 * them, there is no way for one to update all its
346		 * routes when the MTU is changed.
347		 */
348		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
349		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
350		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
351			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
352		}
353#endif /* MTUDISC */
354		ipstat.ips_cantfrag++;
355		goto bad;
356	}
357	len = (ifp->if_mtu - hlen) &~ 7;
358	if (len < 8) {
359		error = EMSGSIZE;
360		goto bad;
361	}
362
363    {
364	int mhlen, firstlen = len;
365	struct mbuf **mnext = &m->m_nextpkt;
366
367	/*
368	 * Loop through length of segment after first fragment,
369	 * make new header and copy data of each part and link onto chain.
370	 */
371	m0 = m;
372	mhlen = sizeof (struct ip);
373	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
374		MGETHDR(m, M_DONTWAIT, MT_HEADER);
375		if (m == 0) {
376			error = ENOBUFS;
377			ipstat.ips_odropped++;
378			goto sendorfree;
379		}
380		m->m_data += max_linkhdr;
381		mhip = mtod(m, struct ip *);
382		*mhip = *ip;
383		if (hlen > sizeof (struct ip)) {
384			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
385			mhip->ip_hl = mhlen >> 2;
386		}
387		m->m_len = mhlen;
388		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
389		if (ip->ip_off & IP_MF)
390			mhip->ip_off |= IP_MF;
391		if (off + len >= (u_short)ip->ip_len)
392			len = (u_short)ip->ip_len - off;
393		else
394			mhip->ip_off |= IP_MF;
395		mhip->ip_len = htons((u_short)(len + mhlen));
396		m->m_next = m_copy(m0, off, len);
397		if (m->m_next == 0) {
398			(void) m_free(m);
399			error = ENOBUFS;	/* ??? */
400			ipstat.ips_odropped++;
401			goto sendorfree;
402		}
403		m->m_pkthdr.len = mhlen + len;
404		m->m_pkthdr.rcvif = (struct ifnet *)0;
405		mhip->ip_off = htons((u_short)mhip->ip_off);
406		mhip->ip_sum = 0;
407		mhip->ip_sum = in_cksum(m, mhlen);
408		*mnext = m;
409		mnext = &m->m_nextpkt;
410		ipstat.ips_ofragments++;
411	}
412	/*
413	 * Update first fragment by trimming what's been copied out
414	 * and updating header, then send each fragment (in order).
415	 */
416	m = m0;
417	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
418	m->m_pkthdr.len = hlen + firstlen;
419	ip->ip_len = htons((u_short)m->m_pkthdr.len);
420	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
421	ip->ip_sum = 0;
422	ip->ip_sum = in_cksum(m, hlen);
423sendorfree:
424	for (m = m0; m; m = m0) {
425		m0 = m->m_nextpkt;
426		m->m_nextpkt = 0;
427		if (error == 0)
428			error = (*ifp->if_output)(ifp, m,
429			    (struct sockaddr *)dst, ro->ro_rt);
430		else
431			m_freem(m);
432	}
433
434	if (error == 0)
435		ipstat.ips_fragmented++;
436    }
437done:
438	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
439		RTFREE(ro->ro_rt);
440	/*
441	 * Count outgoing packet,here we count both our packets and
442	 * those we forward.
443	 * Here we want to convert ip_len to host byte order when counting
444	 * so we set 3rd arg to 1.
445	 * This is locally generated packet so it has not
446	 * incoming interface.
447	 */
448	if (ip_acct_cnt_ptr!=NULL)
449		(*ip_acct_cnt_ptr)(ip,NULL,ip_acct_chain,1);
450
451	return (error);
452bad:
453	m_freem(m0);
454	goto done;
455}
456
457/*
458 * Insert IP options into preformed packet.
459 * Adjust IP destination as required for IP source routing,
460 * as indicated by a non-zero in_addr at the start of the options.
461 */
462static struct mbuf *
463ip_insertoptions(m, opt, phlen)
464	register struct mbuf *m;
465	struct mbuf *opt;
466	int *phlen;
467{
468	register struct ipoption *p = mtod(opt, struct ipoption *);
469	struct mbuf *n;
470	register struct ip *ip = mtod(m, struct ip *);
471	unsigned optlen;
472
473	optlen = opt->m_len - sizeof(p->ipopt_dst);
474	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
475		return (m);		/* XXX should fail */
476	if (p->ipopt_dst.s_addr)
477		ip->ip_dst = p->ipopt_dst;
478	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
479		MGETHDR(n, M_DONTWAIT, MT_HEADER);
480		if (n == 0)
481			return (m);
482		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
483		m->m_len -= sizeof(struct ip);
484		m->m_data += sizeof(struct ip);
485		n->m_next = m;
486		m = n;
487		m->m_len = optlen + sizeof(struct ip);
488		m->m_data += max_linkhdr;
489		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
490	} else {
491		m->m_data -= optlen;
492		m->m_len += optlen;
493		m->m_pkthdr.len += optlen;
494		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
495	}
496	ip = mtod(m, struct ip *);
497	(void)memcpy(ip + 1, p->ipopt_list, (unsigned)optlen);
498	*phlen = sizeof(struct ip) + optlen;
499	ip->ip_len += optlen;
500	return (m);
501}
502
503/*
504 * Copy options from ip to jp,
505 * omitting those not copied during fragmentation.
506 */
507static int
508ip_optcopy(ip, jp)
509	struct ip *ip, *jp;
510{
511	register u_char *cp, *dp;
512	int opt, optlen, cnt;
513
514	cp = (u_char *)(ip + 1);
515	dp = (u_char *)(jp + 1);
516	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
517	for (; cnt > 0; cnt -= optlen, cp += optlen) {
518		opt = cp[0];
519		if (opt == IPOPT_EOL)
520			break;
521		if (opt == IPOPT_NOP) {
522			/* Preserve for IP mcast tunnel's LSRR alignment. */
523			*dp++ = IPOPT_NOP;
524			optlen = 1;
525			continue;
526		} else
527			optlen = cp[IPOPT_OLEN];
528		/* bogus lengths should have been caught by ip_dooptions */
529		if (optlen > cnt)
530			optlen = cnt;
531		if (IPOPT_COPIED(opt)) {
532			(void)memcpy(dp, cp, (unsigned)optlen);
533			dp += optlen;
534		}
535	}
536	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
537		*dp++ = IPOPT_EOL;
538	return (optlen);
539}
540
541/*
542 * IP socket option processing.
543 */
544int
545ip_ctloutput(op, so, level, optname, mp)
546	int op;
547	struct socket *so;
548	int level, optname;
549	struct mbuf **mp;
550{
551	register struct inpcb *inp = sotoinpcb(so);
552	register struct mbuf *m = *mp;
553	register int optval = 0;
554	int error = 0;
555
556	if (level != IPPROTO_IP) {
557		error = EINVAL;
558		if (op == PRCO_SETOPT && *mp)
559			(void) m_free(*mp);
560	} else switch (op) {
561
562	case PRCO_SETOPT:
563		switch (optname) {
564		case IP_OPTIONS:
565#ifdef notyet
566		case IP_RETOPTS:
567			return (ip_pcbopts(optname, &inp->inp_options, m));
568#else
569			return (ip_pcbopts(&inp->inp_options, m));
570#endif
571
572		case IP_TOS:
573		case IP_TTL:
574		case IP_RECVOPTS:
575		case IP_RECVRETOPTS:
576		case IP_RECVDSTADDR:
577			if (m == 0 || m->m_len != sizeof(int))
578				error = EINVAL;
579			else {
580				optval = *mtod(m, int *);
581				switch (optname) {
582
583				case IP_TOS:
584					inp->inp_ip.ip_tos = optval;
585					break;
586
587				case IP_TTL:
588					inp->inp_ip.ip_ttl = optval;
589					break;
590#define	OPTSET(bit) \
591	if (optval) \
592		inp->inp_flags |= bit; \
593	else \
594		inp->inp_flags &= ~bit;
595
596				case IP_RECVOPTS:
597					OPTSET(INP_RECVOPTS);
598					break;
599
600				case IP_RECVRETOPTS:
601					OPTSET(INP_RECVRETOPTS);
602					break;
603
604				case IP_RECVDSTADDR:
605					OPTSET(INP_RECVDSTADDR);
606					break;
607				}
608			}
609			break;
610#undef OPTSET
611
612		case IP_MULTICAST_IF:
613		case IP_MULTICAST_VIF:
614		case IP_MULTICAST_TTL:
615		case IP_MULTICAST_LOOP:
616		case IP_ADD_MEMBERSHIP:
617		case IP_DROP_MEMBERSHIP:
618			error = ip_setmoptions(optname, &inp->inp_moptions, m);
619			break;
620
621		default:
622			error = ENOPROTOOPT;
623			break;
624		}
625		if (m)
626			(void)m_free(m);
627		break;
628
629	case PRCO_GETOPT:
630		switch (optname) {
631		case IP_OPTIONS:
632		case IP_RETOPTS:
633			*mp = m = m_get(M_WAIT, MT_SOOPTS);
634			if (inp->inp_options) {
635				m->m_len = inp->inp_options->m_len;
636				(void)memcpy(mtod(m, void *),
637				    mtod(inp->inp_options, void *), (unsigned)m->m_len);
638			} else
639				m->m_len = 0;
640			break;
641
642		case IP_TOS:
643		case IP_TTL:
644		case IP_RECVOPTS:
645		case IP_RECVRETOPTS:
646		case IP_RECVDSTADDR:
647			*mp = m = m_get(M_WAIT, MT_SOOPTS);
648			m->m_len = sizeof(int);
649			switch (optname) {
650
651			case IP_TOS:
652				optval = inp->inp_ip.ip_tos;
653				break;
654
655			case IP_TTL:
656				optval = inp->inp_ip.ip_ttl;
657				break;
658
659#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
660
661			case IP_RECVOPTS:
662				optval = OPTBIT(INP_RECVOPTS);
663				break;
664
665			case IP_RECVRETOPTS:
666				optval = OPTBIT(INP_RECVRETOPTS);
667				break;
668
669			case IP_RECVDSTADDR:
670				optval = OPTBIT(INP_RECVDSTADDR);
671				break;
672			}
673			*mtod(m, int *) = optval;
674			break;
675
676		case IP_MULTICAST_IF:
677		case IP_MULTICAST_VIF:
678		case IP_MULTICAST_TTL:
679		case IP_MULTICAST_LOOP:
680		case IP_ADD_MEMBERSHIP:
681		case IP_DROP_MEMBERSHIP:
682			error = ip_getmoptions(optname, inp->inp_moptions, mp);
683			break;
684
685		default:
686			error = ENOPROTOOPT;
687			break;
688		}
689		break;
690	}
691	return (error);
692}
693
694/*
695 * Set up IP options in pcb for insertion in output packets.
696 * Store in mbuf with pointer in pcbopt, adding pseudo-option
697 * with destination address if source routed.
698 */
699static int
700#ifdef notyet
701ip_pcbopts(optname, pcbopt, m)
702	int optname;
703#else
704ip_pcbopts(pcbopt, m)
705#endif
706	struct mbuf **pcbopt;
707	register struct mbuf *m;
708{
709	register cnt, optlen;
710	register u_char *cp;
711	u_char opt;
712
713	/* turn off any old options */
714	if (*pcbopt)
715		(void)m_free(*pcbopt);
716	*pcbopt = 0;
717	if (m == (struct mbuf *)0 || m->m_len == 0) {
718		/*
719		 * Only turning off any previous options.
720		 */
721		if (m)
722			(void)m_free(m);
723		return (0);
724	}
725
726#ifndef	vax
727	if (m->m_len % sizeof(long))
728		goto bad;
729#endif
730	/*
731	 * IP first-hop destination address will be stored before
732	 * actual options; move other options back
733	 * and clear it when none present.
734	 */
735	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
736		goto bad;
737	cnt = m->m_len;
738	m->m_len += sizeof(struct in_addr);
739	cp = mtod(m, u_char *) + sizeof(struct in_addr);
740	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
741	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
742
743	for (; cnt > 0; cnt -= optlen, cp += optlen) {
744		opt = cp[IPOPT_OPTVAL];
745		if (opt == IPOPT_EOL)
746			break;
747		if (opt == IPOPT_NOP)
748			optlen = 1;
749		else {
750			optlen = cp[IPOPT_OLEN];
751			if (optlen <= IPOPT_OLEN || optlen > cnt)
752				goto bad;
753		}
754		switch (opt) {
755
756		default:
757			break;
758
759		case IPOPT_LSRR:
760		case IPOPT_SSRR:
761			/*
762			 * user process specifies route as:
763			 *	->A->B->C->D
764			 * D must be our final destination (but we can't
765			 * check that since we may not have connected yet).
766			 * A is first hop destination, which doesn't appear in
767			 * actual IP option, but is stored before the options.
768			 */
769			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
770				goto bad;
771			m->m_len -= sizeof(struct in_addr);
772			cnt -= sizeof(struct in_addr);
773			optlen -= sizeof(struct in_addr);
774			cp[IPOPT_OLEN] = optlen;
775			/*
776			 * Move first hop before start of options.
777			 */
778			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
779			    sizeof(struct in_addr));
780			/*
781			 * Then copy rest of options back
782			 * to close up the deleted entry.
783			 */
784			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
785			    sizeof(struct in_addr)),
786			    (caddr_t)&cp[IPOPT_OFFSET+1],
787			    (unsigned)cnt + sizeof(struct in_addr));
788			break;
789		}
790	}
791	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
792		goto bad;
793	*pcbopt = m;
794	return (0);
795
796bad:
797	(void)m_free(m);
798	return (EINVAL);
799}
800
801/*
802 * Set the IP multicast options in response to user setsockopt().
803 */
804static int
805ip_setmoptions(optname, imop, m)
806	int optname;
807	struct ip_moptions **imop;
808	struct mbuf *m;
809{
810	register int error = 0;
811	u_char loop;
812	register int i;
813	struct in_addr addr;
814	register struct ip_mreq *mreq;
815	register struct ifnet *ifp;
816	register struct ip_moptions *imo = *imop;
817	struct route ro;
818	register struct sockaddr_in *dst;
819	int s;
820
821	if (imo == NULL) {
822		/*
823		 * No multicast option buffer attached to the pcb;
824		 * allocate one and initialize to default values.
825		 */
826		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
827		    M_WAITOK);
828
829		if (imo == NULL)
830			return (ENOBUFS);
831		*imop = imo;
832		imo->imo_multicast_ifp = NULL;
833		imo->imo_multicast_vif = -1;
834		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
835		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
836		imo->imo_num_memberships = 0;
837	}
838
839	switch (optname) {
840	/* store an index number for the vif you wanna use in the send */
841	case IP_MULTICAST_VIF:
842		if (!legal_vif_num) {
843			error = EOPNOTSUPP;
844			break;
845		}
846		if (m == NULL || m->m_len != sizeof(int)) {
847			error = EINVAL;
848			break;
849		}
850		i = *(mtod(m, int *));
851		if (!legal_vif_num(i) && (i != -1)) {
852			error = EINVAL;
853			break;
854		}
855		imo->imo_multicast_vif = i;
856		break;
857
858	case IP_MULTICAST_IF:
859		/*
860		 * Select the interface for outgoing multicast packets.
861		 */
862		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
863			error = EINVAL;
864			break;
865		}
866		addr = *(mtod(m, struct in_addr *));
867		/*
868		 * INADDR_ANY is used to remove a previous selection.
869		 * When no interface is selected, a default one is
870		 * chosen every time a multicast packet is sent.
871		 */
872		if (addr.s_addr == INADDR_ANY) {
873			imo->imo_multicast_ifp = NULL;
874			break;
875		}
876		/*
877		 * The selected interface is identified by its local
878		 * IP address.  Find the interface and confirm that
879		 * it supports multicasting.
880		 */
881		s = splimp();
882		INADDR_TO_IFP(addr, ifp);
883		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
884			error = EADDRNOTAVAIL;
885			break;
886		}
887		imo->imo_multicast_ifp = ifp;
888		splx(s);
889		break;
890
891	case IP_MULTICAST_TTL:
892		/*
893		 * Set the IP time-to-live for outgoing multicast packets.
894		 */
895		if (m == NULL || m->m_len != 1) {
896			error = EINVAL;
897			break;
898		}
899		imo->imo_multicast_ttl = *(mtod(m, u_char *));
900		break;
901
902	case IP_MULTICAST_LOOP:
903		/*
904		 * Set the loopback flag for outgoing multicast packets.
905		 * Must be zero or one.
906		 */
907		if (m == NULL || m->m_len != 1 ||
908		   (loop = *(mtod(m, u_char *))) > 1) {
909			error = EINVAL;
910			break;
911		}
912		imo->imo_multicast_loop = loop;
913		break;
914
915	case IP_ADD_MEMBERSHIP:
916		/*
917		 * Add a multicast group membership.
918		 * Group must be a valid IP multicast address.
919		 */
920		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
921			error = EINVAL;
922			break;
923		}
924		mreq = mtod(m, struct ip_mreq *);
925		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
926			error = EINVAL;
927			break;
928		}
929		s = splimp();
930		/*
931		 * If no interface address was provided, use the interface of
932		 * the route to the given multicast address.
933		 */
934		if (mreq->imr_interface.s_addr == INADDR_ANY) {
935			bzero((caddr_t)&ro, sizeof(ro));
936			dst = (struct sockaddr_in *)&ro.ro_dst;
937			dst->sin_len = sizeof(*dst);
938			dst->sin_family = AF_INET;
939			dst->sin_addr = mreq->imr_multiaddr;
940			rtalloc(&ro);
941			if (ro.ro_rt == NULL) {
942				error = EADDRNOTAVAIL;
943				splx(s);
944				break;
945			}
946			ifp = ro.ro_rt->rt_ifp;
947			rtfree(ro.ro_rt);
948		}
949		else {
950			INADDR_TO_IFP(mreq->imr_interface, ifp);
951		}
952
953		/*
954		 * See if we found an interface, and confirm that it
955		 * supports multicast.
956		 */
957		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
958			error = EADDRNOTAVAIL;
959			splx(s);
960			break;
961		}
962		/*
963		 * See if the membership already exists or if all the
964		 * membership slots are full.
965		 */
966		for (i = 0; i < imo->imo_num_memberships; ++i) {
967			if (imo->imo_membership[i]->inm_ifp == ifp &&
968			    imo->imo_membership[i]->inm_addr.s_addr
969						== mreq->imr_multiaddr.s_addr)
970				break;
971		}
972		if (i < imo->imo_num_memberships) {
973			error = EADDRINUSE;
974			splx(s);
975			break;
976		}
977		if (i == IP_MAX_MEMBERSHIPS) {
978			error = ETOOMANYREFS;
979			splx(s);
980			break;
981		}
982		/*
983		 * Everything looks good; add a new record to the multicast
984		 * address list for the given interface.
985		 */
986		if ((imo->imo_membership[i] =
987		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
988			error = ENOBUFS;
989			splx(s);
990			break;
991		}
992		++imo->imo_num_memberships;
993		splx(s);
994		break;
995
996	case IP_DROP_MEMBERSHIP:
997		/*
998		 * Drop a multicast group membership.
999		 * Group must be a valid IP multicast address.
1000		 */
1001		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1002			error = EINVAL;
1003			break;
1004		}
1005		mreq = mtod(m, struct ip_mreq *);
1006		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1007			error = EINVAL;
1008			break;
1009		}
1010
1011		s = splimp();
1012		/*
1013		 * If an interface address was specified, get a pointer
1014		 * to its ifnet structure.
1015		 */
1016		if (mreq->imr_interface.s_addr == INADDR_ANY)
1017			ifp = NULL;
1018		else {
1019			INADDR_TO_IFP(mreq->imr_interface, ifp);
1020			if (ifp == NULL) {
1021				error = EADDRNOTAVAIL;
1022				splx(s);
1023				break;
1024			}
1025		}
1026		/*
1027		 * Find the membership in the membership array.
1028		 */
1029		for (i = 0; i < imo->imo_num_memberships; ++i) {
1030			if ((ifp == NULL ||
1031			     imo->imo_membership[i]->inm_ifp == ifp) &&
1032			     imo->imo_membership[i]->inm_addr.s_addr ==
1033			     mreq->imr_multiaddr.s_addr)
1034				break;
1035		}
1036		if (i == imo->imo_num_memberships) {
1037			error = EADDRNOTAVAIL;
1038			splx(s);
1039			break;
1040		}
1041		/*
1042		 * Give up the multicast address record to which the
1043		 * membership points.
1044		 */
1045		in_delmulti(imo->imo_membership[i]);
1046		/*
1047		 * Remove the gap in the membership array.
1048		 */
1049		for (++i; i < imo->imo_num_memberships; ++i)
1050			imo->imo_membership[i-1] = imo->imo_membership[i];
1051		--imo->imo_num_memberships;
1052		splx(s);
1053		break;
1054
1055	default:
1056		error = EOPNOTSUPP;
1057		break;
1058	}
1059
1060	/*
1061	 * If all options have default values, no need to keep the mbuf.
1062	 */
1063	if (imo->imo_multicast_ifp == NULL &&
1064	    imo->imo_multicast_vif == -1 &&
1065	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1066	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1067	    imo->imo_num_memberships == 0) {
1068		free(*imop, M_IPMOPTS);
1069		*imop = NULL;
1070	}
1071
1072	return (error);
1073}
1074
1075/*
1076 * Return the IP multicast options in response to user getsockopt().
1077 */
1078static int
1079ip_getmoptions(optname, imo, mp)
1080	int optname;
1081	register struct ip_moptions *imo;
1082	register struct mbuf **mp;
1083{
1084	u_char *ttl;
1085	u_char *loop;
1086	struct in_addr *addr;
1087	struct in_ifaddr *ia;
1088
1089	*mp = m_get(M_WAIT, MT_SOOPTS);
1090
1091	switch (optname) {
1092
1093	case IP_MULTICAST_VIF:
1094		if (imo != NULL)
1095			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1096		else
1097			*(mtod(*mp, int *)) = -1;
1098		(*mp)->m_len = sizeof(int);
1099		return(0);
1100
1101	case IP_MULTICAST_IF:
1102		addr = mtod(*mp, struct in_addr *);
1103		(*mp)->m_len = sizeof(struct in_addr);
1104		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1105			addr->s_addr = INADDR_ANY;
1106		else {
1107			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1108			addr->s_addr = (ia == NULL) ? INADDR_ANY
1109					: IA_SIN(ia)->sin_addr.s_addr;
1110		}
1111		return (0);
1112
1113	case IP_MULTICAST_TTL:
1114		ttl = mtod(*mp, u_char *);
1115		(*mp)->m_len = 1;
1116		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1117				     : imo->imo_multicast_ttl;
1118		return (0);
1119
1120	case IP_MULTICAST_LOOP:
1121		loop = mtod(*mp, u_char *);
1122		(*mp)->m_len = 1;
1123		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1124				      : imo->imo_multicast_loop;
1125		return (0);
1126
1127	default:
1128		return (EOPNOTSUPP);
1129	}
1130}
1131
1132/*
1133 * Discard the IP multicast options.
1134 */
1135void
1136ip_freemoptions(imo)
1137	register struct ip_moptions *imo;
1138{
1139	register int i;
1140
1141	if (imo != NULL) {
1142		for (i = 0; i < imo->imo_num_memberships; ++i)
1143			in_delmulti(imo->imo_membership[i]);
1144		free(imo, M_IPMOPTS);
1145	}
1146}
1147
1148/*
1149 * Routine called from ip_output() to loop back a copy of an IP multicast
1150 * packet to the input queue of a specified interface.  Note that this
1151 * calls the output routine of the loopback "driver", but with an interface
1152 * pointer that might NOT be a loopback interface -- evil, but easier than
1153 * replicating that code here.
1154 */
1155static void
1156ip_mloopback(ifp, m, dst)
1157	struct ifnet *ifp;
1158	register struct mbuf *m;
1159	register struct sockaddr_in *dst;
1160{
1161	register struct ip *ip;
1162	struct mbuf *copym;
1163
1164	copym = m_copy(m, 0, M_COPYALL);
1165	if (copym != NULL) {
1166		/*
1167		 * We don't bother to fragment if the IP length is greater
1168		 * than the interface's MTU.  Can this possibly matter?
1169		 */
1170		ip = mtod(copym, struct ip *);
1171		ip->ip_len = htons((u_short)ip->ip_len);
1172		ip->ip_off = htons((u_short)ip->ip_off);
1173		ip->ip_sum = 0;
1174		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1175		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1176	}
1177}
1178