ip_output.c revision 15026
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 *	$Id: ip_output.c,v 1.33 1996/03/26 18:56:51 fenner Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/queue.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/errno.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46
47#include <net/if.h>
48#include <net/route.h>
49
50#include <netinet/in.h>
51#include <netinet/in_systm.h>
52#include <netinet/ip.h>
53#include <netinet/in_pcb.h>
54#include <netinet/in_var.h>
55#include <netinet/ip_var.h>
56
57#ifdef vax
58#include <machine/mtpr.h>
59#endif
60
61u_short ip_id;
62
63static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
64static void	ip_mloopback
65	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
66static int	ip_getmoptions
67	__P((int, struct ip_moptions *, struct mbuf **));
68static int	ip_optcopy __P((struct ip *, struct ip *));
69static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
70static int	ip_setmoptions
71	__P((int, struct ip_moptions **, struct mbuf *));
72
73/*
74 * IP output.  The packet in mbuf chain m contains a skeletal IP
75 * header (with len, off, ttl, proto, tos, src, dst).
76 * The mbuf chain containing the packet will be freed.
77 * The mbuf opt, if present, will not be freed.
78 */
79int
80ip_output(m0, opt, ro, flags, imo)
81	struct mbuf *m0;
82	struct mbuf *opt;
83	struct route *ro;
84	int flags;
85	struct ip_moptions *imo;
86{
87	struct ip *ip, *mhip;
88	struct ifnet *ifp;
89	struct mbuf *m = m0;
90	int hlen = sizeof (struct ip);
91	int len, off, error = 0;
92	/*
93	 * It might seem obvious at first glance that one could easily
94	 * make a one-behind cache out of this by simply making `iproute'
95	 * static and eliminating the bzero() below.  However, this turns
96	 * out not to work, for two reasons:
97	 *
98	 * 1) This routine needs to be reentrant.  It can be called
99	 * recursively from encapsulating network interfaces, and it
100	 * is always called recursively from ip_mforward().
101	 *
102	 * 2) You turn out not to gain much.  There is already a one-
103	 * behind cache implemented for the specific case of forwarding,
104	 * and sends on a connected socket will use a route associated
105	 * with the PCB.  The only cases left are sends on unconnected
106	 * and raw sockets, and if these cases are really significant,
107	 * something is seriously wrong.
108	 */
109	struct route iproute;
110	struct sockaddr_in *dst;
111	struct in_ifaddr *ia;
112
113#ifdef	DIAGNOSTIC
114	if ((m->m_flags & M_PKTHDR) == 0)
115		panic("ip_output no HDR");
116#endif
117	if (opt) {
118		m = ip_insertoptions(m, opt, &len);
119		hlen = len;
120	}
121	ip = mtod(m, struct ip *);
122	/*
123	 * Fill in IP header.
124	 */
125	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
126		ip->ip_v = IPVERSION;
127		ip->ip_off &= IP_DF;
128		ip->ip_id = htons(ip_id++);
129		ip->ip_hl = hlen >> 2;
130		ipstat.ips_localout++;
131	} else {
132		hlen = ip->ip_hl << 2;
133	}
134	/*
135	 * Route packet.
136	 */
137	if (ro == 0) {
138		ro = &iproute;
139		bzero((caddr_t)ro, sizeof (*ro));
140	}
141	dst = (struct sockaddr_in *)&ro->ro_dst;
142	/*
143	 * If there is a cached route,
144	 * check that it is to the same destination
145	 * and is still up.  If not, free it and try again.
146	 */
147	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
148	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
149		RTFREE(ro->ro_rt);
150		ro->ro_rt = (struct rtentry *)0;
151	}
152	if (ro->ro_rt == 0) {
153		dst->sin_family = AF_INET;
154		dst->sin_len = sizeof(*dst);
155		dst->sin_addr = ip->ip_dst;
156	}
157	/*
158	 * If routing to interface only,
159	 * short circuit routing lookup.
160	 */
161#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
162#define sintosa(sin)	((struct sockaddr *)(sin))
163	if (flags & IP_ROUTETOIF) {
164		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
165		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
166			ipstat.ips_noroute++;
167			error = ENETUNREACH;
168			goto bad;
169		}
170		ifp = ia->ia_ifp;
171		ip->ip_ttl = 1;
172	} else {
173		/*
174		 * If this is the case, we probably don't want to allocate
175		 * a protocol-cloned route since we didn't get one from the
176		 * ULP.  This lets TCP do its thing, while not burdening
177		 * forwarding or ICMP with the overhead of cloning a route.
178		 * Of course, we still want to do any cloning requested by
179		 * the link layer, as this is probably required in all cases
180		 * for correct operation (as it is for ARP).
181		 */
182		if (ro->ro_rt == 0)
183			rtalloc_ign(ro, RTF_PRCLONING);
184		if (ro->ro_rt == 0) {
185			ipstat.ips_noroute++;
186			error = EHOSTUNREACH;
187			goto bad;
188		}
189		ia = ifatoia(ro->ro_rt->rt_ifa);
190		ifp = ro->ro_rt->rt_ifp;
191		ro->ro_rt->rt_use++;
192		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
193			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
194	}
195	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
196		struct in_multi *inm;
197
198		m->m_flags |= M_MCAST;
199		/*
200		 * IP destination address is multicast.  Make sure "dst"
201		 * still points to the address in "ro".  (It may have been
202		 * changed to point to a gateway address, above.)
203		 */
204		dst = (struct sockaddr_in *)&ro->ro_dst;
205		/*
206		 * See if the caller provided any multicast options
207		 */
208		if (imo != NULL) {
209			ip->ip_ttl = imo->imo_multicast_ttl;
210			if (imo->imo_multicast_ifp != NULL)
211				ifp = imo->imo_multicast_ifp;
212			if (imo->imo_multicast_vif != -1)
213				ip->ip_src.s_addr =
214				    ip_mcast_src(imo->imo_multicast_vif);
215		} else
216			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
217		/*
218		 * Confirm that the outgoing interface supports multicast.
219		 */
220		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
221			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
222				ipstat.ips_noroute++;
223				error = ENETUNREACH;
224				goto bad;
225			}
226		}
227		/*
228		 * If source address not specified yet, use address
229		 * of outgoing interface.
230		 */
231		if (ip->ip_src.s_addr == INADDR_ANY) {
232			register struct in_ifaddr *ia;
233
234			for (ia = in_ifaddr; ia; ia = ia->ia_next)
235				if (ia->ia_ifp == ifp) {
236					ip->ip_src = IA_SIN(ia)->sin_addr;
237					break;
238				}
239		}
240
241		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
242		if (inm != NULL &&
243		   (imo == NULL || imo->imo_multicast_loop)) {
244			/*
245			 * If we belong to the destination multicast group
246			 * on the outgoing interface, and the caller did not
247			 * forbid loopback, loop back a copy.
248			 */
249			ip_mloopback(ifp, m, dst);
250		}
251		else {
252			/*
253			 * If we are acting as a multicast router, perform
254			 * multicast forwarding as if the packet had just
255			 * arrived on the interface to which we are about
256			 * to send.  The multicast forwarding function
257			 * recursively calls this function, using the
258			 * IP_FORWARDING flag to prevent infinite recursion.
259			 *
260			 * Multicasts that are looped back by ip_mloopback(),
261			 * above, will be forwarded by the ip_input() routine,
262			 * if necessary.
263			 */
264			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
265				/*
266				 * Check if rsvp daemon is running. If not, don't
267				 * set ip_moptions. This ensures that the packet
268				 * is multicast and not just sent down one link
269				 * as prescribed by rsvpd.
270				 */
271				if (!rsvp_on)
272				  imo = NULL;
273				if (ip_mforward(ip, ifp, m, imo) != 0) {
274					m_freem(m);
275					goto done;
276				}
277			}
278		}
279
280		/*
281		 * Multicasts with a time-to-live of zero may be looped-
282		 * back, above, but must not be transmitted on a network.
283		 * Also, multicasts addressed to the loopback interface
284		 * are not sent -- the above call to ip_mloopback() will
285		 * loop back a copy if this host actually belongs to the
286		 * destination group on the loopback interface.
287		 */
288		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
289			m_freem(m);
290			goto done;
291		}
292
293		goto sendit;
294	}
295#ifndef notdef
296	/*
297	 * If source address not specified yet, use address
298	 * of outgoing interface.
299	 */
300	if (ip->ip_src.s_addr == INADDR_ANY)
301		ip->ip_src = IA_SIN(ia)->sin_addr;
302#endif
303	/*
304	 * Verify that we have any chance at all of being able to queue
305	 *      the packet or packet fragments
306	 */
307	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
308		ifp->if_snd.ifq_maxlen) {
309			error = ENOBUFS;
310			goto bad;
311	}
312
313	/*
314	 * Look for broadcast address and
315	 * and verify user is allowed to send
316	 * such a packet.
317	 */
318	if (in_broadcast(dst->sin_addr, ifp)) {
319		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
320			error = EADDRNOTAVAIL;
321			goto bad;
322		}
323		if ((flags & IP_ALLOWBROADCAST) == 0) {
324			error = EACCES;
325			goto bad;
326		}
327		/* don't allow broadcast messages to be fragmented */
328		if ((u_short)ip->ip_len > ifp->if_mtu) {
329			error = EMSGSIZE;
330			goto bad;
331		}
332		m->m_flags |= M_BCAST;
333	} else
334		m->m_flags &= ~M_BCAST;
335
336sendit:
337	/*
338	 * Check with the firewall...
339	 */
340	if (ip_fw_chk_ptr && !(*ip_fw_chk_ptr)(&ip, hlen, ifp, 1, &m)) {
341		error = EACCES;
342		goto done;
343	}
344
345	/*
346	 * If small enough for interface, can just send directly.
347	 */
348	if ((u_short)ip->ip_len <= ifp->if_mtu) {
349		ip->ip_len = htons((u_short)ip->ip_len);
350		ip->ip_off = htons((u_short)ip->ip_off);
351		ip->ip_sum = 0;
352		ip->ip_sum = in_cksum(m, hlen);
353		error = (*ifp->if_output)(ifp, m,
354				(struct sockaddr *)dst, ro->ro_rt);
355		goto done;
356	}
357	/*
358	 * Too large for interface; fragment if possible.
359	 * Must be able to put at least 8 bytes per fragment.
360	 */
361	if (ip->ip_off & IP_DF) {
362		error = EMSGSIZE;
363#if 1
364		/*
365		 * This case can happen if the user changed the MTU
366		 * of an interface after enabling IP on it.  Because
367		 * most netifs don't keep track of routes pointing to
368		 * them, there is no way for one to update all its
369		 * routes when the MTU is changed.
370		 */
371		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
372		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
373		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
374			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
375		}
376#endif
377		ipstat.ips_cantfrag++;
378		goto bad;
379	}
380	len = (ifp->if_mtu - hlen) &~ 7;
381	if (len < 8) {
382		error = EMSGSIZE;
383		goto bad;
384	}
385
386    {
387	int mhlen, firstlen = len;
388	struct mbuf **mnext = &m->m_nextpkt;
389
390	/*
391	 * Loop through length of segment after first fragment,
392	 * make new header and copy data of each part and link onto chain.
393	 */
394	m0 = m;
395	mhlen = sizeof (struct ip);
396	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
397		MGETHDR(m, M_DONTWAIT, MT_HEADER);
398		if (m == 0) {
399			error = ENOBUFS;
400			ipstat.ips_odropped++;
401			goto sendorfree;
402		}
403		m->m_data += max_linkhdr;
404		mhip = mtod(m, struct ip *);
405		*mhip = *ip;
406		if (hlen > sizeof (struct ip)) {
407			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
408			mhip->ip_hl = mhlen >> 2;
409		}
410		m->m_len = mhlen;
411		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
412		if (ip->ip_off & IP_MF)
413			mhip->ip_off |= IP_MF;
414		if (off + len >= (u_short)ip->ip_len)
415			len = (u_short)ip->ip_len - off;
416		else
417			mhip->ip_off |= IP_MF;
418		mhip->ip_len = htons((u_short)(len + mhlen));
419		m->m_next = m_copy(m0, off, len);
420		if (m->m_next == 0) {
421			(void) m_free(m);
422			error = ENOBUFS;	/* ??? */
423			ipstat.ips_odropped++;
424			goto sendorfree;
425		}
426		m->m_pkthdr.len = mhlen + len;
427		m->m_pkthdr.rcvif = (struct ifnet *)0;
428		mhip->ip_off = htons((u_short)mhip->ip_off);
429		mhip->ip_sum = 0;
430		mhip->ip_sum = in_cksum(m, mhlen);
431		*mnext = m;
432		mnext = &m->m_nextpkt;
433		ipstat.ips_ofragments++;
434	}
435	/*
436	 * Update first fragment by trimming what's been copied out
437	 * and updating header, then send each fragment (in order).
438	 */
439	m = m0;
440	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
441	m->m_pkthdr.len = hlen + firstlen;
442	ip->ip_len = htons((u_short)m->m_pkthdr.len);
443	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
444	ip->ip_sum = 0;
445	ip->ip_sum = in_cksum(m, hlen);
446sendorfree:
447	for (m = m0; m; m = m0) {
448		m0 = m->m_nextpkt;
449		m->m_nextpkt = 0;
450		if (error == 0)
451			error = (*ifp->if_output)(ifp, m,
452			    (struct sockaddr *)dst, ro->ro_rt);
453		else
454			m_freem(m);
455	}
456
457	if (error == 0)
458		ipstat.ips_fragmented++;
459    }
460done:
461	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
462		RTFREE(ro->ro_rt);
463
464	return (error);
465bad:
466	m_freem(m0);
467	goto done;
468}
469
470/*
471 * Insert IP options into preformed packet.
472 * Adjust IP destination as required for IP source routing,
473 * as indicated by a non-zero in_addr at the start of the options.
474 *
475 * XXX This routine assumes that the packet has no options in place.
476 */
477static struct mbuf *
478ip_insertoptions(m, opt, phlen)
479	register struct mbuf *m;
480	struct mbuf *opt;
481	int *phlen;
482{
483	register struct ipoption *p = mtod(opt, struct ipoption *);
484	struct mbuf *n;
485	register struct ip *ip = mtod(m, struct ip *);
486	unsigned optlen;
487
488	optlen = opt->m_len - sizeof(p->ipopt_dst);
489	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
490		return (m);		/* XXX should fail */
491	if (p->ipopt_dst.s_addr)
492		ip->ip_dst = p->ipopt_dst;
493	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
494		MGETHDR(n, M_DONTWAIT, MT_HEADER);
495		if (n == 0)
496			return (m);
497		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
498		m->m_len -= sizeof(struct ip);
499		m->m_data += sizeof(struct ip);
500		n->m_next = m;
501		m = n;
502		m->m_len = optlen + sizeof(struct ip);
503		m->m_data += max_linkhdr;
504		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
505	} else {
506		m->m_data -= optlen;
507		m->m_len += optlen;
508		m->m_pkthdr.len += optlen;
509		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
510	}
511	ip = mtod(m, struct ip *);
512	(void)memcpy(ip + 1, p->ipopt_list, (unsigned)optlen);
513	*phlen = sizeof(struct ip) + optlen;
514	ip->ip_hl = *phlen >> 2;
515	ip->ip_len += optlen;
516	return (m);
517}
518
519/*
520 * Copy options from ip to jp,
521 * omitting those not copied during fragmentation.
522 */
523static int
524ip_optcopy(ip, jp)
525	struct ip *ip, *jp;
526{
527	register u_char *cp, *dp;
528	int opt, optlen, cnt;
529
530	cp = (u_char *)(ip + 1);
531	dp = (u_char *)(jp + 1);
532	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
533	for (; cnt > 0; cnt -= optlen, cp += optlen) {
534		opt = cp[0];
535		if (opt == IPOPT_EOL)
536			break;
537		if (opt == IPOPT_NOP) {
538			/* Preserve for IP mcast tunnel's LSRR alignment. */
539			*dp++ = IPOPT_NOP;
540			optlen = 1;
541			continue;
542		} else
543			optlen = cp[IPOPT_OLEN];
544		/* bogus lengths should have been caught by ip_dooptions */
545		if (optlen > cnt)
546			optlen = cnt;
547		if (IPOPT_COPIED(opt)) {
548			(void)memcpy(dp, cp, (unsigned)optlen);
549			dp += optlen;
550		}
551	}
552	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
553		*dp++ = IPOPT_EOL;
554	return (optlen);
555}
556
557/*
558 * IP socket option processing.
559 */
560int
561ip_ctloutput(op, so, level, optname, mp)
562	int op;
563	struct socket *so;
564	int level, optname;
565	struct mbuf **mp;
566{
567	register struct inpcb *inp = sotoinpcb(so);
568	register struct mbuf *m = *mp;
569	register int optval = 0;
570	int error = 0;
571
572	if (level != IPPROTO_IP) {
573		error = EINVAL;
574		if (op == PRCO_SETOPT && *mp)
575			(void) m_free(*mp);
576	} else switch (op) {
577
578	case PRCO_SETOPT:
579		switch (optname) {
580		case IP_OPTIONS:
581#ifdef notyet
582		case IP_RETOPTS:
583			return (ip_pcbopts(optname, &inp->inp_options, m));
584#else
585			return (ip_pcbopts(&inp->inp_options, m));
586#endif
587
588		case IP_TOS:
589		case IP_TTL:
590		case IP_RECVOPTS:
591		case IP_RECVRETOPTS:
592		case IP_RECVDSTADDR:
593			if (m == 0 || m->m_len != sizeof(int))
594				error = EINVAL;
595			else {
596				optval = *mtod(m, int *);
597				switch (optname) {
598
599				case IP_TOS:
600					inp->inp_ip.ip_tos = optval;
601					break;
602
603				case IP_TTL:
604					inp->inp_ip.ip_ttl = optval;
605					break;
606#define	OPTSET(bit) \
607	if (optval) \
608		inp->inp_flags |= bit; \
609	else \
610		inp->inp_flags &= ~bit;
611
612				case IP_RECVOPTS:
613					OPTSET(INP_RECVOPTS);
614					break;
615
616				case IP_RECVRETOPTS:
617					OPTSET(INP_RECVRETOPTS);
618					break;
619
620				case IP_RECVDSTADDR:
621					OPTSET(INP_RECVDSTADDR);
622					break;
623				}
624			}
625			break;
626#undef OPTSET
627
628		case IP_MULTICAST_IF:
629		case IP_MULTICAST_VIF:
630		case IP_MULTICAST_TTL:
631		case IP_MULTICAST_LOOP:
632		case IP_ADD_MEMBERSHIP:
633		case IP_DROP_MEMBERSHIP:
634			error = ip_setmoptions(optname, &inp->inp_moptions, m);
635			break;
636
637		case IP_PORTRANGE:
638			if (m == 0 || m->m_len != sizeof(int))
639				error = EINVAL;
640			else {
641				optval = *mtod(m, int *);
642
643				switch (optval) {
644
645				case IP_PORTRANGE_DEFAULT:
646					inp->inp_flags &= ~(INP_LOWPORT);
647					inp->inp_flags &= ~(INP_HIGHPORT);
648					break;
649
650				case IP_PORTRANGE_HIGH:
651					inp->inp_flags &= ~(INP_LOWPORT);
652					inp->inp_flags |= INP_HIGHPORT;
653					break;
654
655				case IP_PORTRANGE_LOW:
656					inp->inp_flags &= ~(INP_HIGHPORT);
657					inp->inp_flags |= INP_LOWPORT;
658					break;
659
660				default:
661					error = EINVAL;
662					break;
663				}
664			}
665
666		default:
667			error = ENOPROTOOPT;
668			break;
669		}
670		if (m)
671			(void)m_free(m);
672		break;
673
674	case PRCO_GETOPT:
675		switch (optname) {
676		case IP_OPTIONS:
677		case IP_RETOPTS:
678			*mp = m = m_get(M_WAIT, MT_SOOPTS);
679			if (inp->inp_options) {
680				m->m_len = inp->inp_options->m_len;
681				(void)memcpy(mtod(m, void *),
682				    mtod(inp->inp_options, void *), (unsigned)m->m_len);
683			} else
684				m->m_len = 0;
685			break;
686
687		case IP_TOS:
688		case IP_TTL:
689		case IP_RECVOPTS:
690		case IP_RECVRETOPTS:
691		case IP_RECVDSTADDR:
692			*mp = m = m_get(M_WAIT, MT_SOOPTS);
693			m->m_len = sizeof(int);
694			switch (optname) {
695
696			case IP_TOS:
697				optval = inp->inp_ip.ip_tos;
698				break;
699
700			case IP_TTL:
701				optval = inp->inp_ip.ip_ttl;
702				break;
703
704#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
705
706			case IP_RECVOPTS:
707				optval = OPTBIT(INP_RECVOPTS);
708				break;
709
710			case IP_RECVRETOPTS:
711				optval = OPTBIT(INP_RECVRETOPTS);
712				break;
713
714			case IP_RECVDSTADDR:
715				optval = OPTBIT(INP_RECVDSTADDR);
716				break;
717			}
718			*mtod(m, int *) = optval;
719			break;
720
721		case IP_MULTICAST_IF:
722		case IP_MULTICAST_VIF:
723		case IP_MULTICAST_TTL:
724		case IP_MULTICAST_LOOP:
725		case IP_ADD_MEMBERSHIP:
726		case IP_DROP_MEMBERSHIP:
727			error = ip_getmoptions(optname, inp->inp_moptions, mp);
728			break;
729
730		case IP_PORTRANGE:
731			*mp = m = m_get(M_WAIT, MT_SOOPTS);
732			m->m_len = sizeof(int);
733
734			if (inp->inp_flags & INP_HIGHPORT)
735				optval = IP_PORTRANGE_HIGH;
736			else if (inp->inp_flags & INP_LOWPORT)
737				optval = IP_PORTRANGE_LOW;
738			else
739				optval = 0;
740
741			*mtod(m, int *) = optval;
742			break;
743
744		default:
745			error = ENOPROTOOPT;
746			break;
747		}
748		break;
749	}
750	return (error);
751}
752
753/*
754 * Set up IP options in pcb for insertion in output packets.
755 * Store in mbuf with pointer in pcbopt, adding pseudo-option
756 * with destination address if source routed.
757 */
758static int
759#ifdef notyet
760ip_pcbopts(optname, pcbopt, m)
761	int optname;
762#else
763ip_pcbopts(pcbopt, m)
764#endif
765	struct mbuf **pcbopt;
766	register struct mbuf *m;
767{
768	register cnt, optlen;
769	register u_char *cp;
770	u_char opt;
771
772	/* turn off any old options */
773	if (*pcbopt)
774		(void)m_free(*pcbopt);
775	*pcbopt = 0;
776	if (m == (struct mbuf *)0 || m->m_len == 0) {
777		/*
778		 * Only turning off any previous options.
779		 */
780		if (m)
781			(void)m_free(m);
782		return (0);
783	}
784
785#ifndef	vax
786	if (m->m_len % sizeof(long))
787		goto bad;
788#endif
789	/*
790	 * IP first-hop destination address will be stored before
791	 * actual options; move other options back
792	 * and clear it when none present.
793	 */
794	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
795		goto bad;
796	cnt = m->m_len;
797	m->m_len += sizeof(struct in_addr);
798	cp = mtod(m, u_char *) + sizeof(struct in_addr);
799	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
800	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
801
802	for (; cnt > 0; cnt -= optlen, cp += optlen) {
803		opt = cp[IPOPT_OPTVAL];
804		if (opt == IPOPT_EOL)
805			break;
806		if (opt == IPOPT_NOP)
807			optlen = 1;
808		else {
809			optlen = cp[IPOPT_OLEN];
810			if (optlen <= IPOPT_OLEN || optlen > cnt)
811				goto bad;
812		}
813		switch (opt) {
814
815		default:
816			break;
817
818		case IPOPT_LSRR:
819		case IPOPT_SSRR:
820			/*
821			 * user process specifies route as:
822			 *	->A->B->C->D
823			 * D must be our final destination (but we can't
824			 * check that since we may not have connected yet).
825			 * A is first hop destination, which doesn't appear in
826			 * actual IP option, but is stored before the options.
827			 */
828			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
829				goto bad;
830			m->m_len -= sizeof(struct in_addr);
831			cnt -= sizeof(struct in_addr);
832			optlen -= sizeof(struct in_addr);
833			cp[IPOPT_OLEN] = optlen;
834			/*
835			 * Move first hop before start of options.
836			 */
837			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
838			    sizeof(struct in_addr));
839			/*
840			 * Then copy rest of options back
841			 * to close up the deleted entry.
842			 */
843			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
844			    sizeof(struct in_addr)),
845			    (caddr_t)&cp[IPOPT_OFFSET+1],
846			    (unsigned)cnt + sizeof(struct in_addr));
847			break;
848		}
849	}
850	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
851		goto bad;
852	*pcbopt = m;
853	return (0);
854
855bad:
856	(void)m_free(m);
857	return (EINVAL);
858}
859
860/*
861 * Set the IP multicast options in response to user setsockopt().
862 */
863static int
864ip_setmoptions(optname, imop, m)
865	int optname;
866	struct ip_moptions **imop;
867	struct mbuf *m;
868{
869	register int error = 0;
870	u_char loop;
871	register int i;
872	struct in_addr addr;
873	register struct ip_mreq *mreq;
874	register struct ifnet *ifp;
875	register struct ip_moptions *imo = *imop;
876	struct route ro;
877	register struct sockaddr_in *dst;
878	int s;
879
880	if (imo == NULL) {
881		/*
882		 * No multicast option buffer attached to the pcb;
883		 * allocate one and initialize to default values.
884		 */
885		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
886		    M_WAITOK);
887
888		if (imo == NULL)
889			return (ENOBUFS);
890		*imop = imo;
891		imo->imo_multicast_ifp = NULL;
892		imo->imo_multicast_vif = -1;
893		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
894		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
895		imo->imo_num_memberships = 0;
896	}
897
898	switch (optname) {
899	/* store an index number for the vif you wanna use in the send */
900	case IP_MULTICAST_VIF:
901		if (!legal_vif_num) {
902			error = EOPNOTSUPP;
903			break;
904		}
905		if (m == NULL || m->m_len != sizeof(int)) {
906			error = EINVAL;
907			break;
908		}
909		i = *(mtod(m, int *));
910		if (!legal_vif_num(i) && (i != -1)) {
911			error = EINVAL;
912			break;
913		}
914		imo->imo_multicast_vif = i;
915		break;
916
917	case IP_MULTICAST_IF:
918		/*
919		 * Select the interface for outgoing multicast packets.
920		 */
921		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
922			error = EINVAL;
923			break;
924		}
925		addr = *(mtod(m, struct in_addr *));
926		/*
927		 * INADDR_ANY is used to remove a previous selection.
928		 * When no interface is selected, a default one is
929		 * chosen every time a multicast packet is sent.
930		 */
931		if (addr.s_addr == INADDR_ANY) {
932			imo->imo_multicast_ifp = NULL;
933			break;
934		}
935		/*
936		 * The selected interface is identified by its local
937		 * IP address.  Find the interface and confirm that
938		 * it supports multicasting.
939		 */
940		s = splimp();
941		INADDR_TO_IFP(addr, ifp);
942		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
943			splx(s);
944			error = EADDRNOTAVAIL;
945			break;
946		}
947		imo->imo_multicast_ifp = ifp;
948		splx(s);
949		break;
950
951	case IP_MULTICAST_TTL:
952		/*
953		 * Set the IP time-to-live for outgoing multicast packets.
954		 */
955		if (m == NULL || m->m_len != 1) {
956			error = EINVAL;
957			break;
958		}
959		imo->imo_multicast_ttl = *(mtod(m, u_char *));
960		break;
961
962	case IP_MULTICAST_LOOP:
963		/*
964		 * Set the loopback flag for outgoing multicast packets.
965		 * Must be zero or one.
966		 */
967		if (m == NULL || m->m_len != 1 ||
968		   (loop = *(mtod(m, u_char *))) > 1) {
969			error = EINVAL;
970			break;
971		}
972		imo->imo_multicast_loop = loop;
973		break;
974
975	case IP_ADD_MEMBERSHIP:
976		/*
977		 * Add a multicast group membership.
978		 * Group must be a valid IP multicast address.
979		 */
980		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
981			error = EINVAL;
982			break;
983		}
984		mreq = mtod(m, struct ip_mreq *);
985		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
986			error = EINVAL;
987			break;
988		}
989		s = splimp();
990		/*
991		 * If no interface address was provided, use the interface of
992		 * the route to the given multicast address.
993		 */
994		if (mreq->imr_interface.s_addr == INADDR_ANY) {
995			bzero((caddr_t)&ro, sizeof(ro));
996			dst = (struct sockaddr_in *)&ro.ro_dst;
997			dst->sin_len = sizeof(*dst);
998			dst->sin_family = AF_INET;
999			dst->sin_addr = mreq->imr_multiaddr;
1000			rtalloc(&ro);
1001			if (ro.ro_rt == NULL) {
1002				error = EADDRNOTAVAIL;
1003				splx(s);
1004				break;
1005			}
1006			ifp = ro.ro_rt->rt_ifp;
1007			rtfree(ro.ro_rt);
1008		}
1009		else {
1010			INADDR_TO_IFP(mreq->imr_interface, ifp);
1011		}
1012
1013		/*
1014		 * See if we found an interface, and confirm that it
1015		 * supports multicast.
1016		 */
1017		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1018			error = EADDRNOTAVAIL;
1019			splx(s);
1020			break;
1021		}
1022		/*
1023		 * See if the membership already exists or if all the
1024		 * membership slots are full.
1025		 */
1026		for (i = 0; i < imo->imo_num_memberships; ++i) {
1027			if (imo->imo_membership[i]->inm_ifp == ifp &&
1028			    imo->imo_membership[i]->inm_addr.s_addr
1029						== mreq->imr_multiaddr.s_addr)
1030				break;
1031		}
1032		if (i < imo->imo_num_memberships) {
1033			error = EADDRINUSE;
1034			splx(s);
1035			break;
1036		}
1037		if (i == IP_MAX_MEMBERSHIPS) {
1038			error = ETOOMANYREFS;
1039			splx(s);
1040			break;
1041		}
1042		/*
1043		 * Everything looks good; add a new record to the multicast
1044		 * address list for the given interface.
1045		 */
1046		if ((imo->imo_membership[i] =
1047		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1048			error = ENOBUFS;
1049			splx(s);
1050			break;
1051		}
1052		++imo->imo_num_memberships;
1053		splx(s);
1054		break;
1055
1056	case IP_DROP_MEMBERSHIP:
1057		/*
1058		 * Drop a multicast group membership.
1059		 * Group must be a valid IP multicast address.
1060		 */
1061		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1062			error = EINVAL;
1063			break;
1064		}
1065		mreq = mtod(m, struct ip_mreq *);
1066		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1067			error = EINVAL;
1068			break;
1069		}
1070
1071		s = splimp();
1072		/*
1073		 * If an interface address was specified, get a pointer
1074		 * to its ifnet structure.
1075		 */
1076		if (mreq->imr_interface.s_addr == INADDR_ANY)
1077			ifp = NULL;
1078		else {
1079			INADDR_TO_IFP(mreq->imr_interface, ifp);
1080			if (ifp == NULL) {
1081				error = EADDRNOTAVAIL;
1082				splx(s);
1083				break;
1084			}
1085		}
1086		/*
1087		 * Find the membership in the membership array.
1088		 */
1089		for (i = 0; i < imo->imo_num_memberships; ++i) {
1090			if ((ifp == NULL ||
1091			     imo->imo_membership[i]->inm_ifp == ifp) &&
1092			     imo->imo_membership[i]->inm_addr.s_addr ==
1093			     mreq->imr_multiaddr.s_addr)
1094				break;
1095		}
1096		if (i == imo->imo_num_memberships) {
1097			error = EADDRNOTAVAIL;
1098			splx(s);
1099			break;
1100		}
1101		/*
1102		 * Give up the multicast address record to which the
1103		 * membership points.
1104		 */
1105		in_delmulti(imo->imo_membership[i]);
1106		/*
1107		 * Remove the gap in the membership array.
1108		 */
1109		for (++i; i < imo->imo_num_memberships; ++i)
1110			imo->imo_membership[i-1] = imo->imo_membership[i];
1111		--imo->imo_num_memberships;
1112		splx(s);
1113		break;
1114
1115	default:
1116		error = EOPNOTSUPP;
1117		break;
1118	}
1119
1120	/*
1121	 * If all options have default values, no need to keep the mbuf.
1122	 */
1123	if (imo->imo_multicast_ifp == NULL &&
1124	    imo->imo_multicast_vif == -1 &&
1125	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1126	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1127	    imo->imo_num_memberships == 0) {
1128		free(*imop, M_IPMOPTS);
1129		*imop = NULL;
1130	}
1131
1132	return (error);
1133}
1134
1135/*
1136 * Return the IP multicast options in response to user getsockopt().
1137 */
1138static int
1139ip_getmoptions(optname, imo, mp)
1140	int optname;
1141	register struct ip_moptions *imo;
1142	register struct mbuf **mp;
1143{
1144	u_char *ttl;
1145	u_char *loop;
1146	struct in_addr *addr;
1147	struct in_ifaddr *ia;
1148
1149	*mp = m_get(M_WAIT, MT_SOOPTS);
1150
1151	switch (optname) {
1152
1153	case IP_MULTICAST_VIF:
1154		if (imo != NULL)
1155			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1156		else
1157			*(mtod(*mp, int *)) = -1;
1158		(*mp)->m_len = sizeof(int);
1159		return(0);
1160
1161	case IP_MULTICAST_IF:
1162		addr = mtod(*mp, struct in_addr *);
1163		(*mp)->m_len = sizeof(struct in_addr);
1164		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1165			addr->s_addr = INADDR_ANY;
1166		else {
1167			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1168			addr->s_addr = (ia == NULL) ? INADDR_ANY
1169					: IA_SIN(ia)->sin_addr.s_addr;
1170		}
1171		return (0);
1172
1173	case IP_MULTICAST_TTL:
1174		ttl = mtod(*mp, u_char *);
1175		(*mp)->m_len = 1;
1176		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1177				     : imo->imo_multicast_ttl;
1178		return (0);
1179
1180	case IP_MULTICAST_LOOP:
1181		loop = mtod(*mp, u_char *);
1182		(*mp)->m_len = 1;
1183		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1184				      : imo->imo_multicast_loop;
1185		return (0);
1186
1187	default:
1188		return (EOPNOTSUPP);
1189	}
1190}
1191
1192/*
1193 * Discard the IP multicast options.
1194 */
1195void
1196ip_freemoptions(imo)
1197	register struct ip_moptions *imo;
1198{
1199	register int i;
1200
1201	if (imo != NULL) {
1202		for (i = 0; i < imo->imo_num_memberships; ++i)
1203			in_delmulti(imo->imo_membership[i]);
1204		free(imo, M_IPMOPTS);
1205	}
1206}
1207
1208/*
1209 * Routine called from ip_output() to loop back a copy of an IP multicast
1210 * packet to the input queue of a specified interface.  Note that this
1211 * calls the output routine of the loopback "driver", but with an interface
1212 * pointer that might NOT be a loopback interface -- evil, but easier than
1213 * replicating that code here.
1214 */
1215static void
1216ip_mloopback(ifp, m, dst)
1217	struct ifnet *ifp;
1218	register struct mbuf *m;
1219	register struct sockaddr_in *dst;
1220{
1221	register struct ip *ip;
1222	struct mbuf *copym;
1223
1224	copym = m_copy(m, 0, M_COPYALL);
1225	if (copym != NULL) {
1226		/*
1227		 * We don't bother to fragment if the IP length is greater
1228		 * than the interface's MTU.  Can this possibly matter?
1229		 */
1230		ip = mtod(copym, struct ip *);
1231		ip->ip_len = htons((u_short)ip->ip_len);
1232		ip->ip_off = htons((u_short)ip->ip_off);
1233		ip->ip_sum = 0;
1234		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1235		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1236	}
1237}
1238