ip_output.c revision 15869
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 *	$Id: ip_output.c,v 1.38 1996/05/21 20:47:31 peter Exp $
35 */
36
37#define _IP_VHL
38
39#include <sys/param.h>
40#include <sys/queue.h>
41#include <sys/systm.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/errno.h>
45#include <sys/protosw.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48
49#include <net/if.h>
50#include <net/route.h>
51
52#include <netinet/in.h>
53#include <netinet/in_systm.h>
54#include <netinet/ip.h>
55#include <netinet/in_pcb.h>
56#include <netinet/in_var.h>
57#include <netinet/ip_var.h>
58
59#ifdef vax
60#include <machine/mtpr.h>
61#endif
62#include <machine/in_cksum.h>
63
/*
 * Normalize COMPAT_IPFW: if it is undefined or defined as 1 it ends up
 * defined as 1; for any other value it ends up undefined.  The rest of
 * this file only tests it with #ifdef.
 */
#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
#undef COMPAT_IPFW
#define COMPAT_IPFW 1
#else
#undef COMPAT_IPFW
#endif

/* Counter from which ip_output() stamps ip_id on packets it originates. */
u_short ip_id;

/* Forward declarations of the file-local helpers defined further down. */
static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
static void	ip_mloopback
	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
static int	ip_getmoptions
	__P((int, struct ip_moptions *, struct mbuf **));
static int	ip_optcopy __P((struct ip *, struct ip *));
static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
static int	ip_setmoptions
	__P((int, struct ip_moptions **, struct mbuf *));
82
83/*
84 * IP output.  The packet in mbuf chain m contains a skeletal IP
85 * header (with len, off, ttl, proto, tos, src, dst).
86 * The mbuf chain containing the packet will be freed.
87 * The mbuf opt, if present, will not be freed.
88 */
89int
90ip_output(m0, opt, ro, flags, imo)
91	struct mbuf *m0;
92	struct mbuf *opt;
93	struct route *ro;
94	int flags;
95	struct ip_moptions *imo;
96{
97	struct ip *ip, *mhip;
98	struct ifnet *ifp;
99	struct mbuf *m = m0;
100	int hlen = sizeof (struct ip);
101	int len, off, error = 0;
102	struct sockaddr_in *dst;
103	struct in_ifaddr *ia;
104	int isbroadcast;
105
106#ifdef	DIAGNOSTIC
107	if ((m->m_flags & M_PKTHDR) == 0)
108		panic("ip_output no HDR");
109	if (!ro)
110		panic("ip_output no route, proto = %d",
111		      mtod(m, struct ip *)->ip_p);
112#endif
113	if (opt) {
114		m = ip_insertoptions(m, opt, &len);
115		hlen = len;
116	}
117	ip = mtod(m, struct ip *);
118	/*
119	 * Fill in IP header.
120	 */
121	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
122		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
123		ip->ip_off &= IP_DF;
124		ip->ip_id = htons(ip_id++);
125		ipstat.ips_localout++;
126	} else {
127		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
128	}
129
130	dst = (struct sockaddr_in *)&ro->ro_dst;
131	/*
132	 * If there is a cached route,
133	 * check that it is to the same destination
134	 * and is still up.  If not, free it and try again.
135	 */
136	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
137	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
138		RTFREE(ro->ro_rt);
139		ro->ro_rt = (struct rtentry *)0;
140	}
141	if (ro->ro_rt == 0) {
142		dst->sin_family = AF_INET;
143		dst->sin_len = sizeof(*dst);
144		dst->sin_addr = ip->ip_dst;
145	}
146	/*
147	 * If routing to interface only,
148	 * short circuit routing lookup.
149	 */
150#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
151#define sintosa(sin)	((struct sockaddr *)(sin))
152	if (flags & IP_ROUTETOIF) {
153		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
154		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
155			ipstat.ips_noroute++;
156			error = ENETUNREACH;
157			goto bad;
158		}
159		ifp = ia->ia_ifp;
160		ip->ip_ttl = 1;
161		isbroadcast = in_broadcast(dst->sin_addr, ifp);
162	} else {
163		/*
164		 * If this is the case, we probably don't want to allocate
165		 * a protocol-cloned route since we didn't get one from the
166		 * ULP.  This lets TCP do its thing, while not burdening
167		 * forwarding or ICMP with the overhead of cloning a route.
168		 * Of course, we still want to do any cloning requested by
169		 * the link layer, as this is probably required in all cases
170		 * for correct operation (as it is for ARP).
171		 */
172		if (ro->ro_rt == 0)
173			rtalloc_ign(ro, RTF_PRCLONING);
174		if (ro->ro_rt == 0) {
175			ipstat.ips_noroute++;
176			error = EHOSTUNREACH;
177			goto bad;
178		}
179		ia = ifatoia(ro->ro_rt->rt_ifa);
180		ifp = ro->ro_rt->rt_ifp;
181		ro->ro_rt->rt_use++;
182		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
183			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
184		if (ro->ro_rt->rt_flags & RTF_HOST)
185			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
186		else
187			isbroadcast = in_broadcast(dst->sin_addr, ifp);
188	}
189	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
190		struct in_multi *inm;
191
192		m->m_flags |= M_MCAST;
193		/*
194		 * IP destination address is multicast.  Make sure "dst"
195		 * still points to the address in "ro".  (It may have been
196		 * changed to point to a gateway address, above.)
197		 */
198		dst = (struct sockaddr_in *)&ro->ro_dst;
199		/*
200		 * See if the caller provided any multicast options
201		 */
202		if (imo != NULL) {
203			ip->ip_ttl = imo->imo_multicast_ttl;
204			if (imo->imo_multicast_ifp != NULL)
205				ifp = imo->imo_multicast_ifp;
206			if (imo->imo_multicast_vif != -1)
207				ip->ip_src.s_addr =
208				    ip_mcast_src(imo->imo_multicast_vif);
209		} else
210			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
211		/*
212		 * Confirm that the outgoing interface supports multicast.
213		 */
214		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
215			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
216				ipstat.ips_noroute++;
217				error = ENETUNREACH;
218				goto bad;
219			}
220		}
221		/*
222		 * If source address not specified yet, use address
223		 * of outgoing interface.
224		 */
225		if (ip->ip_src.s_addr == INADDR_ANY) {
226			register struct in_ifaddr *ia;
227
228			for (ia = in_ifaddr; ia; ia = ia->ia_next)
229				if (ia->ia_ifp == ifp) {
230					ip->ip_src = IA_SIN(ia)->sin_addr;
231					break;
232				}
233		}
234
235		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
236		if (inm != NULL &&
237		   (imo == NULL || imo->imo_multicast_loop)) {
238			/*
239			 * If we belong to the destination multicast group
240			 * on the outgoing interface, and the caller did not
241			 * forbid loopback, loop back a copy.
242			 */
243			ip_mloopback(ifp, m, dst);
244		}
245		else {
246			/*
247			 * If we are acting as a multicast router, perform
248			 * multicast forwarding as if the packet had just
249			 * arrived on the interface to which we are about
250			 * to send.  The multicast forwarding function
251			 * recursively calls this function, using the
252			 * IP_FORWARDING flag to prevent infinite recursion.
253			 *
254			 * Multicasts that are looped back by ip_mloopback(),
255			 * above, will be forwarded by the ip_input() routine,
256			 * if necessary.
257			 */
258			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
259				/*
260				 * Check if rsvp daemon is running. If not, don't
261				 * set ip_moptions. This ensures that the packet
262				 * is multicast and not just sent down one link
263				 * as prescribed by rsvpd.
264				 */
265				if (!rsvp_on)
266				  imo = NULL;
267				if (ip_mforward(ip, ifp, m, imo) != 0) {
268					m_freem(m);
269					goto done;
270				}
271			}
272		}
273
274		/*
275		 * Multicasts with a time-to-live of zero may be looped-
276		 * back, above, but must not be transmitted on a network.
277		 * Also, multicasts addressed to the loopback interface
278		 * are not sent -- the above call to ip_mloopback() will
279		 * loop back a copy if this host actually belongs to the
280		 * destination group on the loopback interface.
281		 */
282		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
283			m_freem(m);
284			goto done;
285		}
286
287		goto sendit;
288	}
289#ifndef notdef
290	/*
291	 * If source address not specified yet, use address
292	 * of outgoing interface.
293	 */
294	if (ip->ip_src.s_addr == INADDR_ANY)
295		ip->ip_src = IA_SIN(ia)->sin_addr;
296#endif
297	/*
298	 * Verify that we have any chance at all of being able to queue
299	 *      the packet or packet fragments
300	 */
301	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
302		ifp->if_snd.ifq_maxlen) {
303			error = ENOBUFS;
304			goto bad;
305	}
306
307	/*
308	 * Look for broadcast address and
309	 * and verify user is allowed to send
310	 * such a packet.
311	 */
312	if (isbroadcast) {
313		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
314			error = EADDRNOTAVAIL;
315			goto bad;
316		}
317		if ((flags & IP_ALLOWBROADCAST) == 0) {
318			error = EACCES;
319			goto bad;
320		}
321		/* don't allow broadcast messages to be fragmented */
322		if ((u_short)ip->ip_len > ifp->if_mtu) {
323			error = EMSGSIZE;
324			goto bad;
325		}
326		m->m_flags |= M_BCAST;
327	} else {
328		m->m_flags &= ~M_BCAST;
329	}
330
331sendit:
332	/*
333	 * Check with the firewall...
334	 */
335#ifdef COMPAT_IPFW
336	if (ip_fw_chk_ptr && !(*ip_fw_chk_ptr)(&ip, hlen, ifp, 1, &m)) {
337		error = EACCES;
338		goto done;
339	}
340#endif
341
342	/*
343	 * If small enough for interface, can just send directly.
344	 */
345	if ((u_short)ip->ip_len <= ifp->if_mtu) {
346		ip->ip_len = htons((u_short)ip->ip_len);
347		ip->ip_off = htons((u_short)ip->ip_off);
348		ip->ip_sum = 0;
349		if (ip->ip_vhl == IP_VHL_BORING) {
350			ip->ip_sum = in_cksum_hdr(ip);
351		} else {
352			ip->ip_sum = in_cksum(m, hlen);
353		}
354		error = (*ifp->if_output)(ifp, m,
355				(struct sockaddr *)dst, ro->ro_rt);
356		goto done;
357	}
358	/*
359	 * Too large for interface; fragment if possible.
360	 * Must be able to put at least 8 bytes per fragment.
361	 */
362	if (ip->ip_off & IP_DF) {
363		error = EMSGSIZE;
364		/*
365		 * This case can happen if the user changed the MTU
366		 * of an interface after enabling IP on it.  Because
367		 * most netifs don't keep track of routes pointing to
368		 * them, there is no way for one to update all its
369		 * routes when the MTU is changed.
370		 */
371		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
372		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
373		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
374			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
375		}
376		ipstat.ips_cantfrag++;
377		goto bad;
378	}
379	len = (ifp->if_mtu - hlen) &~ 7;
380	if (len < 8) {
381		error = EMSGSIZE;
382		goto bad;
383	}
384
385    {
386	int mhlen, firstlen = len;
387	struct mbuf **mnext = &m->m_nextpkt;
388
389	/*
390	 * Loop through length of segment after first fragment,
391	 * make new header and copy data of each part and link onto chain.
392	 */
393	m0 = m;
394	mhlen = sizeof (struct ip);
395	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
396		MGETHDR(m, M_DONTWAIT, MT_HEADER);
397		if (m == 0) {
398			error = ENOBUFS;
399			ipstat.ips_odropped++;
400			goto sendorfree;
401		}
402		m->m_data += max_linkhdr;
403		mhip = mtod(m, struct ip *);
404		*mhip = *ip;
405		if (hlen > sizeof (struct ip)) {
406			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
407			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
408		}
409		m->m_len = mhlen;
410		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
411		if (ip->ip_off & IP_MF)
412			mhip->ip_off |= IP_MF;
413		if (off + len >= (u_short)ip->ip_len)
414			len = (u_short)ip->ip_len - off;
415		else
416			mhip->ip_off |= IP_MF;
417		mhip->ip_len = htons((u_short)(len + mhlen));
418		m->m_next = m_copy(m0, off, len);
419		if (m->m_next == 0) {
420			(void) m_free(m);
421			error = ENOBUFS;	/* ??? */
422			ipstat.ips_odropped++;
423			goto sendorfree;
424		}
425		m->m_pkthdr.len = mhlen + len;
426		m->m_pkthdr.rcvif = (struct ifnet *)0;
427		mhip->ip_off = htons((u_short)mhip->ip_off);
428		mhip->ip_sum = 0;
429		if (mhip->ip_vhl == IP_VHL_BORING) {
430			mhip->ip_sum = in_cksum_hdr(mhip);
431		} else {
432			mhip->ip_sum = in_cksum(m, mhlen);
433		}
434		*mnext = m;
435		mnext = &m->m_nextpkt;
436		ipstat.ips_ofragments++;
437	}
438	/*
439	 * Update first fragment by trimming what's been copied out
440	 * and updating header, then send each fragment (in order).
441	 */
442	m = m0;
443	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
444	m->m_pkthdr.len = hlen + firstlen;
445	ip->ip_len = htons((u_short)m->m_pkthdr.len);
446	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
447	ip->ip_sum = 0;
448	if (ip->ip_vhl == IP_VHL_BORING) {
449		ip->ip_sum = in_cksum_hdr(ip);
450	} else {
451		ip->ip_sum = in_cksum(m, hlen);
452	}
453sendorfree:
454	for (m = m0; m; m = m0) {
455		m0 = m->m_nextpkt;
456		m->m_nextpkt = 0;
457		if (error == 0)
458			error = (*ifp->if_output)(ifp, m,
459			    (struct sockaddr *)dst, ro->ro_rt);
460		else
461			m_freem(m);
462	}
463
464	if (error == 0)
465		ipstat.ips_fragmented++;
466    }
467done:
468	return (error);
469bad:
470	m_freem(m0);
471	goto done;
472}
473
474/*
475 * Insert IP options into preformed packet.
476 * Adjust IP destination as required for IP source routing,
477 * as indicated by a non-zero in_addr at the start of the options.
478 *
479 * XXX This routine assumes that the packet has no options in place.
480 */
481static struct mbuf *
482ip_insertoptions(m, opt, phlen)
483	register struct mbuf *m;
484	struct mbuf *opt;
485	int *phlen;
486{
487	register struct ipoption *p = mtod(opt, struct ipoption *);
488	struct mbuf *n;
489	register struct ip *ip = mtod(m, struct ip *);
490	unsigned optlen;
491
492	optlen = opt->m_len - sizeof(p->ipopt_dst);
493	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
494		return (m);		/* XXX should fail */
495	if (p->ipopt_dst.s_addr)
496		ip->ip_dst = p->ipopt_dst;
497	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
498		MGETHDR(n, M_DONTWAIT, MT_HEADER);
499		if (n == 0)
500			return (m);
501		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
502		m->m_len -= sizeof(struct ip);
503		m->m_data += sizeof(struct ip);
504		n->m_next = m;
505		m = n;
506		m->m_len = optlen + sizeof(struct ip);
507		m->m_data += max_linkhdr;
508		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
509	} else {
510		m->m_data -= optlen;
511		m->m_len += optlen;
512		m->m_pkthdr.len += optlen;
513		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
514	}
515	ip = mtod(m, struct ip *);
516	(void)memcpy(ip + 1, p->ipopt_list, (unsigned)optlen);
517	*phlen = sizeof(struct ip) + optlen;
518	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
519	ip->ip_len += optlen;
520	return (m);
521}
522
523/*
524 * Copy options from ip to jp,
525 * omitting those not copied during fragmentation.
526 */
527static int
528ip_optcopy(ip, jp)
529	struct ip *ip, *jp;
530{
531	register u_char *cp, *dp;
532	int opt, optlen, cnt;
533
534	cp = (u_char *)(ip + 1);
535	dp = (u_char *)(jp + 1);
536	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
537	for (; cnt > 0; cnt -= optlen, cp += optlen) {
538		opt = cp[0];
539		if (opt == IPOPT_EOL)
540			break;
541		if (opt == IPOPT_NOP) {
542			/* Preserve for IP mcast tunnel's LSRR alignment. */
543			*dp++ = IPOPT_NOP;
544			optlen = 1;
545			continue;
546		} else
547			optlen = cp[IPOPT_OLEN];
548		/* bogus lengths should have been caught by ip_dooptions */
549		if (optlen > cnt)
550			optlen = cnt;
551		if (IPOPT_COPIED(opt)) {
552			(void)memcpy(dp, cp, (unsigned)optlen);
553			dp += optlen;
554		}
555	}
556	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
557		*dp++ = IPOPT_EOL;
558	return (optlen);
559}
560
561/*
562 * IP socket option processing.
563 */
564int
565ip_ctloutput(op, so, level, optname, mp)
566	int op;
567	struct socket *so;
568	int level, optname;
569	struct mbuf **mp;
570{
571	register struct inpcb *inp = sotoinpcb(so);
572	register struct mbuf *m = *mp;
573	register int optval = 0;
574	int error = 0;
575
576	if (level != IPPROTO_IP) {
577		error = EINVAL;
578		if (op == PRCO_SETOPT && *mp)
579			(void) m_free(*mp);
580	} else switch (op) {
581
582	case PRCO_SETOPT:
583		switch (optname) {
584		case IP_OPTIONS:
585#ifdef notyet
586		case IP_RETOPTS:
587			return (ip_pcbopts(optname, &inp->inp_options, m));
588#else
589			return (ip_pcbopts(&inp->inp_options, m));
590#endif
591
592		case IP_TOS:
593		case IP_TTL:
594		case IP_RECVOPTS:
595		case IP_RECVRETOPTS:
596		case IP_RECVDSTADDR:
597			if (m == 0 || m->m_len != sizeof(int))
598				error = EINVAL;
599			else {
600				optval = *mtod(m, int *);
601				switch (optname) {
602
603				case IP_TOS:
604					inp->inp_ip.ip_tos = optval;
605					break;
606
607				case IP_TTL:
608					inp->inp_ip.ip_ttl = optval;
609					break;
610#define	OPTSET(bit) \
611	if (optval) \
612		inp->inp_flags |= bit; \
613	else \
614		inp->inp_flags &= ~bit;
615
616				case IP_RECVOPTS:
617					OPTSET(INP_RECVOPTS);
618					break;
619
620				case IP_RECVRETOPTS:
621					OPTSET(INP_RECVRETOPTS);
622					break;
623
624				case IP_RECVDSTADDR:
625					OPTSET(INP_RECVDSTADDR);
626					break;
627				}
628			}
629			break;
630#undef OPTSET
631
632		case IP_MULTICAST_IF:
633		case IP_MULTICAST_VIF:
634		case IP_MULTICAST_TTL:
635		case IP_MULTICAST_LOOP:
636		case IP_ADD_MEMBERSHIP:
637		case IP_DROP_MEMBERSHIP:
638			error = ip_setmoptions(optname, &inp->inp_moptions, m);
639			break;
640
641		case IP_PORTRANGE:
642			if (m == 0 || m->m_len != sizeof(int))
643				error = EINVAL;
644			else {
645				optval = *mtod(m, int *);
646
647				switch (optval) {
648
649				case IP_PORTRANGE_DEFAULT:
650					inp->inp_flags &= ~(INP_LOWPORT);
651					inp->inp_flags &= ~(INP_HIGHPORT);
652					break;
653
654				case IP_PORTRANGE_HIGH:
655					inp->inp_flags &= ~(INP_LOWPORT);
656					inp->inp_flags |= INP_HIGHPORT;
657					break;
658
659				case IP_PORTRANGE_LOW:
660					inp->inp_flags &= ~(INP_HIGHPORT);
661					inp->inp_flags |= INP_LOWPORT;
662					break;
663
664				default:
665					error = EINVAL;
666					break;
667				}
668			}
669			break;
670
671		default:
672			error = ENOPROTOOPT;
673			break;
674		}
675		if (m)
676			(void)m_free(m);
677		break;
678
679	case PRCO_GETOPT:
680		switch (optname) {
681		case IP_OPTIONS:
682		case IP_RETOPTS:
683			*mp = m = m_get(M_WAIT, MT_SOOPTS);
684			if (inp->inp_options) {
685				m->m_len = inp->inp_options->m_len;
686				(void)memcpy(mtod(m, void *),
687				    mtod(inp->inp_options, void *), (unsigned)m->m_len);
688			} else
689				m->m_len = 0;
690			break;
691
692		case IP_TOS:
693		case IP_TTL:
694		case IP_RECVOPTS:
695		case IP_RECVRETOPTS:
696		case IP_RECVDSTADDR:
697			*mp = m = m_get(M_WAIT, MT_SOOPTS);
698			m->m_len = sizeof(int);
699			switch (optname) {
700
701			case IP_TOS:
702				optval = inp->inp_ip.ip_tos;
703				break;
704
705			case IP_TTL:
706				optval = inp->inp_ip.ip_ttl;
707				break;
708
709#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
710
711			case IP_RECVOPTS:
712				optval = OPTBIT(INP_RECVOPTS);
713				break;
714
715			case IP_RECVRETOPTS:
716				optval = OPTBIT(INP_RECVRETOPTS);
717				break;
718
719			case IP_RECVDSTADDR:
720				optval = OPTBIT(INP_RECVDSTADDR);
721				break;
722			}
723			*mtod(m, int *) = optval;
724			break;
725
726		case IP_MULTICAST_IF:
727		case IP_MULTICAST_VIF:
728		case IP_MULTICAST_TTL:
729		case IP_MULTICAST_LOOP:
730		case IP_ADD_MEMBERSHIP:
731		case IP_DROP_MEMBERSHIP:
732			error = ip_getmoptions(optname, inp->inp_moptions, mp);
733			break;
734
735		case IP_PORTRANGE:
736			*mp = m = m_get(M_WAIT, MT_SOOPTS);
737			m->m_len = sizeof(int);
738
739			if (inp->inp_flags & INP_HIGHPORT)
740				optval = IP_PORTRANGE_HIGH;
741			else if (inp->inp_flags & INP_LOWPORT)
742				optval = IP_PORTRANGE_LOW;
743			else
744				optval = 0;
745
746			*mtod(m, int *) = optval;
747			break;
748
749		default:
750			error = ENOPROTOOPT;
751			break;
752		}
753		break;
754	}
755	return (error);
756}
757
758/*
759 * Set up IP options in pcb for insertion in output packets.
760 * Store in mbuf with pointer in pcbopt, adding pseudo-option
761 * with destination address if source routed.
762 */
763static int
764#ifdef notyet
765ip_pcbopts(optname, pcbopt, m)
766	int optname;
767#else
768ip_pcbopts(pcbopt, m)
769#endif
770	struct mbuf **pcbopt;
771	register struct mbuf *m;
772{
773	register cnt, optlen;
774	register u_char *cp;
775	u_char opt;
776
777	/* turn off any old options */
778	if (*pcbopt)
779		(void)m_free(*pcbopt);
780	*pcbopt = 0;
781	if (m == (struct mbuf *)0 || m->m_len == 0) {
782		/*
783		 * Only turning off any previous options.
784		 */
785		if (m)
786			(void)m_free(m);
787		return (0);
788	}
789
790#ifndef	vax
791	if (m->m_len % sizeof(long))
792		goto bad;
793#endif
794	/*
795	 * IP first-hop destination address will be stored before
796	 * actual options; move other options back
797	 * and clear it when none present.
798	 */
799	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
800		goto bad;
801	cnt = m->m_len;
802	m->m_len += sizeof(struct in_addr);
803	cp = mtod(m, u_char *) + sizeof(struct in_addr);
804	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
805	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
806
807	for (; cnt > 0; cnt -= optlen, cp += optlen) {
808		opt = cp[IPOPT_OPTVAL];
809		if (opt == IPOPT_EOL)
810			break;
811		if (opt == IPOPT_NOP)
812			optlen = 1;
813		else {
814			optlen = cp[IPOPT_OLEN];
815			if (optlen <= IPOPT_OLEN || optlen > cnt)
816				goto bad;
817		}
818		switch (opt) {
819
820		default:
821			break;
822
823		case IPOPT_LSRR:
824		case IPOPT_SSRR:
825			/*
826			 * user process specifies route as:
827			 *	->A->B->C->D
828			 * D must be our final destination (but we can't
829			 * check that since we may not have connected yet).
830			 * A is first hop destination, which doesn't appear in
831			 * actual IP option, but is stored before the options.
832			 */
833			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
834				goto bad;
835			m->m_len -= sizeof(struct in_addr);
836			cnt -= sizeof(struct in_addr);
837			optlen -= sizeof(struct in_addr);
838			cp[IPOPT_OLEN] = optlen;
839			/*
840			 * Move first hop before start of options.
841			 */
842			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
843			    sizeof(struct in_addr));
844			/*
845			 * Then copy rest of options back
846			 * to close up the deleted entry.
847			 */
848			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
849			    sizeof(struct in_addr)),
850			    (caddr_t)&cp[IPOPT_OFFSET+1],
851			    (unsigned)cnt + sizeof(struct in_addr));
852			break;
853		}
854	}
855	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
856		goto bad;
857	*pcbopt = m;
858	return (0);
859
860bad:
861	(void)m_free(m);
862	return (EINVAL);
863}
864
865/*
866 * Set the IP multicast options in response to user setsockopt().
867 */
868static int
869ip_setmoptions(optname, imop, m)
870	int optname;
871	struct ip_moptions **imop;
872	struct mbuf *m;
873{
874	register int error = 0;
875	u_char loop;
876	register int i;
877	struct in_addr addr;
878	register struct ip_mreq *mreq;
879	register struct ifnet *ifp;
880	register struct ip_moptions *imo = *imop;
881	struct route ro;
882	register struct sockaddr_in *dst;
883	int s;
884
885	if (imo == NULL) {
886		/*
887		 * No multicast option buffer attached to the pcb;
888		 * allocate one and initialize to default values.
889		 */
890		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
891		    M_WAITOK);
892
893		if (imo == NULL)
894			return (ENOBUFS);
895		*imop = imo;
896		imo->imo_multicast_ifp = NULL;
897		imo->imo_multicast_vif = -1;
898		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
899		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
900		imo->imo_num_memberships = 0;
901	}
902
903	switch (optname) {
904	/* store an index number for the vif you wanna use in the send */
905	case IP_MULTICAST_VIF:
906		if (!legal_vif_num) {
907			error = EOPNOTSUPP;
908			break;
909		}
910		if (m == NULL || m->m_len != sizeof(int)) {
911			error = EINVAL;
912			break;
913		}
914		i = *(mtod(m, int *));
915		if (!legal_vif_num(i) && (i != -1)) {
916			error = EINVAL;
917			break;
918		}
919		imo->imo_multicast_vif = i;
920		break;
921
922	case IP_MULTICAST_IF:
923		/*
924		 * Select the interface for outgoing multicast packets.
925		 */
926		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
927			error = EINVAL;
928			break;
929		}
930		addr = *(mtod(m, struct in_addr *));
931		/*
932		 * INADDR_ANY is used to remove a previous selection.
933		 * When no interface is selected, a default one is
934		 * chosen every time a multicast packet is sent.
935		 */
936		if (addr.s_addr == INADDR_ANY) {
937			imo->imo_multicast_ifp = NULL;
938			break;
939		}
940		/*
941		 * The selected interface is identified by its local
942		 * IP address.  Find the interface and confirm that
943		 * it supports multicasting.
944		 */
945		s = splimp();
946		INADDR_TO_IFP(addr, ifp);
947		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
948			splx(s);
949			error = EADDRNOTAVAIL;
950			break;
951		}
952		imo->imo_multicast_ifp = ifp;
953		splx(s);
954		break;
955
956	case IP_MULTICAST_TTL:
957		/*
958		 * Set the IP time-to-live for outgoing multicast packets.
959		 */
960		if (m == NULL || m->m_len != 1) {
961			error = EINVAL;
962			break;
963		}
964		imo->imo_multicast_ttl = *(mtod(m, u_char *));
965		break;
966
967	case IP_MULTICAST_LOOP:
968		/*
969		 * Set the loopback flag for outgoing multicast packets.
970		 * Must be zero or one.
971		 */
972		if (m == NULL || m->m_len != 1 ||
973		   (loop = *(mtod(m, u_char *))) > 1) {
974			error = EINVAL;
975			break;
976		}
977		imo->imo_multicast_loop = loop;
978		break;
979
980	case IP_ADD_MEMBERSHIP:
981		/*
982		 * Add a multicast group membership.
983		 * Group must be a valid IP multicast address.
984		 */
985		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
986			error = EINVAL;
987			break;
988		}
989		mreq = mtod(m, struct ip_mreq *);
990		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
991			error = EINVAL;
992			break;
993		}
994		s = splimp();
995		/*
996		 * If no interface address was provided, use the interface of
997		 * the route to the given multicast address.
998		 */
999		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1000			bzero((caddr_t)&ro, sizeof(ro));
1001			dst = (struct sockaddr_in *)&ro.ro_dst;
1002			dst->sin_len = sizeof(*dst);
1003			dst->sin_family = AF_INET;
1004			dst->sin_addr = mreq->imr_multiaddr;
1005			rtalloc(&ro);
1006			if (ro.ro_rt == NULL) {
1007				error = EADDRNOTAVAIL;
1008				splx(s);
1009				break;
1010			}
1011			ifp = ro.ro_rt->rt_ifp;
1012			rtfree(ro.ro_rt);
1013		}
1014		else {
1015			INADDR_TO_IFP(mreq->imr_interface, ifp);
1016		}
1017
1018		/*
1019		 * See if we found an interface, and confirm that it
1020		 * supports multicast.
1021		 */
1022		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1023			error = EADDRNOTAVAIL;
1024			splx(s);
1025			break;
1026		}
1027		/*
1028		 * See if the membership already exists or if all the
1029		 * membership slots are full.
1030		 */
1031		for (i = 0; i < imo->imo_num_memberships; ++i) {
1032			if (imo->imo_membership[i]->inm_ifp == ifp &&
1033			    imo->imo_membership[i]->inm_addr.s_addr
1034						== mreq->imr_multiaddr.s_addr)
1035				break;
1036		}
1037		if (i < imo->imo_num_memberships) {
1038			error = EADDRINUSE;
1039			splx(s);
1040			break;
1041		}
1042		if (i == IP_MAX_MEMBERSHIPS) {
1043			error = ETOOMANYREFS;
1044			splx(s);
1045			break;
1046		}
1047		/*
1048		 * Everything looks good; add a new record to the multicast
1049		 * address list for the given interface.
1050		 */
1051		if ((imo->imo_membership[i] =
1052		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1053			error = ENOBUFS;
1054			splx(s);
1055			break;
1056		}
1057		++imo->imo_num_memberships;
1058		splx(s);
1059		break;
1060
1061	case IP_DROP_MEMBERSHIP:
1062		/*
1063		 * Drop a multicast group membership.
1064		 * Group must be a valid IP multicast address.
1065		 */
1066		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1067			error = EINVAL;
1068			break;
1069		}
1070		mreq = mtod(m, struct ip_mreq *);
1071		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1072			error = EINVAL;
1073			break;
1074		}
1075
1076		s = splimp();
1077		/*
1078		 * If an interface address was specified, get a pointer
1079		 * to its ifnet structure.
1080		 */
1081		if (mreq->imr_interface.s_addr == INADDR_ANY)
1082			ifp = NULL;
1083		else {
1084			INADDR_TO_IFP(mreq->imr_interface, ifp);
1085			if (ifp == NULL) {
1086				error = EADDRNOTAVAIL;
1087				splx(s);
1088				break;
1089			}
1090		}
1091		/*
1092		 * Find the membership in the membership array.
1093		 */
1094		for (i = 0; i < imo->imo_num_memberships; ++i) {
1095			if ((ifp == NULL ||
1096			     imo->imo_membership[i]->inm_ifp == ifp) &&
1097			     imo->imo_membership[i]->inm_addr.s_addr ==
1098			     mreq->imr_multiaddr.s_addr)
1099				break;
1100		}
1101		if (i == imo->imo_num_memberships) {
1102			error = EADDRNOTAVAIL;
1103			splx(s);
1104			break;
1105		}
1106		/*
1107		 * Give up the multicast address record to which the
1108		 * membership points.
1109		 */
1110		in_delmulti(imo->imo_membership[i]);
1111		/*
1112		 * Remove the gap in the membership array.
1113		 */
1114		for (++i; i < imo->imo_num_memberships; ++i)
1115			imo->imo_membership[i-1] = imo->imo_membership[i];
1116		--imo->imo_num_memberships;
1117		splx(s);
1118		break;
1119
1120	default:
1121		error = EOPNOTSUPP;
1122		break;
1123	}
1124
1125	/*
1126	 * If all options have default values, no need to keep the mbuf.
1127	 */
1128	if (imo->imo_multicast_ifp == NULL &&
1129	    imo->imo_multicast_vif == -1 &&
1130	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1131	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1132	    imo->imo_num_memberships == 0) {
1133		free(*imop, M_IPMOPTS);
1134		*imop = NULL;
1135	}
1136
1137	return (error);
1138}
1139
1140/*
1141 * Return the IP multicast options in response to user getsockopt().
1142 */
1143static int
1144ip_getmoptions(optname, imo, mp)
1145	int optname;
1146	register struct ip_moptions *imo;
1147	register struct mbuf **mp;
1148{
1149	u_char *ttl;
1150	u_char *loop;
1151	struct in_addr *addr;
1152	struct in_ifaddr *ia;
1153
1154	*mp = m_get(M_WAIT, MT_SOOPTS);
1155
1156	switch (optname) {
1157
1158	case IP_MULTICAST_VIF:
1159		if (imo != NULL)
1160			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1161		else
1162			*(mtod(*mp, int *)) = -1;
1163		(*mp)->m_len = sizeof(int);
1164		return(0);
1165
1166	case IP_MULTICAST_IF:
1167		addr = mtod(*mp, struct in_addr *);
1168		(*mp)->m_len = sizeof(struct in_addr);
1169		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1170			addr->s_addr = INADDR_ANY;
1171		else {
1172			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1173			addr->s_addr = (ia == NULL) ? INADDR_ANY
1174					: IA_SIN(ia)->sin_addr.s_addr;
1175		}
1176		return (0);
1177
1178	case IP_MULTICAST_TTL:
1179		ttl = mtod(*mp, u_char *);
1180		(*mp)->m_len = 1;
1181		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1182				     : imo->imo_multicast_ttl;
1183		return (0);
1184
1185	case IP_MULTICAST_LOOP:
1186		loop = mtod(*mp, u_char *);
1187		(*mp)->m_len = 1;
1188		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1189				      : imo->imo_multicast_loop;
1190		return (0);
1191
1192	default:
1193		return (EOPNOTSUPP);
1194	}
1195}
1196
1197/*
1198 * Discard the IP multicast options.
1199 */
1200void
1201ip_freemoptions(imo)
1202	register struct ip_moptions *imo;
1203{
1204	register int i;
1205
1206	if (imo != NULL) {
1207		for (i = 0; i < imo->imo_num_memberships; ++i)
1208			in_delmulti(imo->imo_membership[i]);
1209		free(imo, M_IPMOPTS);
1210	}
1211}
1212
1213/*
1214 * Routine called from ip_output() to loop back a copy of an IP multicast
1215 * packet to the input queue of a specified interface.  Note that this
1216 * calls the output routine of the loopback "driver", but with an interface
1217 * pointer that might NOT be a loopback interface -- evil, but easier than
1218 * replicating that code here.
1219 */
1220static void
1221ip_mloopback(ifp, m, dst)
1222	struct ifnet *ifp;
1223	register struct mbuf *m;
1224	register struct sockaddr_in *dst;
1225{
1226	register struct ip *ip;
1227	struct mbuf *copym;
1228
1229	copym = m_copy(m, 0, M_COPYALL);
1230	if (copym != NULL) {
1231		/*
1232		 * We don't bother to fragment if the IP length is greater
1233		 * than the interface's MTU.  Can this possibly matter?
1234		 */
1235		ip = mtod(copym, struct ip *);
1236		ip->ip_len = htons((u_short)ip->ip_len);
1237		ip->ip_off = htons((u_short)ip->ip_off);
1238		ip->ip_sum = 0;
1239		if (ip->ip_vhl == IP_VHL_BORING) {
1240			ip->ip_sum = in_cksum_hdr(ip);
1241		} else {
1242			ip->ip_sum = in_cksum(copym,
1243					      IP_VHL_HL(ip->ip_vhl) << 2);
1244		}
1245		/*
1246		 * NB:
1247		 * We can't simply call ip_input() directly because
1248		 * the ip_mforward() depends on the `input interface'
1249		 * being set to something unreasonable so that we don't
1250		 * attempt to forward the looped-back copy.
1251		 * It's also not clear whether there are any lingering
1252		 * reentrancy problems in other areas which might be
1253		 * exposed by this code.  For the moment, we'll err
1254		 * on the side of safety by continuing to abuse
1255		 * loinput().
1256		 */
1257#ifdef notdef
1258		copym->m_pkthdr.rcvif = &loif[0];
1259		ip_input(copym)
1260#else
1261		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1262#endif
1263	}
1264}
1265