ip_output.c revision 30354
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34 *	$Id: ip_output.c,v 1.59 1997/10/11 18:31:32 phk Exp $
35 */
36
37#define _IP_VHL
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46
47#include <net/if.h>
48#include <net/route.h>
49
50#include <netinet/in.h>
51#include <netinet/in_systm.h>
52#include <netinet/ip.h>
53#include <netinet/in_pcb.h>
54#include <netinet/in_var.h>
55#include <netinet/ip_var.h>
56
57#ifdef vax
58#include <machine/mtpr.h>
59#endif
60#include <machine/in_cksum.h>
61
62static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
63
64#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
65#undef COMPAT_IPFW
66#define COMPAT_IPFW 1
67#else
68#undef COMPAT_IPFW
69#endif
70
71u_short ip_id;
72
73static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
74static void	ip_mloopback
75	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
76static int	ip_getmoptions
77	__P((int, struct ip_moptions *, struct mbuf **));
78static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
79static int	ip_setmoptions
80	__P((int, struct ip_moptions **, struct mbuf *));
81
82#if defined(IPFILTER_LKM) || defined(IPFILTER)
83int	ip_optcopy __P((struct ip *, struct ip *));
84extern int fr_check __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
85extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
86#else
87static int	ip_optcopy __P((struct ip *, struct ip *));
88#endif
89
90
91extern	struct protosw inetsw[];
92
93/*
94 * IP output.  The packet in mbuf chain m contains a skeletal IP
95 * header (with len, off, ttl, proto, tos, src, dst).
96 * The mbuf chain containing the packet will be freed.
97 * The mbuf opt, if present, will not be freed.
98 */
99int
100ip_output(m0, opt, ro, flags, imo)
101	struct mbuf *m0;
102	struct mbuf *opt;
103	struct route *ro;
104	int flags;
105	struct ip_moptions *imo;
106{
107	struct ip *ip, *mhip;
108	struct ifnet *ifp;
109	struct mbuf *m = m0;
110	int hlen = sizeof (struct ip);
111	int len, off, error = 0;
112	struct sockaddr_in *dst;
113	struct in_ifaddr *ia;
114	int isbroadcast;
115
116#ifdef	DIAGNOSTIC
117	if ((m->m_flags & M_PKTHDR) == 0)
118		panic("ip_output no HDR");
119	if (!ro)
120		panic("ip_output no route, proto = %d",
121		      mtod(m, struct ip *)->ip_p);
122#endif
123	if (opt) {
124		m = ip_insertoptions(m, opt, &len);
125		hlen = len;
126	}
127	ip = mtod(m, struct ip *);
128	/*
129	 * Fill in IP header.
130	 */
131	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
132		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
133		ip->ip_off &= IP_DF;
134		ip->ip_id = htons(ip_id++);
135		ipstat.ips_localout++;
136	} else {
137		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
138	}
139
140	dst = (struct sockaddr_in *)&ro->ro_dst;
141	/*
142	 * If there is a cached route,
143	 * check that it is to the same destination
144	 * and is still up.  If not, free it and try again.
145	 */
146	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
147	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
148		RTFREE(ro->ro_rt);
149		ro->ro_rt = (struct rtentry *)0;
150	}
151	if (ro->ro_rt == 0) {
152		dst->sin_family = AF_INET;
153		dst->sin_len = sizeof(*dst);
154		dst->sin_addr = ip->ip_dst;
155	}
156	/*
157	 * If routing to interface only,
158	 * short circuit routing lookup.
159	 */
160#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
161#define sintosa(sin)	((struct sockaddr *)(sin))
162	if (flags & IP_ROUTETOIF) {
163		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
164		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
165			ipstat.ips_noroute++;
166			error = ENETUNREACH;
167			goto bad;
168		}
169		ifp = ia->ia_ifp;
170		ip->ip_ttl = 1;
171		isbroadcast = in_broadcast(dst->sin_addr, ifp);
172	} else {
173		/*
174		 * If this is the case, we probably don't want to allocate
175		 * a protocol-cloned route since we didn't get one from the
176		 * ULP.  This lets TCP do its thing, while not burdening
177		 * forwarding or ICMP with the overhead of cloning a route.
178		 * Of course, we still want to do any cloning requested by
179		 * the link layer, as this is probably required in all cases
180		 * for correct operation (as it is for ARP).
181		 */
182		if (ro->ro_rt == 0)
183			rtalloc_ign(ro, RTF_PRCLONING);
184		if (ro->ro_rt == 0) {
185			ipstat.ips_noroute++;
186			error = EHOSTUNREACH;
187			goto bad;
188		}
189		ia = ifatoia(ro->ro_rt->rt_ifa);
190		ifp = ro->ro_rt->rt_ifp;
191		ro->ro_rt->rt_use++;
192		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
193			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
194		if (ro->ro_rt->rt_flags & RTF_HOST)
195			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
196		else
197			isbroadcast = in_broadcast(dst->sin_addr, ifp);
198	}
199	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
200		struct in_multi *inm;
201
202		m->m_flags |= M_MCAST;
203		/*
204		 * IP destination address is multicast.  Make sure "dst"
205		 * still points to the address in "ro".  (It may have been
206		 * changed to point to a gateway address, above.)
207		 */
208		dst = (struct sockaddr_in *)&ro->ro_dst;
209		/*
210		 * See if the caller provided any multicast options
211		 */
212		if (imo != NULL) {
213			ip->ip_ttl = imo->imo_multicast_ttl;
214			if (imo->imo_multicast_ifp != NULL)
215				ifp = imo->imo_multicast_ifp;
216			if (imo->imo_multicast_vif != -1)
217				ip->ip_src.s_addr =
218				    ip_mcast_src(imo->imo_multicast_vif);
219		} else
220			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
221		/*
222		 * Confirm that the outgoing interface supports multicast.
223		 */
224		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
225			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
226				ipstat.ips_noroute++;
227				error = ENETUNREACH;
228				goto bad;
229			}
230		}
231		/*
232		 * If source address not specified yet, use address
233		 * of outgoing interface.
234		 */
235		if (ip->ip_src.s_addr == INADDR_ANY) {
236			register struct in_ifaddr *ia;
237
238			for (ia = in_ifaddrhead.tqh_first; ia;
239			     ia = ia->ia_link.tqe_next)
240				if (ia->ia_ifp == ifp) {
241					ip->ip_src = IA_SIN(ia)->sin_addr;
242					break;
243				}
244		}
245
246		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
247		if (inm != NULL &&
248		   (imo == NULL || imo->imo_multicast_loop)) {
249			/*
250			 * If we belong to the destination multicast group
251			 * on the outgoing interface, and the caller did not
252			 * forbid loopback, loop back a copy.
253			 */
254			ip_mloopback(ifp, m, dst, hlen);
255		}
256		else {
257			/*
258			 * If we are acting as a multicast router, perform
259			 * multicast forwarding as if the packet had just
260			 * arrived on the interface to which we are about
261			 * to send.  The multicast forwarding function
262			 * recursively calls this function, using the
263			 * IP_FORWARDING flag to prevent infinite recursion.
264			 *
265			 * Multicasts that are looped back by ip_mloopback(),
266			 * above, will be forwarded by the ip_input() routine,
267			 * if necessary.
268			 */
269			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
270				/*
271				 * Check if rsvp daemon is running. If not, don't
272				 * set ip_moptions. This ensures that the packet
273				 * is multicast and not just sent down one link
274				 * as prescribed by rsvpd.
275				 */
276				if (!rsvp_on)
277				  imo = NULL;
278				if (ip_mforward(ip, ifp, m, imo) != 0) {
279					m_freem(m);
280					goto done;
281				}
282			}
283		}
284
285		/*
286		 * Multicasts with a time-to-live of zero may be looped-
287		 * back, above, but must not be transmitted on a network.
288		 * Also, multicasts addressed to the loopback interface
289		 * are not sent -- the above call to ip_mloopback() will
290		 * loop back a copy if this host actually belongs to the
291		 * destination group on the loopback interface.
292		 */
293		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
294			m_freem(m);
295			goto done;
296		}
297
298		goto sendit;
299	}
300#ifndef notdef
301	/*
302	 * If source address not specified yet, use address
303	 * of outgoing interface.
304	 */
305	if (ip->ip_src.s_addr == INADDR_ANY)
306		ip->ip_src = IA_SIN(ia)->sin_addr;
307#endif
308	/*
309	 * Verify that we have any chance at all of being able to queue
310	 *      the packet or packet fragments
311	 */
312	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
313		ifp->if_snd.ifq_maxlen) {
314			error = ENOBUFS;
315			goto bad;
316	}
317
318	/*
319	 * Look for broadcast address and
320	 * and verify user is allowed to send
321	 * such a packet.
322	 */
323	if (isbroadcast) {
324		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
325			error = EADDRNOTAVAIL;
326			goto bad;
327		}
328		if ((flags & IP_ALLOWBROADCAST) == 0) {
329			error = EACCES;
330			goto bad;
331		}
332		/* don't allow broadcast messages to be fragmented */
333		if ((u_short)ip->ip_len > ifp->if_mtu) {
334			error = EMSGSIZE;
335			goto bad;
336		}
337		m->m_flags |= M_BCAST;
338	} else {
339		m->m_flags &= ~M_BCAST;
340	}
341
342sendit:
343#if defined(IPFILTER) || defined(IPFILTER_LKM)
344	/*
345	 * looks like most checking has been done now...do a filter check
346	 */
347	if (fr_checkp) {
348		struct  mbuf    *m1 = m;
349
350		if ((*fr_checkp)(ip, hlen, ifp, 1, &m1))
351			error = EHOSTUNREACH;
352		if (error || !m1)
353			goto done;
354		ip = mtod(m = m1, struct ip *);
355	}
356#endif
357        /*
358	 * IpHack's section.
359	 * - Xlate: translate packet's addr/port (NAT).
360	 * - Firewall: deny/allow/etc.
361	 * - Wrap: fake packet's addr/port <unimpl.>
362	 * - Encapsulate: put it in another IP and send out. <unimp.>
363	 */
364
365#ifdef COMPAT_IPFW
366        if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
367		error = EACCES;
368		goto done;
369	}
370
371	/*
372	 * Check with the firewall...
373	 */
374	if (ip_fw_chk_ptr) {
375#ifdef IPDIVERT
376		ip_divert_port = (*ip_fw_chk_ptr)(&ip,
377		    hlen, ifp, ip_divert_ignore, &m);
378		ip_divert_ignore = 0;
379		if (ip_divert_port) {		/* Divert packet */
380			(*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, 0);
381			goto done;
382		}
383#else
384		/* If ipfw says divert, we have to just drop packet */
385		if ((*ip_fw_chk_ptr)(&ip, hlen, ifp, 0, &m)) {
386			m_freem(m);
387			goto done;
388		}
389#endif
390		if (!m) {
391			error = EACCES;
392			goto done;
393		}
394	}
395#endif /* COMPAT_IPFW */
396
397	/*
398	 * If small enough for interface, can just send directly.
399	 */
400	if ((u_short)ip->ip_len <= ifp->if_mtu) {
401		ip->ip_len = htons((u_short)ip->ip_len);
402		ip->ip_off = htons((u_short)ip->ip_off);
403		ip->ip_sum = 0;
404		if (ip->ip_vhl == IP_VHL_BORING) {
405			ip->ip_sum = in_cksum_hdr(ip);
406		} else {
407			ip->ip_sum = in_cksum(m, hlen);
408		}
409		error = (*ifp->if_output)(ifp, m,
410				(struct sockaddr *)dst, ro->ro_rt);
411		goto done;
412	}
413	/*
414	 * Too large for interface; fragment if possible.
415	 * Must be able to put at least 8 bytes per fragment.
416	 */
417	if (ip->ip_off & IP_DF) {
418		error = EMSGSIZE;
419		/*
420		 * This case can happen if the user changed the MTU
421		 * of an interface after enabling IP on it.  Because
422		 * most netifs don't keep track of routes pointing to
423		 * them, there is no way for one to update all its
424		 * routes when the MTU is changed.
425		 */
426		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
427		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
428		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
429			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
430		}
431		ipstat.ips_cantfrag++;
432		goto bad;
433	}
434	len = (ifp->if_mtu - hlen) &~ 7;
435	if (len < 8) {
436		error = EMSGSIZE;
437		goto bad;
438	}
439
440    {
441	int mhlen, firstlen = len;
442	struct mbuf **mnext = &m->m_nextpkt;
443
444	/*
445	 * Loop through length of segment after first fragment,
446	 * make new header and copy data of each part and link onto chain.
447	 */
448	m0 = m;
449	mhlen = sizeof (struct ip);
450	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
451		MGETHDR(m, M_DONTWAIT, MT_HEADER);
452		if (m == 0) {
453			error = ENOBUFS;
454			ipstat.ips_odropped++;
455			goto sendorfree;
456		}
457		m->m_data += max_linkhdr;
458		mhip = mtod(m, struct ip *);
459		*mhip = *ip;
460		if (hlen > sizeof (struct ip)) {
461			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
462			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
463		}
464		m->m_len = mhlen;
465		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
466		if (ip->ip_off & IP_MF)
467			mhip->ip_off |= IP_MF;
468		if (off + len >= (u_short)ip->ip_len)
469			len = (u_short)ip->ip_len - off;
470		else
471			mhip->ip_off |= IP_MF;
472		mhip->ip_len = htons((u_short)(len + mhlen));
473		m->m_next = m_copy(m0, off, len);
474		if (m->m_next == 0) {
475			(void) m_free(m);
476			error = ENOBUFS;	/* ??? */
477			ipstat.ips_odropped++;
478			goto sendorfree;
479		}
480		m->m_pkthdr.len = mhlen + len;
481		m->m_pkthdr.rcvif = (struct ifnet *)0;
482		mhip->ip_off = htons((u_short)mhip->ip_off);
483		mhip->ip_sum = 0;
484		if (mhip->ip_vhl == IP_VHL_BORING) {
485			mhip->ip_sum = in_cksum_hdr(mhip);
486		} else {
487			mhip->ip_sum = in_cksum(m, mhlen);
488		}
489		*mnext = m;
490		mnext = &m->m_nextpkt;
491		ipstat.ips_ofragments++;
492	}
493	/*
494	 * Update first fragment by trimming what's been copied out
495	 * and updating header, then send each fragment (in order).
496	 */
497	m = m0;
498	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
499	m->m_pkthdr.len = hlen + firstlen;
500	ip->ip_len = htons((u_short)m->m_pkthdr.len);
501	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
502	ip->ip_sum = 0;
503	if (ip->ip_vhl == IP_VHL_BORING) {
504		ip->ip_sum = in_cksum_hdr(ip);
505	} else {
506		ip->ip_sum = in_cksum(m, hlen);
507	}
508sendorfree:
509	for (m = m0; m; m = m0) {
510		m0 = m->m_nextpkt;
511		m->m_nextpkt = 0;
512		if (error == 0)
513			error = (*ifp->if_output)(ifp, m,
514			    (struct sockaddr *)dst, ro->ro_rt);
515		else
516			m_freem(m);
517	}
518
519	if (error == 0)
520		ipstat.ips_fragmented++;
521    }
522done:
523	return (error);
524bad:
525	m_freem(m0);
526	goto done;
527}
528
529/*
530 * Insert IP options into preformed packet.
531 * Adjust IP destination as required for IP source routing,
532 * as indicated by a non-zero in_addr at the start of the options.
533 *
534 * XXX This routine assumes that the packet has no options in place.
535 */
536static struct mbuf *
537ip_insertoptions(m, opt, phlen)
538	register struct mbuf *m;
539	struct mbuf *opt;
540	int *phlen;
541{
542	register struct ipoption *p = mtod(opt, struct ipoption *);
543	struct mbuf *n;
544	register struct ip *ip = mtod(m, struct ip *);
545	unsigned optlen;
546
547	optlen = opt->m_len - sizeof(p->ipopt_dst);
548	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
549		return (m);		/* XXX should fail */
550	if (p->ipopt_dst.s_addr)
551		ip->ip_dst = p->ipopt_dst;
552	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
553		MGETHDR(n, M_DONTWAIT, MT_HEADER);
554		if (n == 0)
555			return (m);
556		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
557		m->m_len -= sizeof(struct ip);
558		m->m_data += sizeof(struct ip);
559		n->m_next = m;
560		m = n;
561		m->m_len = optlen + sizeof(struct ip);
562		m->m_data += max_linkhdr;
563		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
564	} else {
565		m->m_data -= optlen;
566		m->m_len += optlen;
567		m->m_pkthdr.len += optlen;
568		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
569	}
570	ip = mtod(m, struct ip *);
571	bcopy(p->ipopt_list, ip + 1, optlen);
572	*phlen = sizeof(struct ip) + optlen;
573	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
574	ip->ip_len += optlen;
575	return (m);
576}
577
578/*
579 * Copy options from ip to jp,
580 * omitting those not copied during fragmentation.
581 */
582#if !defined(IPFILTER) && !defined(IPFILTER_LKM)
583static
584#endif
585int
586ip_optcopy(ip, jp)
587	struct ip *ip, *jp;
588{
589	register u_char *cp, *dp;
590	int opt, optlen, cnt;
591
592	cp = (u_char *)(ip + 1);
593	dp = (u_char *)(jp + 1);
594	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
595	for (; cnt > 0; cnt -= optlen, cp += optlen) {
596		opt = cp[0];
597		if (opt == IPOPT_EOL)
598			break;
599		if (opt == IPOPT_NOP) {
600			/* Preserve for IP mcast tunnel's LSRR alignment. */
601			*dp++ = IPOPT_NOP;
602			optlen = 1;
603			continue;
604		} else
605			optlen = cp[IPOPT_OLEN];
606		/* bogus lengths should have been caught by ip_dooptions */
607		if (optlen > cnt)
608			optlen = cnt;
609		if (IPOPT_COPIED(opt)) {
610			bcopy(cp, dp, optlen);
611			dp += optlen;
612		}
613	}
614	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
615		*dp++ = IPOPT_EOL;
616	return (optlen);
617}
618
619/*
620 * IP socket option processing.
621 */
622int
623ip_ctloutput(op, so, level, optname, mp, p)
624	int op;
625	struct socket *so;
626	int level, optname;
627	struct mbuf **mp;
628	struct proc *p;
629{
630	register struct inpcb *inp = sotoinpcb(so);
631	register struct mbuf *m = *mp;
632	register int optval = 0;
633	int error = 0;
634
635	if (level != IPPROTO_IP) {
636		error = EINVAL;
637		if (op == PRCO_SETOPT && *mp)
638			(void) m_free(*mp);
639	} else switch (op) {
640
641	case PRCO_SETOPT:
642		switch (optname) {
643		case IP_OPTIONS:
644#ifdef notyet
645		case IP_RETOPTS:
646			return (ip_pcbopts(optname, &inp->inp_options, m));
647#else
648			return (ip_pcbopts(&inp->inp_options, m));
649#endif
650
651		case IP_TOS:
652		case IP_TTL:
653		case IP_RECVOPTS:
654		case IP_RECVRETOPTS:
655		case IP_RECVDSTADDR:
656		case IP_RECVIF:
657			if (m == 0 || m->m_len != sizeof(int))
658				error = EINVAL;
659			else {
660				optval = *mtod(m, int *);
661				switch (optname) {
662
663				case IP_TOS:
664					inp->inp_ip_tos = optval;
665					break;
666
667				case IP_TTL:
668					inp->inp_ip_ttl = optval;
669					break;
670#define	OPTSET(bit) \
671	if (optval) \
672		inp->inp_flags |= bit; \
673	else \
674		inp->inp_flags &= ~bit;
675
676				case IP_RECVOPTS:
677					OPTSET(INP_RECVOPTS);
678					break;
679
680				case IP_RECVRETOPTS:
681					OPTSET(INP_RECVRETOPTS);
682					break;
683
684				case IP_RECVDSTADDR:
685					OPTSET(INP_RECVDSTADDR);
686					break;
687
688				case IP_RECVIF:
689					OPTSET(INP_RECVIF);
690					break;
691				}
692			}
693			break;
694#undef OPTSET
695
696		case IP_MULTICAST_IF:
697		case IP_MULTICAST_VIF:
698		case IP_MULTICAST_TTL:
699		case IP_MULTICAST_LOOP:
700		case IP_ADD_MEMBERSHIP:
701		case IP_DROP_MEMBERSHIP:
702			error = ip_setmoptions(optname, &inp->inp_moptions, m);
703			break;
704
705		case IP_PORTRANGE:
706			if (m == 0 || m->m_len != sizeof(int))
707				error = EINVAL;
708			else {
709				optval = *mtod(m, int *);
710
711				switch (optval) {
712
713				case IP_PORTRANGE_DEFAULT:
714					inp->inp_flags &= ~(INP_LOWPORT);
715					inp->inp_flags &= ~(INP_HIGHPORT);
716					break;
717
718				case IP_PORTRANGE_HIGH:
719					inp->inp_flags &= ~(INP_LOWPORT);
720					inp->inp_flags |= INP_HIGHPORT;
721					break;
722
723				case IP_PORTRANGE_LOW:
724					inp->inp_flags &= ~(INP_HIGHPORT);
725					inp->inp_flags |= INP_LOWPORT;
726					break;
727
728				default:
729					error = EINVAL;
730					break;
731				}
732			}
733			break;
734
735		default:
736			error = ENOPROTOOPT;
737			break;
738		}
739		if (m)
740			(void)m_free(m);
741		break;
742
743	case PRCO_GETOPT:
744		switch (optname) {
745		case IP_OPTIONS:
746		case IP_RETOPTS:
747			*mp = m = m_get(M_WAIT, MT_SOOPTS);
748			if (inp->inp_options) {
749				m->m_len = inp->inp_options->m_len;
750				bcopy(mtod(inp->inp_options, void *),
751				    mtod(m, void *), m->m_len);
752			} else
753				m->m_len = 0;
754			break;
755
756		case IP_TOS:
757		case IP_TTL:
758		case IP_RECVOPTS:
759		case IP_RECVRETOPTS:
760		case IP_RECVDSTADDR:
761		case IP_RECVIF:
762			*mp = m = m_get(M_WAIT, MT_SOOPTS);
763			m->m_len = sizeof(int);
764			switch (optname) {
765
766			case IP_TOS:
767				optval = inp->inp_ip_tos;
768				break;
769
770			case IP_TTL:
771				optval = inp->inp_ip_ttl;
772				break;
773
774#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
775
776			case IP_RECVOPTS:
777				optval = OPTBIT(INP_RECVOPTS);
778				break;
779
780			case IP_RECVRETOPTS:
781				optval = OPTBIT(INP_RECVRETOPTS);
782				break;
783
784			case IP_RECVDSTADDR:
785				optval = OPTBIT(INP_RECVDSTADDR);
786				break;
787
788			case IP_RECVIF:
789				optval = OPTBIT(INP_RECVIF);
790				break;
791			}
792			*mtod(m, int *) = optval;
793			break;
794
795		case IP_MULTICAST_IF:
796		case IP_MULTICAST_VIF:
797		case IP_MULTICAST_TTL:
798		case IP_MULTICAST_LOOP:
799		case IP_ADD_MEMBERSHIP:
800		case IP_DROP_MEMBERSHIP:
801			error = ip_getmoptions(optname, inp->inp_moptions, mp);
802			break;
803
804		case IP_PORTRANGE:
805			*mp = m = m_get(M_WAIT, MT_SOOPTS);
806			m->m_len = sizeof(int);
807
808			if (inp->inp_flags & INP_HIGHPORT)
809				optval = IP_PORTRANGE_HIGH;
810			else if (inp->inp_flags & INP_LOWPORT)
811				optval = IP_PORTRANGE_LOW;
812			else
813				optval = 0;
814
815			*mtod(m, int *) = optval;
816			break;
817
818		default:
819			error = ENOPROTOOPT;
820			break;
821		}
822		break;
823	}
824	return (error);
825}
826
827/*
828 * Set up IP options in pcb for insertion in output packets.
829 * Store in mbuf with pointer in pcbopt, adding pseudo-option
830 * with destination address if source routed.
831 */
832static int
833#ifdef notyet
834ip_pcbopts(optname, pcbopt, m)
835	int optname;
836#else
837ip_pcbopts(pcbopt, m)
838#endif
839	struct mbuf **pcbopt;
840	register struct mbuf *m;
841{
842	register cnt, optlen;
843	register u_char *cp;
844	u_char opt;
845
846	/* turn off any old options */
847	if (*pcbopt)
848		(void)m_free(*pcbopt);
849	*pcbopt = 0;
850	if (m == (struct mbuf *)0 || m->m_len == 0) {
851		/*
852		 * Only turning off any previous options.
853		 */
854		if (m)
855			(void)m_free(m);
856		return (0);
857	}
858
859#ifndef	vax
860	if (m->m_len % sizeof(long))
861		goto bad;
862#endif
863	/*
864	 * IP first-hop destination address will be stored before
865	 * actual options; move other options back
866	 * and clear it when none present.
867	 */
868	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
869		goto bad;
870	cnt = m->m_len;
871	m->m_len += sizeof(struct in_addr);
872	cp = mtod(m, u_char *) + sizeof(struct in_addr);
873	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
874	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
875
876	for (; cnt > 0; cnt -= optlen, cp += optlen) {
877		opt = cp[IPOPT_OPTVAL];
878		if (opt == IPOPT_EOL)
879			break;
880		if (opt == IPOPT_NOP)
881			optlen = 1;
882		else {
883			optlen = cp[IPOPT_OLEN];
884			if (optlen <= IPOPT_OLEN || optlen > cnt)
885				goto bad;
886		}
887		switch (opt) {
888
889		default:
890			break;
891
892		case IPOPT_LSRR:
893		case IPOPT_SSRR:
894			/*
895			 * user process specifies route as:
896			 *	->A->B->C->D
897			 * D must be our final destination (but we can't
898			 * check that since we may not have connected yet).
899			 * A is first hop destination, which doesn't appear in
900			 * actual IP option, but is stored before the options.
901			 */
902			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
903				goto bad;
904			m->m_len -= sizeof(struct in_addr);
905			cnt -= sizeof(struct in_addr);
906			optlen -= sizeof(struct in_addr);
907			cp[IPOPT_OLEN] = optlen;
908			/*
909			 * Move first hop before start of options.
910			 */
911			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
912			    sizeof(struct in_addr));
913			/*
914			 * Then copy rest of options back
915			 * to close up the deleted entry.
916			 */
917			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
918			    sizeof(struct in_addr)),
919			    (caddr_t)&cp[IPOPT_OFFSET+1],
920			    (unsigned)cnt + sizeof(struct in_addr));
921			break;
922		}
923	}
924	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
925		goto bad;
926	*pcbopt = m;
927	return (0);
928
929bad:
930	(void)m_free(m);
931	return (EINVAL);
932}
933
934/*
935 * Set the IP multicast options in response to user setsockopt().
936 */
937static int
938ip_setmoptions(optname, imop, m)
939	int optname;
940	struct ip_moptions **imop;
941	struct mbuf *m;
942{
943	register int error = 0;
944	u_char loop;
945	register int i;
946	struct in_addr addr;
947	register struct ip_mreq *mreq;
948	register struct ifnet *ifp;
949	register struct ip_moptions *imo = *imop;
950	struct route ro;
951	register struct sockaddr_in *dst;
952	int s;
953
954	if (imo == NULL) {
955		/*
956		 * No multicast option buffer attached to the pcb;
957		 * allocate one and initialize to default values.
958		 */
959		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
960		    M_WAITOK);
961
962		if (imo == NULL)
963			return (ENOBUFS);
964		*imop = imo;
965		imo->imo_multicast_ifp = NULL;
966		imo->imo_multicast_vif = -1;
967		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
968		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
969		imo->imo_num_memberships = 0;
970	}
971
972	switch (optname) {
973	/* store an index number for the vif you wanna use in the send */
974	case IP_MULTICAST_VIF:
975		if (!legal_vif_num) {
976			error = EOPNOTSUPP;
977			break;
978		}
979		if (m == NULL || m->m_len != sizeof(int)) {
980			error = EINVAL;
981			break;
982		}
983		i = *(mtod(m, int *));
984		if (!legal_vif_num(i) && (i != -1)) {
985			error = EINVAL;
986			break;
987		}
988		imo->imo_multicast_vif = i;
989		break;
990
991	case IP_MULTICAST_IF:
992		/*
993		 * Select the interface for outgoing multicast packets.
994		 */
995		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
996			error = EINVAL;
997			break;
998		}
999		addr = *(mtod(m, struct in_addr *));
1000		/*
1001		 * INADDR_ANY is used to remove a previous selection.
1002		 * When no interface is selected, a default one is
1003		 * chosen every time a multicast packet is sent.
1004		 */
1005		if (addr.s_addr == INADDR_ANY) {
1006			imo->imo_multicast_ifp = NULL;
1007			break;
1008		}
1009		/*
1010		 * The selected interface is identified by its local
1011		 * IP address.  Find the interface and confirm that
1012		 * it supports multicasting.
1013		 */
1014		s = splimp();
1015		INADDR_TO_IFP(addr, ifp);
1016		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1017			splx(s);
1018			error = EADDRNOTAVAIL;
1019			break;
1020		}
1021		imo->imo_multicast_ifp = ifp;
1022		splx(s);
1023		break;
1024
1025	case IP_MULTICAST_TTL:
1026		/*
1027		 * Set the IP time-to-live for outgoing multicast packets.
1028		 */
1029		if (m == NULL || m->m_len != 1) {
1030			error = EINVAL;
1031			break;
1032		}
1033		imo->imo_multicast_ttl = *(mtod(m, u_char *));
1034		break;
1035
1036	case IP_MULTICAST_LOOP:
1037		/*
1038		 * Set the loopback flag for outgoing multicast packets.
1039		 * Must be zero or one.
1040		 */
1041		if (m == NULL || m->m_len != 1 ||
1042		   (loop = *(mtod(m, u_char *))) > 1) {
1043			error = EINVAL;
1044			break;
1045		}
1046		imo->imo_multicast_loop = loop;
1047		break;
1048
1049	case IP_ADD_MEMBERSHIP:
1050		/*
1051		 * Add a multicast group membership.
1052		 * Group must be a valid IP multicast address.
1053		 */
1054		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1055			error = EINVAL;
1056			break;
1057		}
1058		mreq = mtod(m, struct ip_mreq *);
1059		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1060			error = EINVAL;
1061			break;
1062		}
1063		s = splimp();
1064		/*
1065		 * If no interface address was provided, use the interface of
1066		 * the route to the given multicast address.
1067		 */
1068		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1069			bzero((caddr_t)&ro, sizeof(ro));
1070			dst = (struct sockaddr_in *)&ro.ro_dst;
1071			dst->sin_len = sizeof(*dst);
1072			dst->sin_family = AF_INET;
1073			dst->sin_addr = mreq->imr_multiaddr;
1074			rtalloc(&ro);
1075			if (ro.ro_rt == NULL) {
1076				error = EADDRNOTAVAIL;
1077				splx(s);
1078				break;
1079			}
1080			ifp = ro.ro_rt->rt_ifp;
1081			rtfree(ro.ro_rt);
1082		}
1083		else {
1084			INADDR_TO_IFP(mreq->imr_interface, ifp);
1085		}
1086
1087		/*
1088		 * See if we found an interface, and confirm that it
1089		 * supports multicast.
1090		 */
1091		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1092			error = EADDRNOTAVAIL;
1093			splx(s);
1094			break;
1095		}
1096		/*
1097		 * See if the membership already exists or if all the
1098		 * membership slots are full.
1099		 */
1100		for (i = 0; i < imo->imo_num_memberships; ++i) {
1101			if (imo->imo_membership[i]->inm_ifp == ifp &&
1102			    imo->imo_membership[i]->inm_addr.s_addr
1103						== mreq->imr_multiaddr.s_addr)
1104				break;
1105		}
1106		if (i < imo->imo_num_memberships) {
1107			error = EADDRINUSE;
1108			splx(s);
1109			break;
1110		}
1111		if (i == IP_MAX_MEMBERSHIPS) {
1112			error = ETOOMANYREFS;
1113			splx(s);
1114			break;
1115		}
1116		/*
1117		 * Everything looks good; add a new record to the multicast
1118		 * address list for the given interface.
1119		 */
1120		if ((imo->imo_membership[i] =
1121		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1122			error = ENOBUFS;
1123			splx(s);
1124			break;
1125		}
1126		++imo->imo_num_memberships;
1127		splx(s);
1128		break;
1129
1130	case IP_DROP_MEMBERSHIP:
1131		/*
1132		 * Drop a multicast group membership.
1133		 * Group must be a valid IP multicast address.
1134		 */
1135		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1136			error = EINVAL;
1137			break;
1138		}
1139		mreq = mtod(m, struct ip_mreq *);
1140		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1141			error = EINVAL;
1142			break;
1143		}
1144
1145		s = splimp();
1146		/*
1147		 * If an interface address was specified, get a pointer
1148		 * to its ifnet structure.
1149		 */
1150		if (mreq->imr_interface.s_addr == INADDR_ANY)
1151			ifp = NULL;
1152		else {
1153			INADDR_TO_IFP(mreq->imr_interface, ifp);
1154			if (ifp == NULL) {
1155				error = EADDRNOTAVAIL;
1156				splx(s);
1157				break;
1158			}
1159		}
1160		/*
1161		 * Find the membership in the membership array.
1162		 */
1163		for (i = 0; i < imo->imo_num_memberships; ++i) {
1164			if ((ifp == NULL ||
1165			     imo->imo_membership[i]->inm_ifp == ifp) &&
1166			     imo->imo_membership[i]->inm_addr.s_addr ==
1167			     mreq->imr_multiaddr.s_addr)
1168				break;
1169		}
1170		if (i == imo->imo_num_memberships) {
1171			error = EADDRNOTAVAIL;
1172			splx(s);
1173			break;
1174		}
1175		/*
1176		 * Give up the multicast address record to which the
1177		 * membership points.
1178		 */
1179		in_delmulti(imo->imo_membership[i]);
1180		/*
1181		 * Remove the gap in the membership array.
1182		 */
1183		for (++i; i < imo->imo_num_memberships; ++i)
1184			imo->imo_membership[i-1] = imo->imo_membership[i];
1185		--imo->imo_num_memberships;
1186		splx(s);
1187		break;
1188
1189	default:
1190		error = EOPNOTSUPP;
1191		break;
1192	}
1193
1194	/*
1195	 * If all options have default values, no need to keep the mbuf.
1196	 */
1197	if (imo->imo_multicast_ifp == NULL &&
1198	    imo->imo_multicast_vif == -1 &&
1199	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1200	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1201	    imo->imo_num_memberships == 0) {
1202		free(*imop, M_IPMOPTS);
1203		*imop = NULL;
1204	}
1205
1206	return (error);
1207}
1208
1209/*
1210 * Return the IP multicast options in response to user getsockopt().
1211 */
1212static int
1213ip_getmoptions(optname, imo, mp)
1214	int optname;
1215	register struct ip_moptions *imo;
1216	register struct mbuf **mp;
1217{
1218	u_char *ttl;
1219	u_char *loop;
1220	struct in_addr *addr;
1221	struct in_ifaddr *ia;
1222
1223	*mp = m_get(M_WAIT, MT_SOOPTS);
1224
1225	switch (optname) {
1226
1227	case IP_MULTICAST_VIF:
1228		if (imo != NULL)
1229			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1230		else
1231			*(mtod(*mp, int *)) = -1;
1232		(*mp)->m_len = sizeof(int);
1233		return(0);
1234
1235	case IP_MULTICAST_IF:
1236		addr = mtod(*mp, struct in_addr *);
1237		(*mp)->m_len = sizeof(struct in_addr);
1238		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1239			addr->s_addr = INADDR_ANY;
1240		else {
1241			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1242			addr->s_addr = (ia == NULL) ? INADDR_ANY
1243					: IA_SIN(ia)->sin_addr.s_addr;
1244		}
1245		return (0);
1246
1247	case IP_MULTICAST_TTL:
1248		ttl = mtod(*mp, u_char *);
1249		(*mp)->m_len = 1;
1250		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1251				     : imo->imo_multicast_ttl;
1252		return (0);
1253
1254	case IP_MULTICAST_LOOP:
1255		loop = mtod(*mp, u_char *);
1256		(*mp)->m_len = 1;
1257		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1258				      : imo->imo_multicast_loop;
1259		return (0);
1260
1261	default:
1262		return (EOPNOTSUPP);
1263	}
1264}
1265
1266/*
1267 * Discard the IP multicast options.
1268 */
1269void
1270ip_freemoptions(imo)
1271	register struct ip_moptions *imo;
1272{
1273	register int i;
1274
1275	if (imo != NULL) {
1276		for (i = 0; i < imo->imo_num_memberships; ++i)
1277			in_delmulti(imo->imo_membership[i]);
1278		free(imo, M_IPMOPTS);
1279	}
1280}
1281
1282/*
1283 * Routine called from ip_output() to loop back a copy of an IP multicast
1284 * packet to the input queue of a specified interface.  Note that this
1285 * calls the output routine of the loopback "driver", but with an interface
1286 * pointer that might NOT be a loopback interface -- evil, but easier than
1287 * replicating that code here.
1288 */
1289static void
1290ip_mloopback(ifp, m, dst, hlen)
1291	struct ifnet *ifp;
1292	register struct mbuf *m;
1293	register struct sockaddr_in *dst;
1294	int hlen;
1295{
1296	register struct ip *ip;
1297	struct mbuf *copym;
1298
1299	copym = m_copy(m, 0, M_COPYALL);
1300	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1301		copym = m_pullup(copym, hlen);
1302	if (copym != NULL) {
1303		/*
1304		 * We don't bother to fragment if the IP length is greater
1305		 * than the interface's MTU.  Can this possibly matter?
1306		 */
1307		ip = mtod(copym, struct ip *);
1308		ip->ip_len = htons((u_short)ip->ip_len);
1309		ip->ip_off = htons((u_short)ip->ip_off);
1310		ip->ip_sum = 0;
1311		if (ip->ip_vhl == IP_VHL_BORING) {
1312			ip->ip_sum = in_cksum_hdr(ip);
1313		} else {
1314			ip->ip_sum = in_cksum(copym, hlen);
1315		}
1316		/*
1317		 * NB:
1318		 * It's not clear whether there are any lingering
1319		 * reentrancy problems in other areas which might
1320		 * be exposed by using ip_input directly (in
1321		 * particular, everything which modifies the packet
1322		 * in-place).  Yet another option is using the
1323		 * protosw directly to deliver the looped back
1324		 * packet.  For the moment, we'll err on the side
1325		 * of safety by continuing to abuse looutput().
1326		 */
1327#ifdef notdef
1328		copym->m_pkthdr.rcvif = ifp;
1329		ip_input(copym)
1330#else
1331		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1332#endif
1333	}
1334}
1335