ip_output.c revision 17072
1123221Stjr/*
2123221Stjr * Copyright (c) 1982, 1986, 1988, 1990, 1993
3123221Stjr *	The Regents of the University of California.  All rights reserved.
4123221Stjr *
5123221Stjr * Redistribution and use in source and binary forms, with or without
6123221Stjr * modification, are permitted provided that the following conditions
7123221Stjr * are met:
8123221Stjr * 1. Redistributions of source code must retain the above copyright
9123221Stjr *    notice, this list of conditions and the following disclaimer.
10123221Stjr * 2. Redistributions in binary form must reproduce the above copyright
11123221Stjr *    notice, this list of conditions and the following disclaimer in the
12123221Stjr *    documentation and/or other materials provided with the distribution.
13123221Stjr * 3. All advertising materials mentioning features or use of this software
14123221Stjr *    must display the following acknowledgement:
15123221Stjr *	This product includes software developed by the University of
16123221Stjr *	California, Berkeley and its contributors.
17123221Stjr * 4. Neither the name of the University nor the names of its contributors
18123221Stjr *    may be used to endorse or promote products derived from this software
19123221Stjr *    without specific prior written permission.
20123221Stjr *
21123221Stjr * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22123221Stjr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23123221Stjr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24123221Stjr * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25123221Stjr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26123221Stjr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27123221Stjr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28123221Stjr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29123221Stjr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30123221Stjr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31123221Stjr * SUCH DAMAGE.
32123221Stjr *
33123221Stjr *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34123221Stjr *	$Id: ip_output.c,v 1.40 1996/06/08 08:18:59 bde Exp $
35123221Stjr */
36123221Stjr
37123221Stjr#define _IP_VHL
38123221Stjr
39123221Stjr#include <sys/param.h>
40123221Stjr#include <sys/queue.h>
41123221Stjr#include <sys/systm.h>
42123221Stjr#include <sys/malloc.h>
43123221Stjr#include <sys/mbuf.h>
44123221Stjr#include <sys/errno.h>
45123221Stjr#include <sys/protosw.h>
46123221Stjr#include <sys/socket.h>
47227753Stheraven#include <sys/socketvar.h>
48227753Stheraven
49227753Stheraven#include <net/if.h>
50123221Stjr#include <net/route.h>
51123221Stjr
52123221Stjr#include <netinet/in.h>
53123221Stjr#include <netinet/in_systm.h>
54123221Stjr#include <netinet/ip.h>
55123221Stjr#include <netinet/in_pcb.h>
56123221Stjr#include <netinet/in_var.h>
57123221Stjr#include <netinet/ip_var.h>
58123221Stjr
59123221Stjr#ifdef vax
60123221Stjr#include <machine/mtpr.h>
61123221Stjr#endif
62123221Stjr#include <machine/in_cksum.h>
63123221Stjr
/*
 * Normalise COMPAT_IPFW: when it is undefined or defined as 1 it ends up
 * defined as exactly 1; any other value leaves it undefined.  The firewall
 * hook in ip_output() is compiled under "#ifdef COMPAT_IPFW".
 */
64123221Stjr#if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
65123221Stjr#undef COMPAT_IPFW
66123221Stjr#define COMPAT_IPFW 1
67123221Stjr#else
68123221Stjr#undef COMPAT_IPFW
69123221Stjr#endif
70123221Stjr
/*
 * Source of IP identification values: ip_output() stores htons(ip_id++)
 * into each header it builds itself (i.e. not forwarded or raw output).
 */
71123221Stjru_short ip_id;
72123221Stjr
/* Helpers private to this file. */
73123221Stjrstatic struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
74123221Stjrstatic void	ip_mloopback
75123221Stjr	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
76123221Stjrstatic int	ip_getmoptions
77123221Stjr	__P((int, struct ip_moptions *, struct mbuf **));
78123221Stjrstatic int	ip_optcopy __P((struct ip *, struct ip *));
79123221Stjrstatic int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
80123221Stjrstatic int	ip_setmoptions
81123221Stjr	__P((int, struct ip_moptions **, struct mbuf *));
82123221Stjr
/* Protocol switch table; indexed through ip_protox[] for divert input. */
83123221Stjrextern	struct protosw inetsw[];
84123221Stjr
85123221Stjr/*
86123221Stjr * IP output.  The packet in mbuf chain m contains a skeletal IP
87131608Sru * header (with len, off, ttl, proto, tos, src, dst).
88123221Stjr * The mbuf chain containing the packet will be freed.
89123221Stjr * The mbuf opt, if present, will not be freed.
90131608Sru */
91123221Stjrint
92123221Stjrip_output(m0, opt, ro, flags, imo)
93131608Sru	struct mbuf *m0;
94123221Stjr	struct mbuf *opt;
95123221Stjr	struct route *ro;
96131608Sru	int flags;
97123221Stjr	struct ip_moptions *imo;
98123221Stjr{
99123221Stjr	struct ip *ip, *mhip;
100123221Stjr	struct ifnet *ifp;
101123221Stjr	struct mbuf *m = m0;
102123221Stjr	int hlen = sizeof (struct ip);
103123221Stjr	int len, off, error = 0;
104123221Stjr	struct sockaddr_in *dst;
105123221Stjr	struct in_ifaddr *ia;
106131608Sru	int isbroadcast;
107123221Stjr
108131608Sru#ifdef	DIAGNOSTIC
109123221Stjr	if ((m->m_flags & M_PKTHDR) == 0)
110131608Sru		panic("ip_output no HDR");
111123221Stjr	if (!ro)
112131608Sru		panic("ip_output no route, proto = %d",
113123221Stjr		      mtod(m, struct ip *)->ip_p);
114131608Sru#endif
115123221Stjr	if (opt) {
116131608Sru		m = ip_insertoptions(m, opt, &len);
117123221Stjr		hlen = len;
118131608Sru	}
119123221Stjr	ip = mtod(m, struct ip *);
120123221Stjr	/*
121131608Sru	 * Fill in IP header.
122123221Stjr	 */
123123221Stjr	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
124131608Sru		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
125123221Stjr		ip->ip_off &= IP_DF;
126123221Stjr		ip->ip_id = htons(ip_id++);
127131608Sru		ipstat.ips_localout++;
128123221Stjr	} else {
129123221Stjr		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
130131608Sru	}
131123221Stjr
132123221Stjr	dst = (struct sockaddr_in *)&ro->ro_dst;
133131608Sru	/*
134123221Stjr	 * If there is a cached route,
135123221Stjr	 * check that it is to the same destination
136131608Sru	 * and is still up.  If not, free it and try again.
137123221Stjr	 */
138131608Sru	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
139123221Stjr	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
140131608Sru		RTFREE(ro->ro_rt);
141123221Stjr		ro->ro_rt = (struct rtentry *)0;
142131608Sru	}
143123221Stjr	if (ro->ro_rt == 0) {
144131608Sru		dst->sin_family = AF_INET;
145123221Stjr		dst->sin_len = sizeof(*dst);
146131608Sru		dst->sin_addr = ip->ip_dst;
147123221Stjr	}
148131608Sru	/*
149123221Stjr	 * If routing to interface only,
150131608Sru	 * short circuit routing lookup.
151123221Stjr	 */
152123221Stjr#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
153123221Stjr#define sintosa(sin)	((struct sockaddr *)(sin))
154123221Stjr	if (flags & IP_ROUTETOIF) {
155123221Stjr		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
156123221Stjr		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
157123221Stjr			ipstat.ips_noroute++;
158123221Stjr			error = ENETUNREACH;
159123221Stjr			goto bad;
160131608Sru		}
161123221Stjr		ifp = ia->ia_ifp;
162123221Stjr		ip->ip_ttl = 1;
163131608Sru		isbroadcast = in_broadcast(dst->sin_addr, ifp);
164123221Stjr	} else {
165131608Sru		/*
166123221Stjr		 * If this is the case, we probably don't want to allocate
167131608Sru		 * a protocol-cloned route since we didn't get one from the
168123221Stjr		 * ULP.  This lets TCP do its thing, while not burdening
169131608Sru		 * forwarding or ICMP with the overhead of cloning a route.
170123221Stjr		 * Of course, we still want to do any cloning requested by
171131608Sru		 * the link layer, as this is probably required in all cases
172123221Stjr		 * for correct operation (as it is for ARP).
173131608Sru		 */
174123221Stjr		if (ro->ro_rt == 0)
175131608Sru			rtalloc_ign(ro, RTF_PRCLONING);
176123221Stjr		if (ro->ro_rt == 0) {
177131608Sru			ipstat.ips_noroute++;
178123221Stjr			error = EHOSTUNREACH;
179131608Sru			goto bad;
180123221Stjr		}
181131608Sru		ia = ifatoia(ro->ro_rt->rt_ifa);
182123221Stjr		ifp = ro->ro_rt->rt_ifp;
183131608Sru		ro->ro_rt->rt_use++;
184123221Stjr		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
185131608Sru			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
186123221Stjr		if (ro->ro_rt->rt_flags & RTF_HOST)
187131608Sru			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
188123221Stjr		else
189131608Sru			isbroadcast = in_broadcast(dst->sin_addr, ifp);
190123221Stjr	}
191131608Sru	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
192123221Stjr		struct in_multi *inm;
193123221Stjr
194123221Stjr		m->m_flags |= M_MCAST;
195123221Stjr		/*
196123221Stjr		 * IP destination address is multicast.  Make sure "dst"
197123221Stjr		 * still points to the address in "ro".  (It may have been
198123221Stjr		 * changed to point to a gateway address, above.)
199123221Stjr		 */
200123221Stjr		dst = (struct sockaddr_in *)&ro->ro_dst;
201123221Stjr		/*
202227753Stheraven		 * See if the caller provided any multicast options
203227753Stheraven		 */
204227753Stheraven		if (imo != NULL) {
205227753Stheraven			ip->ip_ttl = imo->imo_multicast_ttl;
206228199Sobrien			if (imo->imo_multicast_ifp != NULL)
207123221Stjr				ifp = imo->imo_multicast_ifp;
208123221Stjr			if (imo->imo_multicast_vif != -1)
209123221Stjr				ip->ip_src.s_addr =
210123221Stjr				    ip_mcast_src(imo->imo_multicast_vif);
211123221Stjr		} else
212131608Sru			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
213123221Stjr		/*
214123221Stjr		 * Confirm that the outgoing interface supports multicast.
215228199Sobrien		 */
216227753Stheraven		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
217228199Sobrien			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
218228199Sobrien				ipstat.ips_noroute++;
219227753Stheraven				error = ENETUNREACH;
220227753Stheraven				goto bad;
221227753Stheraven			}
222227753Stheraven		}
223123221Stjr		/*
224123221Stjr		 * If source address not specified yet, use address
225123221Stjr		 * of outgoing interface.
226123221Stjr		 */
227123221Stjr		if (ip->ip_src.s_addr == INADDR_ANY) {
228123221Stjr			register struct in_ifaddr *ia;
229123221Stjr
230123221Stjr			for (ia = in_ifaddr; ia; ia = ia->ia_next)
231123221Stjr				if (ia->ia_ifp == ifp) {
232123221Stjr					ip->ip_src = IA_SIN(ia)->sin_addr;
233123221Stjr					break;
234123221Stjr				}
235123221Stjr		}
236123221Stjr
237123221Stjr		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
238		if (inm != NULL &&
239		   (imo == NULL || imo->imo_multicast_loop)) {
240			/*
241			 * If we belong to the destination multicast group
242			 * on the outgoing interface, and the caller did not
243			 * forbid loopback, loop back a copy.
244			 */
245			ip_mloopback(ifp, m, dst);
246		}
247		else {
248			/*
249			 * If we are acting as a multicast router, perform
250			 * multicast forwarding as if the packet had just
251			 * arrived on the interface to which we are about
252			 * to send.  The multicast forwarding function
253			 * recursively calls this function, using the
254			 * IP_FORWARDING flag to prevent infinite recursion.
255			 *
256			 * Multicasts that are looped back by ip_mloopback(),
257			 * above, will be forwarded by the ip_input() routine,
258			 * if necessary.
259			 */
260			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
261				/*
262				 * Check if rsvp daemon is running. If not, don't
263				 * set ip_moptions. This ensures that the packet
264				 * is multicast and not just sent down one link
265				 * as prescribed by rsvpd.
266				 */
267				if (!rsvp_on)
268				  imo = NULL;
269				if (ip_mforward(ip, ifp, m, imo) != 0) {
270					m_freem(m);
271					goto done;
272				}
273			}
274		}
275
276		/*
277		 * Multicasts with a time-to-live of zero may be looped-
278		 * back, above, but must not be transmitted on a network.
279		 * Also, multicasts addressed to the loopback interface
280		 * are not sent -- the above call to ip_mloopback() will
281		 * loop back a copy if this host actually belongs to the
282		 * destination group on the loopback interface.
283		 */
284		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
285			m_freem(m);
286			goto done;
287		}
288
289		goto sendit;
290	}
291#ifndef notdef
292	/*
293	 * If source address not specified yet, use address
294	 * of outgoing interface.
295	 */
296	if (ip->ip_src.s_addr == INADDR_ANY)
297		ip->ip_src = IA_SIN(ia)->sin_addr;
298#endif
299	/*
300	 * Verify that we have any chance at all of being able to queue
301	 *      the packet or packet fragments
302	 */
303	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
304		ifp->if_snd.ifq_maxlen) {
305			error = ENOBUFS;
306			goto bad;
307	}
308
309	/*
310	 * Look for broadcast address and
311	 * and verify user is allowed to send
312	 * such a packet.
313	 */
314	if (isbroadcast) {
315		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
316			error = EADDRNOTAVAIL;
317			goto bad;
318		}
319		if ((flags & IP_ALLOWBROADCAST) == 0) {
320			error = EACCES;
321			goto bad;
322		}
323		/* don't allow broadcast messages to be fragmented */
324		if ((u_short)ip->ip_len > ifp->if_mtu) {
325			error = EMSGSIZE;
326			goto bad;
327		}
328		m->m_flags |= M_BCAST;
329	} else {
330		m->m_flags &= ~M_BCAST;
331	}
332
333sendit:
334#ifdef COMPAT_IPFW
335	/*
336	 * Check with the firewall...
337	 */
338	if (ip_fw_chk_ptr) {
339		int action;
340
341#ifdef IPDIVERT
342		action = (*ip_fw_chk_ptr)(&ip,
343				hlen, ifp, (~0 << 16) | ip_divert_ignore, &m);
344#else
345		action = (*ip_fw_chk_ptr)(&ip, hlen, ifp, (~0 << 16), &m);
346#endif
347		if (action == -1) {
348			error = EACCES;		/* XXX is this appropriate? */
349			goto done;
350		} else if (action != 0) {
351#ifdef IPDIVERT
352			ip_divert_port = action;	/* divert to port */
353			(*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, 0);
354			goto done;
355#else
356			m_freem(m);	/* ipfw says divert, but we can't */
357			goto done;
358#endif
359		}
360	}
361#endif /* COMPAT_IPFW */
362
363	/*
364	 * If small enough for interface, can just send directly.
365	 */
366	if ((u_short)ip->ip_len <= ifp->if_mtu) {
367		ip->ip_len = htons((u_short)ip->ip_len);
368		ip->ip_off = htons((u_short)ip->ip_off);
369		ip->ip_sum = 0;
370		if (ip->ip_vhl == IP_VHL_BORING) {
371			ip->ip_sum = in_cksum_hdr(ip);
372		} else {
373			ip->ip_sum = in_cksum(m, hlen);
374		}
375		error = (*ifp->if_output)(ifp, m,
376				(struct sockaddr *)dst, ro->ro_rt);
377		goto done;
378	}
379	/*
380	 * Too large for interface; fragment if possible.
381	 * Must be able to put at least 8 bytes per fragment.
382	 */
383	if (ip->ip_off & IP_DF) {
384		error = EMSGSIZE;
385		/*
386		 * This case can happen if the user changed the MTU
387		 * of an interface after enabling IP on it.  Because
388		 * most netifs don't keep track of routes pointing to
389		 * them, there is no way for one to update all its
390		 * routes when the MTU is changed.
391		 */
392		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
393		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
394		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
395			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
396		}
397		ipstat.ips_cantfrag++;
398		goto bad;
399	}
400	len = (ifp->if_mtu - hlen) &~ 7;
401	if (len < 8) {
402		error = EMSGSIZE;
403		goto bad;
404	}
405
406    {
407	int mhlen, firstlen = len;
408	struct mbuf **mnext = &m->m_nextpkt;
409
410	/*
411	 * Loop through length of segment after first fragment,
412	 * make new header and copy data of each part and link onto chain.
413	 */
414	m0 = m;
415	mhlen = sizeof (struct ip);
416	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
417		MGETHDR(m, M_DONTWAIT, MT_HEADER);
418		if (m == 0) {
419			error = ENOBUFS;
420			ipstat.ips_odropped++;
421			goto sendorfree;
422		}
423		m->m_data += max_linkhdr;
424		mhip = mtod(m, struct ip *);
425		*mhip = *ip;
426		if (hlen > sizeof (struct ip)) {
427			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
428			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
429		}
430		m->m_len = mhlen;
431		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
432		if (ip->ip_off & IP_MF)
433			mhip->ip_off |= IP_MF;
434		if (off + len >= (u_short)ip->ip_len)
435			len = (u_short)ip->ip_len - off;
436		else
437			mhip->ip_off |= IP_MF;
438		mhip->ip_len = htons((u_short)(len + mhlen));
439		m->m_next = m_copy(m0, off, len);
440		if (m->m_next == 0) {
441			(void) m_free(m);
442			error = ENOBUFS;	/* ??? */
443			ipstat.ips_odropped++;
444			goto sendorfree;
445		}
446		m->m_pkthdr.len = mhlen + len;
447		m->m_pkthdr.rcvif = (struct ifnet *)0;
448		mhip->ip_off = htons((u_short)mhip->ip_off);
449		mhip->ip_sum = 0;
450		if (mhip->ip_vhl == IP_VHL_BORING) {
451			mhip->ip_sum = in_cksum_hdr(mhip);
452		} else {
453			mhip->ip_sum = in_cksum(m, mhlen);
454		}
455		*mnext = m;
456		mnext = &m->m_nextpkt;
457		ipstat.ips_ofragments++;
458	}
459	/*
460	 * Update first fragment by trimming what's been copied out
461	 * and updating header, then send each fragment (in order).
462	 */
463	m = m0;
464	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
465	m->m_pkthdr.len = hlen + firstlen;
466	ip->ip_len = htons((u_short)m->m_pkthdr.len);
467	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
468	ip->ip_sum = 0;
469	if (ip->ip_vhl == IP_VHL_BORING) {
470		ip->ip_sum = in_cksum_hdr(ip);
471	} else {
472		ip->ip_sum = in_cksum(m, hlen);
473	}
474sendorfree:
475	for (m = m0; m; m = m0) {
476		m0 = m->m_nextpkt;
477		m->m_nextpkt = 0;
478		if (error == 0)
479			error = (*ifp->if_output)(ifp, m,
480			    (struct sockaddr *)dst, ro->ro_rt);
481		else
482			m_freem(m);
483	}
484
485	if (error == 0)
486		ipstat.ips_fragmented++;
487    }
488done:
489	return (error);
490bad:
491	m_freem(m0);
492	goto done;
493}
494
495/*
496 * Insert IP options into preformed packet.
497 * Adjust IP destination as required for IP source routing,
498 * as indicated by a non-zero in_addr at the start of the options.
499 *
500 * XXX This routine assumes that the packet has no options in place.
501 */
502static struct mbuf *
503ip_insertoptions(m, opt, phlen)
504	register struct mbuf *m;
505	struct mbuf *opt;
506	int *phlen;
507{
508	register struct ipoption *p = mtod(opt, struct ipoption *);
509	struct mbuf *n;
510	register struct ip *ip = mtod(m, struct ip *);
511	unsigned optlen;
512
513	optlen = opt->m_len - sizeof(p->ipopt_dst);
514	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
515		return (m);		/* XXX should fail */
516	if (p->ipopt_dst.s_addr)
517		ip->ip_dst = p->ipopt_dst;
518	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
519		MGETHDR(n, M_DONTWAIT, MT_HEADER);
520		if (n == 0)
521			return (m);
522		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
523		m->m_len -= sizeof(struct ip);
524		m->m_data += sizeof(struct ip);
525		n->m_next = m;
526		m = n;
527		m->m_len = optlen + sizeof(struct ip);
528		m->m_data += max_linkhdr;
529		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
530	} else {
531		m->m_data -= optlen;
532		m->m_len += optlen;
533		m->m_pkthdr.len += optlen;
534		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
535	}
536	ip = mtod(m, struct ip *);
537	bcopy(p->ipopt_list, ip + 1, optlen);
538	*phlen = sizeof(struct ip) + optlen;
539	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
540	ip->ip_len += optlen;
541	return (m);
542}
543
544/*
545 * Copy options from ip to jp,
546 * omitting those not copied during fragmentation.
547 */
548static int
549ip_optcopy(ip, jp)
550	struct ip *ip, *jp;
551{
552	register u_char *cp, *dp;
553	int opt, optlen, cnt;
554
555	cp = (u_char *)(ip + 1);
556	dp = (u_char *)(jp + 1);
557	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
558	for (; cnt > 0; cnt -= optlen, cp += optlen) {
559		opt = cp[0];
560		if (opt == IPOPT_EOL)
561			break;
562		if (opt == IPOPT_NOP) {
563			/* Preserve for IP mcast tunnel's LSRR alignment. */
564			*dp++ = IPOPT_NOP;
565			optlen = 1;
566			continue;
567		} else
568			optlen = cp[IPOPT_OLEN];
569		/* bogus lengths should have been caught by ip_dooptions */
570		if (optlen > cnt)
571			optlen = cnt;
572		if (IPOPT_COPIED(opt)) {
573			bcopy(cp, dp, optlen);
574			dp += optlen;
575		}
576	}
577	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
578		*dp++ = IPOPT_EOL;
579	return (optlen);
580}
581
582/*
583 * IP socket option processing.
584 */
585int
586ip_ctloutput(op, so, level, optname, mp)
587	int op;
588	struct socket *so;
589	int level, optname;
590	struct mbuf **mp;
591{
592	register struct inpcb *inp = sotoinpcb(so);
593	register struct mbuf *m = *mp;
594	register int optval = 0;
595	int error = 0;
596
597	if (level != IPPROTO_IP) {
598		error = EINVAL;
599		if (op == PRCO_SETOPT && *mp)
600			(void) m_free(*mp);
601	} else switch (op) {
602
603	case PRCO_SETOPT:
604		switch (optname) {
605		case IP_OPTIONS:
606#ifdef notyet
607		case IP_RETOPTS:
608			return (ip_pcbopts(optname, &inp->inp_options, m));
609#else
610			return (ip_pcbopts(&inp->inp_options, m));
611#endif
612
613		case IP_TOS:
614		case IP_TTL:
615		case IP_RECVOPTS:
616		case IP_RECVRETOPTS:
617		case IP_RECVDSTADDR:
618			if (m == 0 || m->m_len != sizeof(int))
619				error = EINVAL;
620			else {
621				optval = *mtod(m, int *);
622				switch (optname) {
623
624				case IP_TOS:
625					inp->inp_ip.ip_tos = optval;
626					break;
627
628				case IP_TTL:
629					inp->inp_ip.ip_ttl = optval;
630					break;
631#define	OPTSET(bit) \
632	if (optval) \
633		inp->inp_flags |= bit; \
634	else \
635		inp->inp_flags &= ~bit;
636
637				case IP_RECVOPTS:
638					OPTSET(INP_RECVOPTS);
639					break;
640
641				case IP_RECVRETOPTS:
642					OPTSET(INP_RECVRETOPTS);
643					break;
644
645				case IP_RECVDSTADDR:
646					OPTSET(INP_RECVDSTADDR);
647					break;
648				}
649			}
650			break;
651#undef OPTSET
652
653		case IP_MULTICAST_IF:
654		case IP_MULTICAST_VIF:
655		case IP_MULTICAST_TTL:
656		case IP_MULTICAST_LOOP:
657		case IP_ADD_MEMBERSHIP:
658		case IP_DROP_MEMBERSHIP:
659			error = ip_setmoptions(optname, &inp->inp_moptions, m);
660			break;
661
662		case IP_PORTRANGE:
663			if (m == 0 || m->m_len != sizeof(int))
664				error = EINVAL;
665			else {
666				optval = *mtod(m, int *);
667
668				switch (optval) {
669
670				case IP_PORTRANGE_DEFAULT:
671					inp->inp_flags &= ~(INP_LOWPORT);
672					inp->inp_flags &= ~(INP_HIGHPORT);
673					break;
674
675				case IP_PORTRANGE_HIGH:
676					inp->inp_flags &= ~(INP_LOWPORT);
677					inp->inp_flags |= INP_HIGHPORT;
678					break;
679
680				case IP_PORTRANGE_LOW:
681					inp->inp_flags &= ~(INP_HIGHPORT);
682					inp->inp_flags |= INP_LOWPORT;
683					break;
684
685				default:
686					error = EINVAL;
687					break;
688				}
689			}
690			break;
691
692		default:
693			error = ENOPROTOOPT;
694			break;
695		}
696		if (m)
697			(void)m_free(m);
698		break;
699
700	case PRCO_GETOPT:
701		switch (optname) {
702		case IP_OPTIONS:
703		case IP_RETOPTS:
704			*mp = m = m_get(M_WAIT, MT_SOOPTS);
705			if (inp->inp_options) {
706				m->m_len = inp->inp_options->m_len;
707				bcopy(mtod(inp->inp_options, void *),
708				    mtod(m, void *), m->m_len);
709			} else
710				m->m_len = 0;
711			break;
712
713		case IP_TOS:
714		case IP_TTL:
715		case IP_RECVOPTS:
716		case IP_RECVRETOPTS:
717		case IP_RECVDSTADDR:
718			*mp = m = m_get(M_WAIT, MT_SOOPTS);
719			m->m_len = sizeof(int);
720			switch (optname) {
721
722			case IP_TOS:
723				optval = inp->inp_ip.ip_tos;
724				break;
725
726			case IP_TTL:
727				optval = inp->inp_ip.ip_ttl;
728				break;
729
730#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
731
732			case IP_RECVOPTS:
733				optval = OPTBIT(INP_RECVOPTS);
734				break;
735
736			case IP_RECVRETOPTS:
737				optval = OPTBIT(INP_RECVRETOPTS);
738				break;
739
740			case IP_RECVDSTADDR:
741				optval = OPTBIT(INP_RECVDSTADDR);
742				break;
743			}
744			*mtod(m, int *) = optval;
745			break;
746
747		case IP_MULTICAST_IF:
748		case IP_MULTICAST_VIF:
749		case IP_MULTICAST_TTL:
750		case IP_MULTICAST_LOOP:
751		case IP_ADD_MEMBERSHIP:
752		case IP_DROP_MEMBERSHIP:
753			error = ip_getmoptions(optname, inp->inp_moptions, mp);
754			break;
755
756		case IP_PORTRANGE:
757			*mp = m = m_get(M_WAIT, MT_SOOPTS);
758			m->m_len = sizeof(int);
759
760			if (inp->inp_flags & INP_HIGHPORT)
761				optval = IP_PORTRANGE_HIGH;
762			else if (inp->inp_flags & INP_LOWPORT)
763				optval = IP_PORTRANGE_LOW;
764			else
765				optval = 0;
766
767			*mtod(m, int *) = optval;
768			break;
769
770		default:
771			error = ENOPROTOOPT;
772			break;
773		}
774		break;
775	}
776	return (error);
777}
778
779/*
780 * Set up IP options in pcb for insertion in output packets.
781 * Store in mbuf with pointer in pcbopt, adding pseudo-option
782 * with destination address if source routed.
783 */
784static int
785#ifdef notyet
786ip_pcbopts(optname, pcbopt, m)
787	int optname;
788#else
789ip_pcbopts(pcbopt, m)
790#endif
791	struct mbuf **pcbopt;
792	register struct mbuf *m;
793{
794	register cnt, optlen;
795	register u_char *cp;
796	u_char opt;
797
798	/* turn off any old options */
799	if (*pcbopt)
800		(void)m_free(*pcbopt);
801	*pcbopt = 0;
802	if (m == (struct mbuf *)0 || m->m_len == 0) {
803		/*
804		 * Only turning off any previous options.
805		 */
806		if (m)
807			(void)m_free(m);
808		return (0);
809	}
810
811#ifndef	vax
812	if (m->m_len % sizeof(long))
813		goto bad;
814#endif
815	/*
816	 * IP first-hop destination address will be stored before
817	 * actual options; move other options back
818	 * and clear it when none present.
819	 */
820	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
821		goto bad;
822	cnt = m->m_len;
823	m->m_len += sizeof(struct in_addr);
824	cp = mtod(m, u_char *) + sizeof(struct in_addr);
825	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
826	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
827
828	for (; cnt > 0; cnt -= optlen, cp += optlen) {
829		opt = cp[IPOPT_OPTVAL];
830		if (opt == IPOPT_EOL)
831			break;
832		if (opt == IPOPT_NOP)
833			optlen = 1;
834		else {
835			optlen = cp[IPOPT_OLEN];
836			if (optlen <= IPOPT_OLEN || optlen > cnt)
837				goto bad;
838		}
839		switch (opt) {
840
841		default:
842			break;
843
844		case IPOPT_LSRR:
845		case IPOPT_SSRR:
846			/*
847			 * user process specifies route as:
848			 *	->A->B->C->D
849			 * D must be our final destination (but we can't
850			 * check that since we may not have connected yet).
851			 * A is first hop destination, which doesn't appear in
852			 * actual IP option, but is stored before the options.
853			 */
854			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
855				goto bad;
856			m->m_len -= sizeof(struct in_addr);
857			cnt -= sizeof(struct in_addr);
858			optlen -= sizeof(struct in_addr);
859			cp[IPOPT_OLEN] = optlen;
860			/*
861			 * Move first hop before start of options.
862			 */
863			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
864			    sizeof(struct in_addr));
865			/*
866			 * Then copy rest of options back
867			 * to close up the deleted entry.
868			 */
869			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
870			    sizeof(struct in_addr)),
871			    (caddr_t)&cp[IPOPT_OFFSET+1],
872			    (unsigned)cnt + sizeof(struct in_addr));
873			break;
874		}
875	}
876	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
877		goto bad;
878	*pcbopt = m;
879	return (0);
880
881bad:
882	(void)m_free(m);
883	return (EINVAL);
884}
885
886/*
887 * Set the IP multicast options in response to user setsockopt().
888 */
889static int
890ip_setmoptions(optname, imop, m)
891	int optname;
892	struct ip_moptions **imop;
893	struct mbuf *m;
894{
895	register int error = 0;
896	u_char loop;
897	register int i;
898	struct in_addr addr;
899	register struct ip_mreq *mreq;
900	register struct ifnet *ifp;
901	register struct ip_moptions *imo = *imop;
902	struct route ro;
903	register struct sockaddr_in *dst;
904	int s;
905
906	if (imo == NULL) {
907		/*
908		 * No multicast option buffer attached to the pcb;
909		 * allocate one and initialize to default values.
910		 */
911		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
912		    M_WAITOK);
913
914		if (imo == NULL)
915			return (ENOBUFS);
916		*imop = imo;
917		imo->imo_multicast_ifp = NULL;
918		imo->imo_multicast_vif = -1;
919		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
920		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
921		imo->imo_num_memberships = 0;
922	}
923
924	switch (optname) {
925	/* store an index number for the vif you wanna use in the send */
926	case IP_MULTICAST_VIF:
927		if (!legal_vif_num) {
928			error = EOPNOTSUPP;
929			break;
930		}
931		if (m == NULL || m->m_len != sizeof(int)) {
932			error = EINVAL;
933			break;
934		}
935		i = *(mtod(m, int *));
936		if (!legal_vif_num(i) && (i != -1)) {
937			error = EINVAL;
938			break;
939		}
940		imo->imo_multicast_vif = i;
941		break;
942
943	case IP_MULTICAST_IF:
944		/*
945		 * Select the interface for outgoing multicast packets.
946		 */
947		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
948			error = EINVAL;
949			break;
950		}
951		addr = *(mtod(m, struct in_addr *));
952		/*
953		 * INADDR_ANY is used to remove a previous selection.
954		 * When no interface is selected, a default one is
955		 * chosen every time a multicast packet is sent.
956		 */
957		if (addr.s_addr == INADDR_ANY) {
958			imo->imo_multicast_ifp = NULL;
959			break;
960		}
961		/*
962		 * The selected interface is identified by its local
963		 * IP address.  Find the interface and confirm that
964		 * it supports multicasting.
965		 */
966		s = splimp();
967		INADDR_TO_IFP(addr, ifp);
968		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
969			splx(s);
970			error = EADDRNOTAVAIL;
971			break;
972		}
973		imo->imo_multicast_ifp = ifp;
974		splx(s);
975		break;
976
977	case IP_MULTICAST_TTL:
978		/*
979		 * Set the IP time-to-live for outgoing multicast packets.
980		 */
981		if (m == NULL || m->m_len != 1) {
982			error = EINVAL;
983			break;
984		}
985		imo->imo_multicast_ttl = *(mtod(m, u_char *));
986		break;
987
988	case IP_MULTICAST_LOOP:
989		/*
990		 * Set the loopback flag for outgoing multicast packets.
991		 * Must be zero or one.
992		 */
993		if (m == NULL || m->m_len != 1 ||
994		   (loop = *(mtod(m, u_char *))) > 1) {
995			error = EINVAL;
996			break;
997		}
998		imo->imo_multicast_loop = loop;
999		break;
1000
1001	case IP_ADD_MEMBERSHIP:
1002		/*
1003		 * Add a multicast group membership.
1004		 * Group must be a valid IP multicast address.
1005		 */
1006		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1007			error = EINVAL;
1008			break;
1009		}
1010		mreq = mtod(m, struct ip_mreq *);
1011		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1012			error = EINVAL;
1013			break;
1014		}
1015		s = splimp();
1016		/*
1017		 * If no interface address was provided, use the interface of
1018		 * the route to the given multicast address.
1019		 */
1020		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1021			bzero((caddr_t)&ro, sizeof(ro));
1022			dst = (struct sockaddr_in *)&ro.ro_dst;
1023			dst->sin_len = sizeof(*dst);
1024			dst->sin_family = AF_INET;
1025			dst->sin_addr = mreq->imr_multiaddr;
1026			rtalloc(&ro);
1027			if (ro.ro_rt == NULL) {
1028				error = EADDRNOTAVAIL;
1029				splx(s);
1030				break;
1031			}
1032			ifp = ro.ro_rt->rt_ifp;
1033			rtfree(ro.ro_rt);
1034		}
1035		else {
1036			INADDR_TO_IFP(mreq->imr_interface, ifp);
1037		}
1038
1039		/*
1040		 * See if we found an interface, and confirm that it
1041		 * supports multicast.
1042		 */
1043		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1044			error = EADDRNOTAVAIL;
1045			splx(s);
1046			break;
1047		}
1048		/*
1049		 * See if the membership already exists or if all the
1050		 * membership slots are full.
1051		 */
1052		for (i = 0; i < imo->imo_num_memberships; ++i) {
1053			if (imo->imo_membership[i]->inm_ifp == ifp &&
1054			    imo->imo_membership[i]->inm_addr.s_addr
1055						== mreq->imr_multiaddr.s_addr)
1056				break;
1057		}
1058		if (i < imo->imo_num_memberships) {
1059			error = EADDRINUSE;
1060			splx(s);
1061			break;
1062		}
1063		if (i == IP_MAX_MEMBERSHIPS) {
1064			error = ETOOMANYREFS;
1065			splx(s);
1066			break;
1067		}
1068		/*
1069		 * Everything looks good; add a new record to the multicast
1070		 * address list for the given interface.
1071		 */
1072		if ((imo->imo_membership[i] =
1073		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1074			error = ENOBUFS;
1075			splx(s);
1076			break;
1077		}
1078		++imo->imo_num_memberships;
1079		splx(s);
1080		break;
1081
1082	case IP_DROP_MEMBERSHIP:
1083		/*
1084		 * Drop a multicast group membership.
1085		 * Group must be a valid IP multicast address.
1086		 */
1087		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1088			error = EINVAL;
1089			break;
1090		}
1091		mreq = mtod(m, struct ip_mreq *);
1092		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1093			error = EINVAL;
1094			break;
1095		}
1096
1097		s = splimp();
1098		/*
1099		 * If an interface address was specified, get a pointer
1100		 * to its ifnet structure.
1101		 */
1102		if (mreq->imr_interface.s_addr == INADDR_ANY)
1103			ifp = NULL;
1104		else {
1105			INADDR_TO_IFP(mreq->imr_interface, ifp);
1106			if (ifp == NULL) {
1107				error = EADDRNOTAVAIL;
1108				splx(s);
1109				break;
1110			}
1111		}
1112		/*
1113		 * Find the membership in the membership array.
1114		 */
1115		for (i = 0; i < imo->imo_num_memberships; ++i) {
1116			if ((ifp == NULL ||
1117			     imo->imo_membership[i]->inm_ifp == ifp) &&
1118			     imo->imo_membership[i]->inm_addr.s_addr ==
1119			     mreq->imr_multiaddr.s_addr)
1120				break;
1121		}
1122		if (i == imo->imo_num_memberships) {
1123			error = EADDRNOTAVAIL;
1124			splx(s);
1125			break;
1126		}
1127		/*
1128		 * Give up the multicast address record to which the
1129		 * membership points.
1130		 */
1131		in_delmulti(imo->imo_membership[i]);
1132		/*
1133		 * Remove the gap in the membership array.
1134		 */
1135		for (++i; i < imo->imo_num_memberships; ++i)
1136			imo->imo_membership[i-1] = imo->imo_membership[i];
1137		--imo->imo_num_memberships;
1138		splx(s);
1139		break;
1140
1141	default:
1142		error = EOPNOTSUPP;
1143		break;
1144	}
1145
1146	/*
1147	 * If all options have default values, no need to keep the mbuf.
1148	 */
1149	if (imo->imo_multicast_ifp == NULL &&
1150	    imo->imo_multicast_vif == -1 &&
1151	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1152	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1153	    imo->imo_num_memberships == 0) {
1154		free(*imop, M_IPMOPTS);
1155		*imop = NULL;
1156	}
1157
1158	return (error);
1159}
1160
1161/*
1162 * Return the IP multicast options in response to user getsockopt().
1163 */
1164static int
1165ip_getmoptions(optname, imo, mp)
1166	int optname;
1167	register struct ip_moptions *imo;
1168	register struct mbuf **mp;
1169{
1170	u_char *ttl;
1171	u_char *loop;
1172	struct in_addr *addr;
1173	struct in_ifaddr *ia;
1174
1175	*mp = m_get(M_WAIT, MT_SOOPTS);
1176
1177	switch (optname) {
1178
1179	case IP_MULTICAST_VIF:
1180		if (imo != NULL)
1181			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1182		else
1183			*(mtod(*mp, int *)) = -1;
1184		(*mp)->m_len = sizeof(int);
1185		return(0);
1186
1187	case IP_MULTICAST_IF:
1188		addr = mtod(*mp, struct in_addr *);
1189		(*mp)->m_len = sizeof(struct in_addr);
1190		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1191			addr->s_addr = INADDR_ANY;
1192		else {
1193			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1194			addr->s_addr = (ia == NULL) ? INADDR_ANY
1195					: IA_SIN(ia)->sin_addr.s_addr;
1196		}
1197		return (0);
1198
1199	case IP_MULTICAST_TTL:
1200		ttl = mtod(*mp, u_char *);
1201		(*mp)->m_len = 1;
1202		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1203				     : imo->imo_multicast_ttl;
1204		return (0);
1205
1206	case IP_MULTICAST_LOOP:
1207		loop = mtod(*mp, u_char *);
1208		(*mp)->m_len = 1;
1209		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1210				      : imo->imo_multicast_loop;
1211		return (0);
1212
1213	default:
1214		return (EOPNOTSUPP);
1215	}
1216}
1217
1218/*
1219 * Discard the IP multicast options.
1220 */
1221void
1222ip_freemoptions(imo)
1223	register struct ip_moptions *imo;
1224{
1225	register int i;
1226
1227	if (imo != NULL) {
1228		for (i = 0; i < imo->imo_num_memberships; ++i)
1229			in_delmulti(imo->imo_membership[i]);
1230		free(imo, M_IPMOPTS);
1231	}
1232}
1233
1234/*
1235 * Routine called from ip_output() to loop back a copy of an IP multicast
1236 * packet to the input queue of a specified interface.  Note that this
1237 * calls the output routine of the loopback "driver", but with an interface
1238 * pointer that might NOT be a loopback interface -- evil, but easier than
1239 * replicating that code here.
1240 */
1241static void
1242ip_mloopback(ifp, m, dst)
1243	struct ifnet *ifp;
1244	register struct mbuf *m;
1245	register struct sockaddr_in *dst;
1246{
1247	register struct ip *ip;
1248	struct mbuf *copym;
1249
1250	copym = m_copy(m, 0, M_COPYALL);
1251	if (copym != NULL) {
1252		/*
1253		 * We don't bother to fragment if the IP length is greater
1254		 * than the interface's MTU.  Can this possibly matter?
1255		 */
1256		ip = mtod(copym, struct ip *);
1257		ip->ip_len = htons((u_short)ip->ip_len);
1258		ip->ip_off = htons((u_short)ip->ip_off);
1259		ip->ip_sum = 0;
1260		if (ip->ip_vhl == IP_VHL_BORING) {
1261			ip->ip_sum = in_cksum_hdr(ip);
1262		} else {
1263			ip->ip_sum = in_cksum(copym,
1264					      IP_VHL_HL(ip->ip_vhl) << 2);
1265		}
1266		/*
1267		 * NB:
1268		 * We can't simply call ip_input() directly because
1269		 * the ip_mforward() depends on the `input interface'
1270		 * being set to something unreasonable so that we don't
1271		 * attempt to forward the looped-back copy.
1272		 * It's also not clear whether there are any lingering
1273		 * reentrancy problems in other areas which might be
1274		 * exposed by this code.  For the moment, we'll err
1275		 * on the side of safety by continuing to abuse
1276		 * loinput().
1277		 */
1278#ifdef notdef
1279		copym->m_pkthdr.rcvif = &loif[0];
1280		ip_input(copym)
1281#else
1282		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1283#endif
1284	}
1285}
1286