raw_ip.c revision 23324
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
34 *	$Id: raw_ip.c,v 1.42 1997/02/18 20:46:29 wollman Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/errno.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/protosw.h>
44#include <sys/queue.h>
45#include <sys/socket.h>
46#include <sys/socketvar.h>
47#include <sys/sysctl.h>
48
49#include <net/if.h>
50#include <net/route.h>
51
52#define _IP_VHL
53#include <netinet/in.h>
54#include <netinet/in_systm.h>
55#include <netinet/ip.h>
56#include <netinet/in_pcb.h>
57#include <netinet/in_var.h>
58#include <netinet/ip_var.h>
59#include <netinet/ip_mroute.h>
60
61#include <netinet/ip_fw.h>
62
/*
 * Normalize COMPAT_IPFW.  If it is undefined, or defined as 1, it ends up
 * defined as exactly 1; any other value leaves it undefined so that the
 * "#ifdef COMPAT_IPFW" sections below are compiled out.
 */
#if defined(COMPAT_IPFW) && COMPAT_IPFW != 1
#undef COMPAT_IPFW
#else
#undef COMPAT_IPFW
#define COMPAT_IPFW 1
#endif
69
70static struct inpcbhead ripcb;
71static struct inpcbinfo ripcbinfo;
72
73/*
74 * Nominal space allocated to a raw ip socket.
75 */
76#define	RIPSNDQ		8192
77#define	RIPRCVQ		8192
78
79/*
80 * Raw interface to IP protocol.
81 */
82
83/*
84 * Initialize raw connection block q.
85 */
86void
87rip_init()
88{
89	LIST_INIT(&ripcb);
90	ripcbinfo.listhead = &ripcb;
91	/*
92	 * XXX We don't use the hash list for raw IP, but it's easier
93	 * to allocate a one entry hash list than it is to check all
94	 * over the place for hashbase == NULL.
95	 */
96	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
97}
98
99static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
100/*
101 * Setup generic address and protocol structures
102 * for raw_input routine, then pass them along with
103 * mbuf chain.
104 */
105void
106rip_input(m, iphlen)
107	struct mbuf *m;
108	int iphlen;
109{
110	register struct ip *ip = mtod(m, struct ip *);
111	register struct inpcb *inp;
112	struct inpcb *last = 0;
113	struct mbuf *opts = 0;
114
115	ripsrc.sin_addr = ip->ip_src;
116	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
117		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
118			continue;
119		if (inp->inp_laddr.s_addr &&
120                  inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
121			continue;
122		if (inp->inp_faddr.s_addr &&
123                  inp->inp_faddr.s_addr != ip->ip_src.s_addr)
124			continue;
125		if (last) {
126			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
127			if (n) {
128				if (last->inp_flags & INP_CONTROLOPTS ||
129				    last->inp_socket->so_options & SO_TIMESTAMP)
130				    ip_savecontrol(last, &opts, ip, n);
131				if (sbappendaddr(&last->inp_socket->so_rcv,
132				    (struct sockaddr *)&ripsrc, n,
133				    opts) == 0) {
134					/* should notify about lost packet */
135					m_freem(n);
136					if (opts)
137					    m_freem(opts);
138				} else
139					sorwakeup(last->inp_socket);
140				opts = 0;
141			}
142		}
143		last = inp;
144	}
145	if (last) {
146		if (last->inp_flags & INP_CONTROLOPTS ||
147		    last->inp_socket->so_options & SO_TIMESTAMP)
148			ip_savecontrol(last, &opts, ip, m);
149		if (sbappendaddr(&last->inp_socket->so_rcv,
150		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
151			m_freem(m);
152			if (opts)
153			    m_freem(opts);
154		} else
155			sorwakeup(last->inp_socket);
156	} else {
157		m_freem(m);
158              ipstat.ips_noproto++;
159              ipstat.ips_delivered--;
160      }
161}
162
163/*
164 * Generate IP header and pass packet to ip_output.
165 * Tack on options user may have setup with control call.
166 */
167int
168rip_output(m, so, dst)
169	register struct mbuf *m;
170	struct socket *so;
171	u_long dst;
172{
173	register struct ip *ip;
174	register struct inpcb *inp = sotoinpcb(so);
175	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
176
177	/*
178	 * If the user handed us a complete IP packet, use it.
179	 * Otherwise, allocate an mbuf for a header and fill it in.
180	 */
181	if ((inp->inp_flags & INP_HDRINCL) == 0) {
182		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
183			m_freem(m);
184			return(EMSGSIZE);
185		}
186		M_PREPEND(m, sizeof(struct ip), M_WAIT);
187		ip = mtod(m, struct ip *);
188		ip->ip_tos = 0;
189		ip->ip_off = 0;
190		ip->ip_p = inp->inp_ip.ip_p;
191		ip->ip_len = m->m_pkthdr.len;
192		ip->ip_src = inp->inp_laddr;
193		ip->ip_dst.s_addr = dst;
194		ip->ip_ttl = MAXTTL;
195	} else {
196		if (m->m_pkthdr.len > IP_MAXPACKET) {
197			m_freem(m);
198			return(EMSGSIZE);
199		}
200		ip = mtod(m, struct ip *);
201		/* don't allow both user specified and setsockopt options,
202		   and don't allow packet length sizes that will crash */
203		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
204		     && inp->inp_options)
205		    || (ip->ip_len > m->m_pkthdr.len)) {
206			m_freem(m);
207			return EINVAL;
208		}
209		if (ip->ip_id == 0)
210			ip->ip_id = htons(ip_id++);
211		/* XXX prevent ip_output from overwriting header fields */
212		flags |= IP_RAWOUTPUT;
213		ipstat.ips_rawout++;
214	}
215	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
216			  inp->inp_moptions));
217}
218
219/*
220 * Raw IP socket option processing.
221 */
222int
223rip_ctloutput(op, so, level, optname, m)
224	int op;
225	struct socket *so;
226	int level, optname;
227	struct mbuf **m;
228{
229	register struct inpcb *inp = sotoinpcb(so);
230	register int error;
231
232	if (level != IPPROTO_IP) {
233		if (op == PRCO_SETOPT && *m)
234			(void)m_free(*m);
235		return (EINVAL);
236	}
237
238	switch (optname) {
239
240	case IP_HDRINCL:
241		error = 0;
242		if (op == PRCO_SETOPT) {
243			if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
244				error = EINVAL;
245			else if (*mtod(*m, int *))
246				inp->inp_flags |= INP_HDRINCL;
247			else
248				inp->inp_flags &= ~INP_HDRINCL;
249			if (*m)
250				(void)m_free(*m);
251		} else {
252			*m = m_get(M_WAIT, MT_SOOPTS);
253			(*m)->m_len = sizeof (int);
254			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
255		}
256		return (error);
257
258#ifdef COMPAT_IPFW
259	case IP_FW_GET:
260		if (ip_fw_ctl_ptr == NULL || op == PRCO_SETOPT) {
261			if (*m) (void)m_free(*m);
262			return(EINVAL);
263		}
264		return (*ip_fw_ctl_ptr)(optname, m);
265
266	case IP_FW_ADD:
267	case IP_FW_DEL:
268	case IP_FW_FLUSH:
269	case IP_FW_ZERO:
270		if (ip_fw_ctl_ptr == NULL || op != PRCO_SETOPT) {
271			if (*m) (void)m_free(*m);
272			return(EINVAL);
273		}
274		return (*ip_fw_ctl_ptr)(optname, m);
275
276	case IP_NAT:
277		if (ip_nat_ctl_ptr == NULL) {
278			if (*m) (void)m_free(*m);
279			return(EINVAL);
280		}
281		return (*ip_nat_ctl_ptr)(op, m);
282
283#endif
284	case IP_RSVP_ON:
285		return ip_rsvp_init(so);
286		break;
287
288	case IP_RSVP_OFF:
289		return ip_rsvp_done();
290		break;
291
292	case IP_RSVP_VIF_ON:
293		return ip_rsvp_vif_init(so, *m);
294
295	case IP_RSVP_VIF_OFF:
296		return ip_rsvp_vif_done(so, *m);
297
298	case MRT_INIT:
299	case MRT_DONE:
300	case MRT_ADD_VIF:
301	case MRT_DEL_VIF:
302	case MRT_ADD_MFC:
303	case MRT_DEL_MFC:
304	case MRT_VERSION:
305	case MRT_ASSERT:
306		if (op == PRCO_SETOPT) {
307			error = ip_mrouter_set(optname, so, *m);
308			if (*m)
309				(void)m_free(*m);
310		} else if (op == PRCO_GETOPT) {
311			error = ip_mrouter_get(optname, so, m);
312		} else
313			error = EINVAL;
314		return (error);
315	}
316	return (ip_ctloutput(op, so, level, optname, m));
317}
318
319/*
320 * This function exists solely to receive the PRC_IFDOWN messages which
321 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
322 * and calls in_ifadown() to remove all routes corresponding to that address.
323 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
324 * interface routes.
325 */
326void
327rip_ctlinput(cmd, sa, vip)
328	int cmd;
329	struct sockaddr *sa;
330	void *vip;
331{
332	struct in_ifaddr *ia;
333	struct ifnet *ifp;
334	int err;
335	int flags;
336
337	switch(cmd) {
338	case PRC_IFDOWN:
339		for (ia = in_ifaddrhead.tqh_first; ia;
340		     ia = ia->ia_link.tqe_next) {
341			if (ia->ia_ifa.ifa_addr == sa
342			    && (ia->ia_flags & IFA_ROUTE)) {
343				/*
344				 * in_ifscrub kills the interface route.
345				 */
346				in_ifscrub(ia->ia_ifp, ia);
347				/*
348				 * in_ifadown gets rid of all the rest of
349				 * the routes.  This is not quite the right
350				 * thing to do, but at least if we are running
351				 * a routing process they will come back.
352				 */
353				in_ifadown(&ia->ia_ifa);
354				break;
355			}
356		}
357		break;
358
359	case PRC_IFUP:
360		for (ia = in_ifaddrhead.tqh_first; ia;
361		     ia = ia->ia_link.tqe_next) {
362			if (ia->ia_ifa.ifa_addr == sa)
363				break;
364		}
365		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
366			return;
367		flags = RTF_UP;
368		ifp = ia->ia_ifa.ifa_ifp;
369
370		if ((ifp->if_flags & IFF_LOOPBACK)
371		    || (ifp->if_flags & IFF_POINTOPOINT))
372			flags |= RTF_HOST;
373
374		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
375		if (err == 0)
376			ia->ia_flags |= IFA_ROUTE;
377		break;
378	}
379}
380
381static u_long	rip_sendspace = RIPSNDQ;
382static u_long	rip_recvspace = RIPRCVQ;
383
384SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace,
385	   0, "");
386SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace,
387	   0, "");
388
389static int
390rip_attach(struct socket *so, int proto)
391{
392	struct inpcb *inp;
393	int error;
394
395	inp = sotoinpcb(so);
396	if (inp)
397		panic("rip_attach");
398	if ((so->so_state & SS_PRIV) == 0)
399		return EACCES;
400
401	if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
402	    (error = in_pcballoc(so, &ripcbinfo)))
403		return error;
404	inp = (struct inpcb *)so->so_pcb;
405	inp->inp_ip.ip_p = proto;
406	return 0;
407}
408
409static int
410rip_detach(struct socket *so)
411{
412	struct inpcb *inp;
413
414	inp = sotoinpcb(so);
415	if (inp == 0)
416		panic("rip_detach");
417	if (so == ip_mrouter)
418		ip_mrouter_done();
419	ip_rsvp_force_done(so);
420	if (so == ip_rsvpd)
421		ip_rsvp_done();
422	in_pcbdetach(inp);
423	return 0;
424}
425
/*
 * Abort a raw IP socket: mark it disconnected, then detach its PCB.
 * Returns the result of rip_detach().
 */
static int
rip_abort(struct socket *so)
{
	int error;

	soisdisconnected(so);
	error = rip_detach(so);
	return error;
}
432
433static int
434rip_disconnect(struct socket *so)
435{
436	if ((so->so_state & SS_ISCONNECTED) == 0)
437		return ENOTCONN;
438	return rip_abort(so);
439}
440
441static int
442rip_bind(struct socket *so, struct mbuf *nam)
443{
444	struct inpcb *inp = sotoinpcb(so);
445	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
446
447	if (nam->m_len != sizeof(*addr))
448		return EINVAL;
449
450	if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) &&
451				    (addr->sin_family != AF_IMPLINK)) ||
452	    (addr->sin_addr.s_addr &&
453	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
454		return EADDRNOTAVAIL;
455	inp->inp_laddr = addr->sin_addr;
456	return 0;
457}
458
459static int
460rip_connect(struct socket *so, struct mbuf *nam)
461{
462	struct inpcb *inp = sotoinpcb(so);
463	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
464
465	if (nam->m_len != sizeof(*addr))
466		return EINVAL;
467	if (TAILQ_EMPTY(&ifnet))
468		return EADDRNOTAVAIL;
469	if ((addr->sin_family != AF_INET) &&
470	    (addr->sin_family != AF_IMPLINK))
471		return EAFNOSUPPORT;
472	inp->inp_faddr = addr->sin_addr;
473	soisconnected(so);
474	return 0;
475}
476
/*
 * Shut down the sending side of a raw IP socket.  Always returns 0.
 */
static int
rip_shutdown(struct socket *so)
{
	/* Flag the socket so that no further data can be sent on it. */
	socantsendmore(so);

	return 0;
}
483
484static int
485rip_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam,
486	 struct mbuf *control)
487{
488	struct inpcb *inp = sotoinpcb(so);
489	register u_long dst;
490
491	if (so->so_state & SS_ISCONNECTED) {
492		if (nam) {
493			m_freem(m);
494			return EISCONN;
495		}
496		dst = inp->inp_faddr.s_addr;
497	} else {
498		if (nam == NULL) {
499			m_freem(m);
500			return ENOTCONN;
501		}
502		dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
503	}
504	return rip_output(m, so, dst);
505}
506
507struct pr_usrreqs rip_usrreqs = {
508	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
509	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
510	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
511	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
512	in_setsockaddr
513};
514