raw_ip.c revision 32821
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
34 *	$Id: raw_ip.c,v 1.50 1997/12/18 09:13:39 davidg Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/proc.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/sysctl.h>
47
48#include <net/if.h>
49#include <net/route.h>
50
51#define _IP_VHL
52#include <netinet/in.h>
53#include <netinet/in_systm.h>
54#include <netinet/ip.h>
55#include <netinet/in_pcb.h>
56#include <netinet/in_var.h>
57#include <netinet/ip_var.h>
58#include <netinet/ip_mroute.h>
59
60#include <netinet/ip_fw.h>
61
/*
 * Normalize COMPAT_IPFW: when it is undefined or defined as 1 it ends up
 * defined as exactly 1; for any other value it ends up undefined, which
 * compiles out the #ifdef COMPAT_IPFW sections further down.
 */
#if defined(COMPAT_IPFW) && COMPAT_IPFW != 1
#undef COMPAT_IPFW
#else
#undef COMPAT_IPFW
#define COMPAT_IPFW 1
#endif
68
69static struct inpcbhead ripcb;
70static struct inpcbinfo ripcbinfo;
71
72/*
73 * Nominal space allocated to a raw ip socket.
74 */
75#define	RIPSNDQ		8192
76#define	RIPRCVQ		8192
77
78/*
79 * Raw interface to IP protocol.
80 */
81
82/*
83 * Initialize raw connection block q.
84 */
85void
86rip_init()
87{
88	LIST_INIT(&ripcb);
89	ripcbinfo.listhead = &ripcb;
90	/*
91	 * XXX We don't use the hash list for raw IP, but it's easier
92	 * to allocate a one entry hash list than it is to check all
93	 * over the place for hashbase == NULL.
94	 */
95	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
96	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
97}
98
99static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
100/*
101 * Setup generic address and protocol structures
102 * for raw_input routine, then pass them along with
103 * mbuf chain.
104 */
105void
106rip_input(m, iphlen)
107	struct mbuf *m;
108	int iphlen;
109{
110	register struct ip *ip = mtod(m, struct ip *);
111	register struct inpcb *inp;
112	struct inpcb *last = 0;
113	struct mbuf *opts = 0;
114
115	ripsrc.sin_addr = ip->ip_src;
116	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
117		if (inp->inp_ip_p && inp->inp_ip_p != ip->ip_p)
118			continue;
119		if (inp->inp_laddr.s_addr &&
120                  inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
121			continue;
122		if (inp->inp_faddr.s_addr &&
123                  inp->inp_faddr.s_addr != ip->ip_src.s_addr)
124			continue;
125		if (last) {
126			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
127			if (n) {
128				if (last->inp_flags & INP_CONTROLOPTS ||
129				    last->inp_socket->so_options & SO_TIMESTAMP)
130				    ip_savecontrol(last, &opts, ip, n);
131				if (sbappendaddr(&last->inp_socket->so_rcv,
132				    (struct sockaddr *)&ripsrc, n,
133				    opts) == 0) {
134					/* should notify about lost packet */
135					m_freem(n);
136					if (opts)
137					    m_freem(opts);
138				} else
139					sorwakeup(last->inp_socket);
140				opts = 0;
141			}
142		}
143		last = inp;
144	}
145	if (last) {
146		if (last->inp_flags & INP_CONTROLOPTS ||
147		    last->inp_socket->so_options & SO_TIMESTAMP)
148			ip_savecontrol(last, &opts, ip, m);
149		if (sbappendaddr(&last->inp_socket->so_rcv,
150		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
151			m_freem(m);
152			if (opts)
153			    m_freem(opts);
154		} else
155			sorwakeup(last->inp_socket);
156	} else {
157		m_freem(m);
158              ipstat.ips_noproto++;
159              ipstat.ips_delivered--;
160      }
161}
162
163/*
164 * Generate IP header and pass packet to ip_output.
165 * Tack on options user may have setup with control call.
166 */
167int
168rip_output(m, so, dst)
169	register struct mbuf *m;
170	struct socket *so;
171	u_long dst;
172{
173	register struct ip *ip;
174	register struct inpcb *inp = sotoinpcb(so);
175	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
176
177	/*
178	 * If the user handed us a complete IP packet, use it.
179	 * Otherwise, allocate an mbuf for a header and fill it in.
180	 */
181	if ((inp->inp_flags & INP_HDRINCL) == 0) {
182		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
183			m_freem(m);
184			return(EMSGSIZE);
185		}
186		M_PREPEND(m, sizeof(struct ip), M_WAIT);
187		ip = mtod(m, struct ip *);
188		ip->ip_tos = 0;
189		ip->ip_off = 0;
190		ip->ip_p = inp->inp_ip_p;
191		ip->ip_len = m->m_pkthdr.len;
192		ip->ip_src = inp->inp_laddr;
193		ip->ip_dst.s_addr = dst;
194		ip->ip_ttl = MAXTTL;
195	} else {
196		if (m->m_pkthdr.len > IP_MAXPACKET) {
197			m_freem(m);
198			return(EMSGSIZE);
199		}
200		ip = mtod(m, struct ip *);
201		/* don't allow both user specified and setsockopt options,
202		   and don't allow packet length sizes that will crash */
203		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
204		     && inp->inp_options)
205		    || (ip->ip_len > m->m_pkthdr.len)
206		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
207			m_freem(m);
208			return EINVAL;
209		}
210		if (ip->ip_id == 0)
211			ip->ip_id = htons(ip_id++);
212		/* XXX prevent ip_output from overwriting header fields */
213		flags |= IP_RAWOUTPUT;
214		ipstat.ips_rawout++;
215	}
216	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
217			  inp->inp_moptions));
218}
219
220/*
221 * Raw IP socket option processing.
222 */
223int
224rip_ctloutput(op, so, level, optname, m, p)
225	int op;
226	struct socket *so;
227	int level, optname;
228	struct mbuf **m;
229	struct proc *p;
230{
231	register struct inpcb *inp = sotoinpcb(so);
232	register int error;
233
234	if (level != IPPROTO_IP) {
235		if (op == PRCO_SETOPT && *m)
236			(void)m_free(*m);
237		return (EINVAL);
238	}
239
240	switch (optname) {
241
242	case IP_HDRINCL:
243		error = 0;
244		if (op == PRCO_SETOPT) {
245			if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
246				error = EINVAL;
247			else if (*mtod(*m, int *))
248				inp->inp_flags |= INP_HDRINCL;
249			else
250				inp->inp_flags &= ~INP_HDRINCL;
251			if (*m)
252				(void)m_free(*m);
253		} else {
254			*m = m_get(M_WAIT, MT_SOOPTS);
255			(*m)->m_len = sizeof (int);
256			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
257		}
258		return (error);
259
260#ifdef COMPAT_IPFW
261	case IP_FW_GET:
262		if (ip_fw_ctl_ptr == NULL || op == PRCO_SETOPT) {
263			if (*m) (void)m_free(*m);
264			return(EINVAL);
265		}
266		return (*ip_fw_ctl_ptr)(optname, m);
267
268	case IP_FW_ADD:
269	case IP_FW_DEL:
270	case IP_FW_FLUSH:
271	case IP_FW_ZERO:
272		if (ip_fw_ctl_ptr == NULL || op != PRCO_SETOPT) {
273			if (*m) (void)m_free(*m);
274			return(EINVAL);
275		}
276		return (*ip_fw_ctl_ptr)(optname, m);
277
278	case IP_NAT:
279		if (ip_nat_ctl_ptr == NULL) {
280			if (*m) (void)m_free(*m);
281			return(EINVAL);
282		}
283		return (*ip_nat_ctl_ptr)(op, m);
284
285#endif
286	case IP_RSVP_ON:
287		return ip_rsvp_init(so);
288		break;
289
290	case IP_RSVP_OFF:
291		return ip_rsvp_done();
292		break;
293
294	case IP_RSVP_VIF_ON:
295		return ip_rsvp_vif_init(so, *m);
296
297	case IP_RSVP_VIF_OFF:
298		return ip_rsvp_vif_done(so, *m);
299
300	case MRT_INIT:
301	case MRT_DONE:
302	case MRT_ADD_VIF:
303	case MRT_DEL_VIF:
304	case MRT_ADD_MFC:
305	case MRT_DEL_MFC:
306	case MRT_VERSION:
307	case MRT_ASSERT:
308		if (op == PRCO_SETOPT) {
309			error = ip_mrouter_set(optname, so, *m);
310			if (*m)
311				(void)m_free(*m);
312		} else if (op == PRCO_GETOPT) {
313			error = ip_mrouter_get(optname, so, m);
314		} else
315			error = EINVAL;
316		return (error);
317	}
318	return (ip_ctloutput(op, so, level, optname, m, p));
319}
320
321/*
322 * This function exists solely to receive the PRC_IFDOWN messages which
323 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
324 * and calls in_ifadown() to remove all routes corresponding to that address.
325 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
326 * interface routes.
327 */
328void
329rip_ctlinput(cmd, sa, vip)
330	int cmd;
331	struct sockaddr *sa;
332	void *vip;
333{
334	struct in_ifaddr *ia;
335	struct ifnet *ifp;
336	int err;
337	int flags;
338
339	switch(cmd) {
340	case PRC_IFDOWN:
341		for (ia = in_ifaddrhead.tqh_first; ia;
342		     ia = ia->ia_link.tqe_next) {
343			if (ia->ia_ifa.ifa_addr == sa
344			    && (ia->ia_flags & IFA_ROUTE)) {
345				/*
346				 * in_ifscrub kills the interface route.
347				 */
348				in_ifscrub(ia->ia_ifp, ia);
349				/*
350				 * in_ifadown gets rid of all the rest of
351				 * the routes.  This is not quite the right
352				 * thing to do, but at least if we are running
353				 * a routing process they will come back.
354				 */
355				in_ifadown(&ia->ia_ifa);
356				break;
357			}
358		}
359		break;
360
361	case PRC_IFUP:
362		for (ia = in_ifaddrhead.tqh_first; ia;
363		     ia = ia->ia_link.tqe_next) {
364			if (ia->ia_ifa.ifa_addr == sa)
365				break;
366		}
367		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
368			return;
369		flags = RTF_UP;
370		ifp = ia->ia_ifa.ifa_ifp;
371
372		if ((ifp->if_flags & IFF_LOOPBACK)
373		    || (ifp->if_flags & IFF_POINTOPOINT))
374			flags |= RTF_HOST;
375
376		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
377		if (err == 0)
378			ia->ia_flags |= IFA_ROUTE;
379		break;
380	}
381}
382
383static u_long	rip_sendspace = RIPSNDQ;
384static u_long	rip_recvspace = RIPRCVQ;
385
386SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace,
387	   0, "");
388SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace,
389	   0, "");
390
391static int
392rip_attach(struct socket *so, int proto, struct proc *p)
393{
394	struct inpcb *inp;
395	int error, s;
396
397	inp = sotoinpcb(so);
398	if (inp)
399		panic("rip_attach");
400	if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0)
401		return error;
402
403	s = splnet();
404	error = in_pcballoc(so, &ripcbinfo, p);
405	splx(s);
406	if (error)
407		return error;
408	error = soreserve(so, rip_sendspace, rip_recvspace);
409	if (error)
410		return error;
411	inp = (struct inpcb *)so->so_pcb;
412	inp->inp_ip_p = proto;
413	return 0;
414}
415
416static int
417rip_detach(struct socket *so)
418{
419	struct inpcb *inp;
420
421	inp = sotoinpcb(so);
422	if (inp == 0)
423		panic("rip_detach");
424	if (so == ip_mrouter)
425		ip_mrouter_done();
426	ip_rsvp_force_done(so);
427	if (so == ip_rsvpd)
428		ip_rsvp_done();
429	in_pcbdetach(inp);
430	return 0;
431}
432
/*
 * Abort a raw IP socket: mark it disconnected, then detach its PCB.
 * Returns the result of rip_detach().
 */
static int
rip_abort(struct socket *so)
{
	int rc;

	soisdisconnected(so);
	rc = rip_detach(so);
	return rc;
}
439
440static int
441rip_disconnect(struct socket *so)
442{
443	if ((so->so_state & SS_ISCONNECTED) == 0)
444		return ENOTCONN;
445	return rip_abort(so);
446}
447
448static int
449rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
450{
451	struct inpcb *inp = sotoinpcb(so);
452	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
453
454	if (nam->sa_len != sizeof(*addr))
455		return EINVAL;
456
457	if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) &&
458				    (addr->sin_family != AF_IMPLINK)) ||
459	    (addr->sin_addr.s_addr &&
460	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
461		return EADDRNOTAVAIL;
462	inp->inp_laddr = addr->sin_addr;
463	return 0;
464}
465
466static int
467rip_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
468{
469	struct inpcb *inp = sotoinpcb(so);
470	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
471
472	if (nam->sa_len != sizeof(*addr))
473		return EINVAL;
474	if (TAILQ_EMPTY(&ifnet))
475		return EADDRNOTAVAIL;
476	if ((addr->sin_family != AF_INET) &&
477	    (addr->sin_family != AF_IMPLINK))
478		return EAFNOSUPPORT;
479	inp->inp_faddr = addr->sin_addr;
480	soisconnected(so);
481	return 0;
482}
483
/*
 * Shut down the sending side of a raw IP socket by calling
 * socantsendmore().  Always returns 0.
 */
static int
rip_shutdown(struct socket *so)
{
	socantsendmore(so);

	return 0;
}
490
491static int
492rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
493	 struct mbuf *control, struct proc *p)
494{
495	struct inpcb *inp = sotoinpcb(so);
496	register u_long dst;
497
498	if (so->so_state & SS_ISCONNECTED) {
499		if (nam) {
500			m_freem(m);
501			return EISCONN;
502		}
503		dst = inp->inp_faddr.s_addr;
504	} else {
505		if (nam == NULL) {
506			m_freem(m);
507			return ENOTCONN;
508		}
509		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
510	}
511	return rip_output(m, so, dst);
512}
513
514struct pr_usrreqs rip_usrreqs = {
515	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
516	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
517	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
518	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
519	in_setsockaddr, sosend, soreceive, sopoll
520};
521