raw_ip.c revision 34881
1/*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
34 *	$Id: raw_ip.c,v 1.51 1998/01/27 09:15:07 davidg Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/proc.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/sysctl.h>
47#include <vm/vm_zone.h>
48
49#include <net/if.h>
50#include <net/route.h>
51
52#define _IP_VHL
53#include <netinet/in.h>
54#include <netinet/in_systm.h>
55#include <netinet/ip.h>
56#include <netinet/in_pcb.h>
57#include <netinet/in_var.h>
58#include <netinet/ip_var.h>
59#include <netinet/ip_mroute.h>
60
61#include <netinet/ip_fw.h>
62
/*
 * Normalise COMPAT_IPFW: after this point it is either defined as
 * exactly 1 (it was undefined or already 1) or not defined at all
 * (it was defined to some other value).
 */
#if defined(COMPAT_IPFW) && COMPAT_IPFW != 1
#undef COMPAT_IPFW
#else
#undef COMPAT_IPFW
#define COMPAT_IPFW 1
#endif
69
70static struct inpcbhead ripcb;
71static struct inpcbinfo ripcbinfo;
72
73/*
74 * Nominal space allocated to a raw ip socket.
75 */
76#define	RIPSNDQ		8192
77#define	RIPRCVQ		8192
78
79/*
80 * Raw interface to IP protocol.
81 */
82
83/*
84 * Initialize raw connection block q.
85 */
86void
87rip_init()
88{
89	LIST_INIT(&ripcb);
90	ripcbinfo.listhead = &ripcb;
91	/*
92	 * XXX We don't use the hash list for raw IP, but it's easier
93	 * to allocate a one entry hash list than it is to check all
94	 * over the place for hashbase == NULL.
95	 */
96	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
97	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
98	ripcbinfo.ipi_zone = zinit("ripcb", sizeof(struct inpcb),
99				   nmbclusters/4, ZONE_INTERRUPT, 0);
100}
101
102static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
103/*
104 * Setup generic address and protocol structures
105 * for raw_input routine, then pass them along with
106 * mbuf chain.
107 */
108void
109rip_input(m, iphlen)
110	struct mbuf *m;
111	int iphlen;
112{
113	register struct ip *ip = mtod(m, struct ip *);
114	register struct inpcb *inp;
115	struct inpcb *last = 0;
116	struct mbuf *opts = 0;
117
118	ripsrc.sin_addr = ip->ip_src;
119	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
120		if (inp->inp_ip_p && inp->inp_ip_p != ip->ip_p)
121			continue;
122		if (inp->inp_laddr.s_addr &&
123                  inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
124			continue;
125		if (inp->inp_faddr.s_addr &&
126                  inp->inp_faddr.s_addr != ip->ip_src.s_addr)
127			continue;
128		if (last) {
129			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
130			if (n) {
131				if (last->inp_flags & INP_CONTROLOPTS ||
132				    last->inp_socket->so_options & SO_TIMESTAMP)
133				    ip_savecontrol(last, &opts, ip, n);
134				if (sbappendaddr(&last->inp_socket->so_rcv,
135				    (struct sockaddr *)&ripsrc, n,
136				    opts) == 0) {
137					/* should notify about lost packet */
138					m_freem(n);
139					if (opts)
140					    m_freem(opts);
141				} else
142					sorwakeup(last->inp_socket);
143				opts = 0;
144			}
145		}
146		last = inp;
147	}
148	if (last) {
149		if (last->inp_flags & INP_CONTROLOPTS ||
150		    last->inp_socket->so_options & SO_TIMESTAMP)
151			ip_savecontrol(last, &opts, ip, m);
152		if (sbappendaddr(&last->inp_socket->so_rcv,
153		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
154			m_freem(m);
155			if (opts)
156			    m_freem(opts);
157		} else
158			sorwakeup(last->inp_socket);
159	} else {
160		m_freem(m);
161              ipstat.ips_noproto++;
162              ipstat.ips_delivered--;
163      }
164}
165
166/*
167 * Generate IP header and pass packet to ip_output.
168 * Tack on options user may have setup with control call.
169 */
170int
171rip_output(m, so, dst)
172	register struct mbuf *m;
173	struct socket *so;
174	u_long dst;
175{
176	register struct ip *ip;
177	register struct inpcb *inp = sotoinpcb(so);
178	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
179
180	/*
181	 * If the user handed us a complete IP packet, use it.
182	 * Otherwise, allocate an mbuf for a header and fill it in.
183	 */
184	if ((inp->inp_flags & INP_HDRINCL) == 0) {
185		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
186			m_freem(m);
187			return(EMSGSIZE);
188		}
189		M_PREPEND(m, sizeof(struct ip), M_WAIT);
190		ip = mtod(m, struct ip *);
191		ip->ip_tos = 0;
192		ip->ip_off = 0;
193		ip->ip_p = inp->inp_ip_p;
194		ip->ip_len = m->m_pkthdr.len;
195		ip->ip_src = inp->inp_laddr;
196		ip->ip_dst.s_addr = dst;
197		ip->ip_ttl = MAXTTL;
198	} else {
199		if (m->m_pkthdr.len > IP_MAXPACKET) {
200			m_freem(m);
201			return(EMSGSIZE);
202		}
203		ip = mtod(m, struct ip *);
204		/* don't allow both user specified and setsockopt options,
205		   and don't allow packet length sizes that will crash */
206		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
207		     && inp->inp_options)
208		    || (ip->ip_len > m->m_pkthdr.len)
209		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
210			m_freem(m);
211			return EINVAL;
212		}
213		if (ip->ip_id == 0)
214			ip->ip_id = htons(ip_id++);
215		/* XXX prevent ip_output from overwriting header fields */
216		flags |= IP_RAWOUTPUT;
217		ipstat.ips_rawout++;
218	}
219	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
220			  inp->inp_moptions));
221}
222
223/*
224 * Raw IP socket option processing.
225 */
226int
227rip_ctloutput(op, so, level, optname, m, p)
228	int op;
229	struct socket *so;
230	int level, optname;
231	struct mbuf **m;
232	struct proc *p;
233{
234	register struct inpcb *inp = sotoinpcb(so);
235	register int error;
236
237	if (level != IPPROTO_IP) {
238		if (op == PRCO_SETOPT && *m)
239			(void)m_free(*m);
240		return (EINVAL);
241	}
242
243	switch (optname) {
244
245	case IP_HDRINCL:
246		error = 0;
247		if (op == PRCO_SETOPT) {
248			if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
249				error = EINVAL;
250			else if (*mtod(*m, int *))
251				inp->inp_flags |= INP_HDRINCL;
252			else
253				inp->inp_flags &= ~INP_HDRINCL;
254			if (*m)
255				(void)m_free(*m);
256		} else {
257			*m = m_get(M_WAIT, MT_SOOPTS);
258			(*m)->m_len = sizeof (int);
259			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
260		}
261		return (error);
262
263#ifdef COMPAT_IPFW
264	case IP_FW_GET:
265		if (ip_fw_ctl_ptr == NULL || op == PRCO_SETOPT) {
266			if (*m) (void)m_free(*m);
267			return(EINVAL);
268		}
269		return (*ip_fw_ctl_ptr)(optname, m);
270
271	case IP_FW_ADD:
272	case IP_FW_DEL:
273	case IP_FW_FLUSH:
274	case IP_FW_ZERO:
275		if (ip_fw_ctl_ptr == NULL || op != PRCO_SETOPT) {
276			if (*m) (void)m_free(*m);
277			return(EINVAL);
278		}
279		return (*ip_fw_ctl_ptr)(optname, m);
280
281	case IP_NAT:
282		if (ip_nat_ctl_ptr == NULL) {
283			if (*m) (void)m_free(*m);
284			return(EINVAL);
285		}
286		return (*ip_nat_ctl_ptr)(op, m);
287
288#endif
289	case IP_RSVP_ON:
290		return ip_rsvp_init(so);
291		break;
292
293	case IP_RSVP_OFF:
294		return ip_rsvp_done();
295		break;
296
297	case IP_RSVP_VIF_ON:
298		return ip_rsvp_vif_init(so, *m);
299
300	case IP_RSVP_VIF_OFF:
301		return ip_rsvp_vif_done(so, *m);
302
303	case MRT_INIT:
304	case MRT_DONE:
305	case MRT_ADD_VIF:
306	case MRT_DEL_VIF:
307	case MRT_ADD_MFC:
308	case MRT_DEL_MFC:
309	case MRT_VERSION:
310	case MRT_ASSERT:
311		if (op == PRCO_SETOPT) {
312			error = ip_mrouter_set(optname, so, *m);
313			if (*m)
314				(void)m_free(*m);
315		} else if (op == PRCO_GETOPT) {
316			error = ip_mrouter_get(optname, so, m);
317		} else
318			error = EINVAL;
319		return (error);
320	}
321	return (ip_ctloutput(op, so, level, optname, m, p));
322}
323
324/*
325 * This function exists solely to receive the PRC_IFDOWN messages which
326 * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
327 * and calls in_ifadown() to remove all routes corresponding to that address.
328 * It also receives the PRC_IFUP messages from if_up() and reinstalls the
329 * interface routes.
330 */
331void
332rip_ctlinput(cmd, sa, vip)
333	int cmd;
334	struct sockaddr *sa;
335	void *vip;
336{
337	struct in_ifaddr *ia;
338	struct ifnet *ifp;
339	int err;
340	int flags;
341
342	switch(cmd) {
343	case PRC_IFDOWN:
344		for (ia = in_ifaddrhead.tqh_first; ia;
345		     ia = ia->ia_link.tqe_next) {
346			if (ia->ia_ifa.ifa_addr == sa
347			    && (ia->ia_flags & IFA_ROUTE)) {
348				/*
349				 * in_ifscrub kills the interface route.
350				 */
351				in_ifscrub(ia->ia_ifp, ia);
352				/*
353				 * in_ifadown gets rid of all the rest of
354				 * the routes.  This is not quite the right
355				 * thing to do, but at least if we are running
356				 * a routing process they will come back.
357				 */
358				in_ifadown(&ia->ia_ifa);
359				break;
360			}
361		}
362		break;
363
364	case PRC_IFUP:
365		for (ia = in_ifaddrhead.tqh_first; ia;
366		     ia = ia->ia_link.tqe_next) {
367			if (ia->ia_ifa.ifa_addr == sa)
368				break;
369		}
370		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
371			return;
372		flags = RTF_UP;
373		ifp = ia->ia_ifa.ifa_ifp;
374
375		if ((ifp->if_flags & IFF_LOOPBACK)
376		    || (ifp->if_flags & IFF_POINTOPOINT))
377			flags |= RTF_HOST;
378
379		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
380		if (err == 0)
381			ia->ia_flags |= IFA_ROUTE;
382		break;
383	}
384}
385
386static u_long	rip_sendspace = RIPSNDQ;
387static u_long	rip_recvspace = RIPRCVQ;
388
389SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace,
390	   0, "");
391SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace,
392	   0, "");
393
394static int
395rip_attach(struct socket *so, int proto, struct proc *p)
396{
397	struct inpcb *inp;
398	int error, s;
399
400	inp = sotoinpcb(so);
401	if (inp)
402		panic("rip_attach");
403	if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0)
404		return error;
405
406	s = splnet();
407	error = in_pcballoc(so, &ripcbinfo, p);
408	splx(s);
409	if (error)
410		return error;
411	error = soreserve(so, rip_sendspace, rip_recvspace);
412	if (error)
413		return error;
414	inp = (struct inpcb *)so->so_pcb;
415	inp->inp_ip_p = proto;
416	return 0;
417}
418
419static int
420rip_detach(struct socket *so)
421{
422	struct inpcb *inp;
423
424	inp = sotoinpcb(so);
425	if (inp == 0)
426		panic("rip_detach");
427	if (so == ip_mrouter)
428		ip_mrouter_done();
429	ip_rsvp_force_done(so);
430	if (so == ip_rsvpd)
431		ip_rsvp_done();
432	in_pcbdetach(inp);
433	return 0;
434}
435
/*
 * Abort a raw socket: mark it disconnected, then detach it.
 * Returns whatever rip_detach() returns.
 */
static int
rip_abort(struct socket *so)
{
	int error;

	soisdisconnected(so);
	error = rip_detach(so);
	return error;
}
442
443static int
444rip_disconnect(struct socket *so)
445{
446	if ((so->so_state & SS_ISCONNECTED) == 0)
447		return ENOTCONN;
448	return rip_abort(so);
449}
450
451static int
452rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
453{
454	struct inpcb *inp = sotoinpcb(so);
455	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
456
457	if (nam->sa_len != sizeof(*addr))
458		return EINVAL;
459
460	if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) &&
461				    (addr->sin_family != AF_IMPLINK)) ||
462	    (addr->sin_addr.s_addr &&
463	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
464		return EADDRNOTAVAIL;
465	inp->inp_laddr = addr->sin_addr;
466	return 0;
467}
468
469static int
470rip_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
471{
472	struct inpcb *inp = sotoinpcb(so);
473	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
474
475	if (nam->sa_len != sizeof(*addr))
476		return EINVAL;
477	if (TAILQ_EMPTY(&ifnet))
478		return EADDRNOTAVAIL;
479	if ((addr->sin_family != AF_INET) &&
480	    (addr->sin_family != AF_IMPLINK))
481		return EAFNOSUPPORT;
482	inp->inp_faddr = addr->sin_addr;
483	soisconnected(so);
484	return 0;
485}
486
/*
 * Shut down the sending side of a raw socket by calling
 * socantsendmore().  Always returns 0.
 */
static int
rip_shutdown(struct socket *so)
{
	socantsendmore(so);

	return 0;
}
493
494static int
495rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
496	 struct mbuf *control, struct proc *p)
497{
498	struct inpcb *inp = sotoinpcb(so);
499	register u_long dst;
500
501	if (so->so_state & SS_ISCONNECTED) {
502		if (nam) {
503			m_freem(m);
504			return EISCONN;
505		}
506		dst = inp->inp_faddr.s_addr;
507	} else {
508		if (nam == NULL) {
509			m_freem(m);
510			return ENOTCONN;
511		}
512		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
513	}
514	return rip_output(m, so, dst);
515}
516
517struct pr_usrreqs rip_usrreqs = {
518	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
519	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
520	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
521	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
522	in_setsockaddr, sosend, soreceive, sopoll
523};
524