raw_ip.c revision 21673
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
 *	$FreeBSD: head/sys/netinet/raw_ip.c 21673 1997-01-14 07:20:47Z jkh $
 */
3626497Sache
3726497Sache#include <sys/param.h>
3826497Sache#include <sys/queue.h>
3926497Sache#include <sys/malloc.h>
4026497Sache#include <sys/mbuf.h>
4126497Sache#include <sys/socket.h>
4226497Sache#include <sys/protosw.h>
4326497Sache#include <sys/socketvar.h>
4426497Sache#include <sys/errno.h>
4526497Sache#include <sys/systm.h>
4626497Sache
4726497Sache#include <net/if.h>
4826497Sache#include <net/route.h>
4926497Sache
5026497Sache#define _IP_VHL
5126497Sache#include <netinet/in.h>
5226497Sache#include <netinet/in_systm.h>
5326497Sache#include <netinet/ip.h>
5426497Sache#include <netinet/in_pcb.h>
5526497Sache#include <netinet/in_var.h>
5626497Sache#include <netinet/ip_var.h>
5726497Sache#include <netinet/ip_mroute.h>
5826497Sache
5926497Sache#include <netinet/ip_fw.h>
6026497Sache
/*
 * Normalise COMPAT_IPFW: if it was left undefined or set to 1 it ends up
 * defined as exactly 1; any other value leaves it undefined, so the
 * "#ifdef COMPAT_IPFW" test further down is all that is needed.
 */
#if defined(COMPAT_IPFW) && COMPAT_IPFW != 1
#undef COMPAT_IPFW
#else
#undef COMPAT_IPFW
#define COMPAT_IPFW 1
#endif
6726497Sache
6826497Sachestatic struct inpcbhead ripcb;
6926497Sachestatic struct inpcbinfo ripcbinfo;
7026497Sache
7126497Sache/*
7226497Sache * Nominal space allocated to a raw ip socket.
7326497Sache */
7426497Sache#define	RIPSNDQ		8192
7526497Sache#define	RIPRCVQ		8192
7626497Sache
/*
 * Raw interface to IP protocol.
 */
8026497Sache
8126497Sache/*
8226497Sache * Initialize raw connection block q.
8326497Sache */
8426497Sachevoid
8526497Sacherip_init()
8626497Sache{
8726497Sache	LIST_INIT(&ripcb);
8826497Sache	ripcbinfo.listhead = &ripcb;
8926497Sache	/*
9026497Sache	 * XXX We don't use the hash list for raw IP, but it's easier
9126497Sache	 * to allocate a one entry hash list than it is to check all
9226497Sache	 * over the place for hashbase == NULL.
9326497Sache	 */
9426497Sache	ripcbinfo.hashbase = phashinit(1, M_PCB, &ripcbinfo.hashsize);
9526497Sache}
9626497Sache
9747558Sachestatic struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
9826497Sache/*
9926497Sache * Setup generic address and protocol structures
10026497Sache * for raw_input routine, then pass them along with
10126497Sache * mbuf chain.
10226497Sache */
10326497Sachevoid
10426497Sacherip_input(m, iphlen)
10547558Sache	struct mbuf *m;
10626497Sache	int iphlen;
10726497Sache{
10826497Sache	register struct ip *ip = mtod(m, struct ip *);
10926497Sache	register struct inpcb *inp;
11026497Sache	struct inpcb *last = 0;
11126497Sache	struct mbuf *opts = 0;
11226497Sache
11326497Sache	ripsrc.sin_addr = ip->ip_src;
11426497Sache	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
11526497Sache		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
11626497Sache			continue;
11726497Sache		if (inp->inp_laddr.s_addr &&
11826497Sache                  inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
11926497Sache			continue;
12026497Sache		if (inp->inp_faddr.s_addr &&
12126497Sache                  inp->inp_faddr.s_addr != ip->ip_src.s_addr)
12226497Sache			continue;
12326497Sache		if (last) {
12426497Sache			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
12526497Sache			if (n) {
12626497Sache				if (last->inp_flags & INP_CONTROLOPTS ||
12726497Sache				    last->inp_socket->so_options & SO_TIMESTAMP)
12826497Sache				    ip_savecontrol(last, &opts, ip, n);
12926497Sache				if (sbappendaddr(&last->inp_socket->so_rcv,
13026497Sache				    (struct sockaddr *)&ripsrc, n,
13126497Sache				    opts) == 0) {
13226497Sache					/* should notify about lost packet */
13326497Sache					m_freem(n);
13426497Sache					if (opts)
13526497Sache					    m_freem(opts);
13626497Sache				} else
13726497Sache					sorwakeup(last->inp_socket);
13826497Sache				opts = 0;
13926497Sache			}
14026497Sache		}
14126497Sache		last = inp;
14226497Sache	}
14326497Sache	if (last) {
14426497Sache		if (last->inp_flags & INP_CONTROLOPTS ||
14526497Sache		    last->inp_socket->so_options & SO_TIMESTAMP)
14626497Sache			ip_savecontrol(last, &opts, ip, m);
14726497Sache		if (sbappendaddr(&last->inp_socket->so_rcv,
14826497Sache		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
14926497Sache			m_freem(m);
15026497Sache			if (opts)
15126497Sache			    m_freem(opts);
15226497Sache		} else
15326497Sache			sorwakeup(last->inp_socket);
15426497Sache	} else {
15535486Sache		m_freem(m);
15626497Sache              ipstat.ips_noproto++;
15726497Sache              ipstat.ips_delivered--;
15826497Sache      }
15926497Sache}
16026497Sache
16126497Sache/*
16226497Sache * Generate IP header and pass packet to ip_output.
16326497Sache * Tack on options user may have setup with control call.
16426497Sache */
16526497Sacheint
16626497Sacherip_output(m, so, dst)
16726497Sache	register struct mbuf *m;
16826497Sache	struct socket *so;
16926497Sache	u_long dst;
17026497Sache{
17126497Sache	register struct ip *ip;
17226497Sache	register struct inpcb *inp = sotoinpcb(so);
17326497Sache	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
17426497Sache
17526497Sache	/*
17626497Sache	 * If the user handed us a complete IP packet, use it.
17735486Sache	 * Otherwise, allocate an mbuf for a header and fill it in.
17847558Sache	 */
17947558Sache	if ((inp->inp_flags & INP_HDRINCL) == 0) {
18026497Sache		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
18126497Sache			m_freem(m);
18226497Sache			return(EMSGSIZE);
18326497Sache		}
18426497Sache		M_PREPEND(m, sizeof(struct ip), M_WAIT);
18526497Sache		ip = mtod(m, struct ip *);
18626497Sache		ip->ip_tos = 0;
18747558Sache		ip->ip_off = 0;
18826497Sache		ip->ip_p = inp->inp_ip.ip_p;
18926497Sache		ip->ip_len = m->m_pkthdr.len;
19026497Sache		ip->ip_src = inp->inp_laddr;
19126497Sache		ip->ip_dst.s_addr = dst;
19226497Sache		ip->ip_ttl = MAXTTL;
19326497Sache	} else {
19426497Sache		if (m->m_pkthdr.len > IP_MAXPACKET) {
19526497Sache			m_freem(m);
19626497Sache			return(EMSGSIZE);
19726497Sache		}
19826497Sache		ip = mtod(m, struct ip *);
19926497Sache		/* don't allow both user specified and setsockopt options,
20026497Sache		   and don't allow packet length sizes that will crash */
20126497Sache		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
20226497Sache		     && inp->inp_options)
20326497Sache		    || (ip->ip_len > m->m_pkthdr.len)) {
20426497Sache			m_freem(m);
20526497Sache			return EINVAL;
20626497Sache		}
20726497Sache		if (ip->ip_id == 0)
20826497Sache			ip->ip_id = htons(ip_id++);
20926497Sache		/* XXX prevent ip_output from overwriting header fields */
21026497Sache		flags |= IP_RAWOUTPUT;
21126497Sache		ipstat.ips_rawout++;
21247558Sache	}
21347558Sache	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
21426497Sache			  inp->inp_moptions));
21547558Sache}
21647558Sache
21747558Sache/*
21847558Sache * Raw IP socket option processing.
21926497Sache */
22026497Sacheint
22126497Sacherip_ctloutput(op, so, level, optname, m)
22226497Sache	int op;
22326497Sache	struct socket *so;
22426497Sache	int level, optname;
22526497Sache	struct mbuf **m;
22626497Sache{
22726497Sache	register struct inpcb *inp = sotoinpcb(so);
22826497Sache	register int error;
22926497Sache
23026497Sache	if (level != IPPROTO_IP) {
23126497Sache		if (op == PRCO_SETOPT && *m)
23226497Sache			(void)m_free(*m);
23326497Sache		return (EINVAL);
23426497Sache	}
23526497Sache
23626497Sache	switch (optname) {
23726497Sache
23826497Sache	case IP_HDRINCL:
23926497Sache		error = 0;
24026497Sache		if (op == PRCO_SETOPT) {
24126497Sache			if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
24226497Sache				error = EINVAL;
24326497Sache			else if (*mtod(*m, int *))
24426497Sache				inp->inp_flags |= INP_HDRINCL;
24526497Sache			else
24626497Sache				inp->inp_flags &= ~INP_HDRINCL;
24726497Sache			if (*m)
24826497Sache				(void)m_free(*m);
24926497Sache		} else {
25026497Sache			*m = m_get(M_WAIT, MT_SOOPTS);
25126497Sache			(*m)->m_len = sizeof (int);
25226497Sache			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
25326497Sache		}
25426497Sache		return (error);
25526497Sache
25626497Sache#ifdef COMPAT_IPFW
25726497Sache	case IP_FW_GET:
25826497Sache		if (ip_fw_ctl_ptr == NULL || op == PRCO_SETOPT) {
25926497Sache			if (*m) (void)m_free(*m);
26026497Sache			return(EINVAL);
26126497Sache		}
26226497Sache		return (*ip_fw_ctl_ptr)(optname, m);
26326497Sache
26426497Sache	case IP_FW_ADD:
26526497Sache	case IP_FW_DEL:
26626497Sache	case IP_FW_FLUSH:
26726497Sache	case IP_FW_ZERO:
26826497Sache		if (ip_fw_ctl_ptr == NULL || op != PRCO_SETOPT) {
26926497Sache			if (*m) (void)m_free(*m);
27026497Sache			return(EINVAL);
27126497Sache		}
27226497Sache		return (*ip_fw_ctl_ptr)(optname, m);
27326497Sache
27426497Sache	case IP_NAT:
27526497Sache		if (ip_nat_ctl_ptr == NULL) {
27626497Sache			if (*m) (void)m_free(*m);
27726497Sache			return(EINVAL);
27826497Sache		}
27926497Sache		return (*ip_nat_ctl_ptr)(op, m);
28026497Sache
28126497Sache#endif
28226497Sache	case IP_RSVP_ON:
28326497Sache		return ip_rsvp_init(so);
28426497Sache		break;
28526497Sache
28626497Sache	case IP_RSVP_OFF:
28726497Sache		return ip_rsvp_done();
28826497Sache		break;
28926497Sache
29026497Sache	case IP_RSVP_VIF_ON:
29126497Sache		return ip_rsvp_vif_init(so, *m);
29226497Sache
29326497Sache	case IP_RSVP_VIF_OFF:
29426497Sache		return ip_rsvp_vif_done(so, *m);
29526497Sache
29626497Sache	case MRT_INIT:
29726497Sache	case MRT_DONE:
29826497Sache	case MRT_ADD_VIF:
29926497Sache	case MRT_DEL_VIF:
30026497Sache	case MRT_ADD_MFC:
30126497Sache	case MRT_DEL_MFC:
30226497Sache	case MRT_VERSION:
30326497Sache	case MRT_ASSERT:
30426497Sache		if (op == PRCO_SETOPT) {
30526497Sache			error = ip_mrouter_set(optname, so, *m);
30626497Sache			if (*m)
30726497Sache				(void)m_free(*m);
30826497Sache		} else if (op == PRCO_GETOPT) {
30926497Sache			error = ip_mrouter_get(optname, so, m);
31026497Sache		} else
31126497Sache			error = EINVAL;
31226497Sache		return (error);
31326497Sache	}
31426497Sache	return (ip_ctloutput(op, so, level, optname, m));
31526497Sache}
31626497Sache
31726497Sachestatic u_long	rip_sendspace = RIPSNDQ; /* XXX sysctl ? */
31826497Sachestatic u_long	rip_recvspace = RIPRCVQ; /* XXX sysctl ? */
31926497Sache
32026497Sache/*ARGSUSED*/
32126497Sacheint
32226497Sacherip_usrreq(so, req, m, nam, control)
32326497Sache	register struct socket *so;
32426497Sache	int req;
32526497Sache	struct mbuf *m, *nam, *control;
32626497Sache{
32726497Sache	register int error = 0;
32826497Sache	register struct inpcb *inp = sotoinpcb(so);
32926497Sache
33026497Sache	if (req == PRU_CONTROL)
33126497Sache		return (in_control(so, (u_long)m, (caddr_t)nam,
33226497Sache			(struct ifnet *)control));
33326497Sache
33426497Sache	switch (req) {
33526497Sache
33626497Sache	case PRU_ATTACH:
33726497Sache		if (inp)
33826497Sache			panic("rip_attach");
33926497Sache		if ((so->so_state & SS_PRIV) == 0) {
34026497Sache			error = EACCES;
34126497Sache			break;
34226497Sache		}
34326497Sache		if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
34426497Sache		    (error = in_pcballoc(so, &ripcbinfo)))
34526497Sache			break;
34626497Sache		inp = (struct inpcb *)so->so_pcb;
34726497Sache		inp->inp_ip.ip_p = (int)nam;
34826497Sache		break;
34926497Sache
35026497Sache	case PRU_DISCONNECT:
35126497Sache		if ((so->so_state & SS_ISCONNECTED) == 0) {
35226497Sache			error = ENOTCONN;
35326497Sache			break;
35426497Sache		}
35526497Sache		/* FALLTHROUGH */
35626497Sache	case PRU_ABORT:
35726497Sache		soisdisconnected(so);
35826497Sache		/* FALLTHROUGH */
35926497Sache	case PRU_DETACH:
36026497Sache		if (inp == 0)
36126497Sache			panic("rip_detach");
36226497Sache		if (so == ip_mrouter)
36326497Sache			ip_mrouter_done();
36426497Sache		ip_rsvp_force_done(so);
36526497Sache		if (so == ip_rsvpd)
36626497Sache			ip_rsvp_done();
36726497Sache		in_pcbdetach(inp);
36826497Sache		break;
36926497Sache
37026497Sache	case PRU_BIND:
37126497Sache	    {
37226497Sache		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
37326497Sache
37426497Sache		if (nam->m_len != sizeof(*addr)) {
37526497Sache			error = EINVAL;
37626497Sache			break;
37726497Sache		}
37826497Sache		if (TAILQ_EMPTY(&ifnet) ||
37926497Sache		    ((addr->sin_family != AF_INET) &&
38026497Sache		     (addr->sin_family != AF_IMPLINK)) ||
38126497Sache		    (addr->sin_addr.s_addr &&
38226497Sache		     ifa_ifwithaddr((struct sockaddr *)addr) == 0)) {
38326497Sache			error = EADDRNOTAVAIL;
38426497Sache			break;
38526497Sache		}
38626497Sache		inp->inp_laddr = addr->sin_addr;
38726497Sache		break;
38826497Sache	    }
38926497Sache	case PRU_CONNECT:
39026497Sache	    {
39126497Sache		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
39226497Sache
39326497Sache		if (nam->m_len != sizeof(*addr)) {
39426497Sache			error = EINVAL;
39526497Sache			break;
39626497Sache		}
39726497Sache		if (TAILQ_EMPTY(&ifnet)) {
39826497Sache			error = EADDRNOTAVAIL;
39926497Sache			break;
40026497Sache		}
40126497Sache		if ((addr->sin_family != AF_INET) &&
40226497Sache		     (addr->sin_family != AF_IMPLINK)) {
40326497Sache			error = EAFNOSUPPORT;
40426497Sache			break;
40526497Sache		}
40626497Sache		inp->inp_faddr = addr->sin_addr;
40726497Sache		soisconnected(so);
40826497Sache		break;
40926497Sache	    }
41026497Sache
41126497Sache	case PRU_CONNECT2:
41226497Sache		error = EOPNOTSUPP;
41347558Sache		break;
41447558Sache
41547558Sache	/*
41647558Sache	 * Mark the connection as being incapable of further input.
41747558Sache	 */
41847558Sache	case PRU_SHUTDOWN:
41947558Sache		socantsendmore(so);
42047558Sache		break;
42126497Sache
42226497Sache	/*
42326497Sache	 * Ship a packet out.  The appropriate raw output
42426497Sache	 * routine handles any massaging necessary.
42526497Sache	 */
42626497Sache	case PRU_SEND:
42726497Sache	    {
42826497Sache		register u_long dst;
42926497Sache
43026497Sache		if (so->so_state & SS_ISCONNECTED) {
43126497Sache			if (nam) {
43226497Sache				error = EISCONN;
43326497Sache				break;
43426497Sache			}
43526497Sache			dst = inp->inp_faddr.s_addr;
43626497Sache		} else {
43726497Sache			if (nam == NULL) {
43826497Sache				error = ENOTCONN;
43926497Sache				break;
44026497Sache			}
44126497Sache			dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
44226497Sache		}
44326497Sache		error = rip_output(m, so, dst);
44426497Sache		m = NULL;
44526497Sache		break;
44626497Sache	    }
44726497Sache
44826497Sache	case PRU_SENSE:
44926497Sache		/*
45026497Sache		 * stat: don't bother with a blocksize.
45126497Sache		 */
45226497Sache		return (0);
45326497Sache
45426497Sache	/*
45526497Sache	 * Not supported.
45626497Sache	 */
45726497Sache	case PRU_RCVOOB:
45826497Sache	case PRU_RCVD:
45926497Sache	case PRU_LISTEN:
46026497Sache	case PRU_ACCEPT:
46126497Sache	case PRU_SENDOOB:
46226497Sache		error = EOPNOTSUPP;
46326497Sache		break;
46426497Sache
46526497Sache	case PRU_SOCKADDR:
46626497Sache		in_setsockaddr(inp, nam);
46726497Sache		break;
46826497Sache
46926497Sache	case PRU_PEERADDR:
47026497Sache		in_setpeeraddr(inp, nam);
47147558Sache		break;
47247558Sache
47347558Sache	default:
47447558Sache		panic("rip_usrreq");
47547558Sache	}
47647558Sache	if (m != NULL)
47747558Sache		m_freem(m);
47847558Sache	return (error);
47926497Sache}
48026497Sache