/* ip_divert.c revision 36903 */
1193323Sed/* 2193323Sed * Copyright (c) 1982, 1986, 1988, 1993 3193323Sed * The Regents of the University of California. All rights reserved. 4193323Sed * 5193323Sed * Redistribution and use in source and binary forms, with or without 6193323Sed * modification, are permitted provided that the following conditions 7193323Sed * are met: 8193323Sed * 1. Redistributions of source code must retain the above copyright 9193323Sed * notice, this list of conditions and the following disclaimer. 10193323Sed * 2. Redistributions in binary form must reproduce the above copyright 11193323Sed * notice, this list of conditions and the following disclaimer in the 12193323Sed * documentation and/or other materials provided with the distribution. 13193323Sed * 3. All advertising materials mentioning features or use of this software 14193323Sed * must display the following acknowledgement: 15193323Sed * This product includes software developed by the University of 16193323Sed * California, Berkeley and its contributors. 17193323Sed * 4. Neither the name of the University nor the names of its contributors 18193323Sed * may be used to endorse or promote products derived from this software 19193323Sed * without specific prior written permission. 20193323Sed * 21193323Sed * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22193323Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23193323Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24193323Sed * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25193323Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26193323Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27193323Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28193323Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29193323Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30193323Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31193323Sed * SUCH DAMAGE. 32193323Sed * 33193323Sed * $Id: ip_divert.c,v 1.29 1998/06/06 20:45:25 julian Exp $ 34193323Sed */ 35193323Sed 36193323Sed#include "opt_inet.h" 37193323Sed#include "opt_ipfw.h" 38193323Sed 39210299Sed#ifndef INET 40193323Sed#error "IPDIVERT requires INET." 41249423Sdim#endif 42249423Sdim 43249423Sdim#include <sys/param.h> 44249423Sdim#include <sys/malloc.h> 45249423Sdim#include <sys/mbuf.h> 46249423Sdim#include <sys/socket.h> 47249423Sdim#include <sys/protosw.h> 48193323Sed#include <sys/socketvar.h> 49193323Sed#include <sys/systm.h> 50249423Sdim#include <sys/proc.h> 51249423Sdim 52193323Sed#include <vm/vm_zone.h> 53193323Sed 54193323Sed#include <net/if.h> 55193323Sed#include <net/route.h> 56193323Sed 57193323Sed#include <netinet/in.h> 58193323Sed#include <netinet/in_systm.h> 59193323Sed#include <netinet/ip.h> 60193323Sed#include <netinet/in_pcb.h> 61193323Sed#include <netinet/in_var.h> 62193323Sed#include <netinet/ip_var.h> 63198892Srdivacky 64263508Sdim/* 65263508Sdim * Divert sockets 66193323Sed */ 67193323Sed 68193323Sed/* 69193323Sed * Allocate enough space to hold a full IP packet 70224145Sdim */ 71193323Sed#define DIVSNDQ (65536 + 100) 72207618Srdivacky#define DIVRCVQ (65536 + 100) 73207618Srdivacky 74193323Sed/* Global variables */ 75193323Sed 76193323Sed/* 77263508Sdim * ip_input() and ip_output() set this secret value before calling us to 78207618Srdivacky * let 
us know which divert port to divert a packet to; this is done so 79263508Sdim * we can use the existing prototype for struct protosw's pr_input(). 80263508Sdim * This is stored in host order. 81218893Sdim */ 82218893Sdimu_short ip_divert_port; 83193323Sed 84193323Sed/* 85193323Sed * #ifdef IPFW_DIVERT_OLDRESTART 86193323Sed * We set this value to a non-zero port number when we want the call to 87193323Sed * ip_fw_chk() in ip_input() or ip_output() to ignore ``divert <port>'' 88212904Sdim * chain entries. This is stored in host order. 89193323Sed * #else 90193323Sed * A 16 bit cookie is passed to the user process. 91193323Sed * The user process can send it back to help the caller know something 92193323Sed * about where the packet came from. 93193323Sed * 94210299Sed * If IPFW is the caller then the cookie is the rule that sent 95193323Sed * us here. On reinjection is is the rule after which processing 96207618Srdivacky * should continue. Leaving it the same will make processing start 97207618Srdivacky * at the rule number after that which sent it here. Setting it to 98193323Sed * 0 will restart processing at the beginning. 99193323Sed * #endif 100193323Sed */ 101193323Sedu_int16_t ip_divert_cookie; 102193323Sed 103212904Sdim/* Internal variables */ 104212904Sdim 105218893Sdimstatic struct inpcbhead divcb; 106193323Sedstatic struct inpcbinfo divcbinfo; 107212904Sdim 108193323Sedstatic u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ 109193323Sedstatic u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ 110263508Sdim 111207618Srdivacky/* Optimization: have this preinitialized */ 112263508Sdimstatic struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET }; 113207618Srdivacky 114193323Sed/* Internal functions */ 115193323Sed 116193323Sedstatic int div_output(struct socket *so, 117193323Sed struct mbuf *m, struct sockaddr *addr, struct mbuf *control); 118226633Sdim 119207618Srdivacky/* 120193323Sed * Initialize divert connection block queue. 
121263508Sdim */ 122193323Sedvoid 123193323Seddiv_init(void) 124224145Sdim{ 125193323Sed LIST_INIT(&divcb); 126226633Sdim divcbinfo.listhead = &divcb; 127224145Sdim /* 128224145Sdim * XXX We don't use the hash list for divert IP, but it's easier 129193323Sed * to allocate a one entry hash list than it is to check all 130226633Sdim * over the place for hashbase == NULL. 131193323Sed */ 132193323Sed divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); 133193323Sed divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); 134193323Sed divcbinfo.ipi_zone = zinit("divcb", sizeof(struct inpcb), 135198090Srdivacky maxsockets, ZONE_INTERRUPT, 0); 136193323Sed} 137193323Sed 138198090Srdivacky/* 139193323Sed * Setup generic address and protocol structures 140193323Sed * for div_input routine, then pass them along with 141218893Sdim * mbuf chain. ip->ip_len is assumed to have had 142218893Sdim * the header length (hlen) subtracted out already. 143218893Sdim * We tell whether the packet was incoming or outgoing 144218893Sdim * by seeing if hlen == 0, which is a hack. 
145218893Sdim */ 146218893Sdimvoid 147193323Seddiv_input(struct mbuf *m, int hlen) 148193323Sed{ 149193323Sed struct ip *ip; 150193323Sed struct inpcb *inp; 151218893Sdim struct socket *sa; 152218893Sdim 153218893Sdim /* Sanity check */ 154218893Sdim if (ip_divert_port == 0) 155193323Sed panic("div_input: port is 0"); 156193323Sed 157193323Sed /* Assure header */ 158207618Srdivacky if (m->m_len < sizeof(struct ip) && 159207618Srdivacky (m = m_pullup(m, sizeof(struct ip))) == 0) { 160193323Sed return; 161193323Sed } 162193323Sed ip = mtod(m, struct ip *); 163198090Srdivacky 164198090Srdivacky /* Record divert port */ 165193323Sed#ifdef IPFW_DIVERT_OLDRESTART 166193323Sed divsrc.sin_port = htons(ip_divert_cookie); 167193323Sed#else 168193323Sed divsrc.sin_port = ip_divert_cookie; 169193323Sed#endif /* IPFW_DIVERT_OLDRESTART */ 170193323Sed ip_divert_cookie = 0; 171193323Sed 172210299Sed /* Restore packet header fields */ 173193323Sed ip->ip_len += hlen; 174193323Sed HTONS(ip->ip_len); 175224145Sdim HTONS(ip->ip_off); 176193323Sed 177193323Sed /* Record receive interface address, if any */ 178193323Sed divsrc.sin_addr.s_addr = 0; 179218893Sdim if (hlen) { 180193323Sed struct ifaddr *ifa; 181193323Sed 182193323Sed#ifdef DIAGNOSTIC 183193323Sed /* Sanity check */ 184193323Sed if (!(m->m_flags & M_PKTHDR)) 185193323Sed panic("div_input: no pkt hdr"); 186193323Sed#endif 187193323Sed 188193323Sed /* More fields affected by ip_input() */ 189193323Sed HTONS(ip->ip_id); 190193323Sed 191193323Sed /* Find IP address for receive interface */ 192193323Sed for (ifa = m->m_pkthdr.rcvif->if_addrhead.tqh_first; 193193323Sed ifa != NULL; ifa = ifa->ifa_link.tqe_next) { 194193323Sed if (ifa->ifa_addr == NULL) 195193323Sed continue; 196193323Sed if (ifa->ifa_addr->sa_family != AF_INET) 197193323Sed continue; 198193323Sed divsrc.sin_addr = 199193323Sed ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; 200207618Srdivacky break; 201193323Sed } 202198090Srdivacky } 203198090Srdivacky if 
(m->m_pkthdr.rcvif) { 204193323Sed char name[32]; 205193323Sed /* 206193323Sed * Hide the actual interface name in there in the 207193323Sed * sin_zero array. XXX This needs to be moved to a 208193323Sed * different sockaddr type for divert, e.g. 209193323Sed * sockaddr_div with multiple fields like 210193323Sed * sockaddr_dl. Presently we have only 7 bytes 211193323Sed * but that will do for now as most interfaces 212193323Sed * are 4 or less + 2 or less bytes for unit. 213193323Sed * There is probably a faster way of doing this, 214193323Sed * possibly taking it from the sockaddr_dl on the iface. 215193323Sed * This solves the problem of a P2P link and a LAN interface 216193323Sed * having the same address, which can result in the wrong 217193323Sed * interface being assigned to the packet when fed back 218193323Sed * into the divert socket. Theoretically if the daemon saves 219207618Srdivacky * and re-uses the sockaddr_in as suggested in the man pages, 220207618Srdivacky * this iface name will come along for the ride. 221207618Srdivacky * (see div_output for the other half of this.) 
222207618Srdivacky */ 223207618Srdivacky sprintf(name, "%s%d", 224193323Sed m->m_pkthdr.rcvif->if_name, m->m_pkthdr.rcvif->if_unit); 225226633Sdim strncpy(divsrc.sin_zero, name, 7); 226193323Sed } 227198090Srdivacky 228198090Srdivacky /* Put packet on socket queue, if any */ 229193323Sed sa = NULL; 230226633Sdim for (inp = divcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) { 231226633Sdim if (inp->inp_lport == htons(ip_divert_port)) 232226633Sdim sa = inp->inp_socket; 233207618Srdivacky } 234207618Srdivacky if (sa) { 235193323Sed if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, 236193323Sed m, (struct mbuf *)0) == 0) 237193323Sed m_freem(m); 238193323Sed else 239210299Sed sorwakeup(sa); 240193323Sed } else { 241224145Sdim m_freem(m); 242193323Sed ipstat.ips_noproto++; 243193323Sed ipstat.ips_delivered--; 244193323Sed } 245218893Sdim} 246193323Sed 247193323Sed/* 248193323Sed * Deliver packet back into the IP processing machinery. 249193323Sed * 250193323Sed * If no address specified, or address is 0.0.0.0, send to ip_output(); 251193323Sed * otherwise, send to ip_input() and mark as having been received on 252193323Sed * the interface with that address. 253193323Sed * 254193323Sed * If no address specified, or dest port is 0, allow packet to divert 255193323Sed * back to this socket; otherwise, don't. 
256193323Sed */ 257193323Sedstatic int 258193323Seddiv_output(so, m, addr, control) 259193323Sed struct socket *so; 260193323Sed register struct mbuf *m; 261193323Sed struct sockaddr *addr; 262193323Sed struct mbuf *control; 263193323Sed{ 264193323Sed register struct inpcb *const inp = sotoinpcb(so); 265193323Sed register struct ip *const ip = mtod(m, struct ip *); 266193323Sed struct sockaddr_in *sin = (struct sockaddr_in *)addr; 267193323Sed int error = 0; 268210299Sed 269193323Sed if (control) 270193323Sed m_freem(control); /* XXX */ 271193323Sed 272193323Sed /* Loopback avoidance and state recovery */ 273193323Sed if (sin) { 274193323Sed int len = 0; 275193323Sed char *c = sin->sin_zero; 276193323Sed#ifdef IPFW_DIVERT_OLDRESTART 277193323Sed ip_divert_cookie = ntohs(sin->sin_port); 278193323Sed#else 279193323Sed ip_divert_cookie = sin->sin_port; 280193323Sed#endif /* IPFW_DIVERT_OLDRESTART */ 281193323Sed 282193323Sed /* 283193323Sed * Find receive interface with the given name or IP address. 284193323Sed * The name is user supplied data so don't trust it's size or 285193323Sed * that it is zero terminated. The name has priority. 286193323Sed * We are presently assuming that the sockaddr_in 287193323Sed * has not been replaced by a sockaddr_div, so we limit it 288193323Sed * to 16 bytes in total. the name is stuffed (if it exists) 289193323Sed * in the sin_zero[] field. 
290226633Sdim */ 291210299Sed while (*c++ && (len++ < sizeof(sin->sin_zero))); 292210299Sed if ((len > 0) && (len < sizeof(sin->sin_zero))) 293210299Sed m->m_pkthdr.rcvif = ifunit(sin->sin_zero); 294210299Sed } else { 295210299Sed ip_divert_cookie = 0; 296210299Sed } 297210299Sed 298210299Sed /* Reinject packet into the system as incoming or outgoing */ 299210299Sed if (!sin || sin->sin_addr.s_addr == 0) { 300210299Sed /* 301210299Sed * Don't allow both user specified and setsockopt options, 302210299Sed * and don't allow packet length sizes that will crash 303210299Sed */ 304210299Sed if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || 305210299Sed ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) { 306210299Sed error = EINVAL; 307210299Sed goto cantsend; 308210299Sed } 309210299Sed 310210299Sed /* Convert fields to host order for ip_output() */ 311210299Sed NTOHS(ip->ip_len); 312210299Sed NTOHS(ip->ip_off); 313210299Sed 314210299Sed /* Send packet to output processing */ 315210299Sed ipstat.ips_rawout++; /* XXX */ 316210299Sed error = ip_output(m, inp->inp_options, &inp->inp_route, 317193323Sed (so->so_options & SO_DONTROUTE) | 318193323Sed IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions); 319193323Sed } else { 320193323Sed struct ifaddr *ifa; 321193323Sed 322193323Sed /* If no luck with the name above. check by IP address. */ 323193323Sed if (m->m_pkthdr.rcvif == NULL) { 324193323Sed if (!(ifa = ifa_ifwithaddr((struct sockaddr *) sin))) { 325193323Sed error = EADDRNOTAVAIL; 326193323Sed goto cantsend; 327193323Sed } 328193323Sed m->m_pkthdr.rcvif = ifa->ifa_ifp; 329193323Sed } 330193323Sed 331193323Sed /* Send packet to input processing */ 332193323Sed ip_input(m); 333193323Sed } 334193323Sed 335193323Sed /* paranoid: Reset for next time (and other packets) */ 336193323Sed /* almost definitly already done in the ipfw filter but.. 
*/ 337193323Sed ip_divert_cookie = 0; 338210299Sed return error; 339193323Sed 340193323Sedcantsend: 341193323Sed ip_divert_cookie = 0; 342193323Sed m_freem(m); 343193323Sed return error; 344193323Sed} 345193323Sed 346193323Sedstatic int 347193323Seddiv_attach(struct socket *so, int proto, struct proc *p) 348193323Sed{ 349193323Sed struct inpcb *inp; 350193323Sed int error, s; 351193323Sed 352193323Sed inp = sotoinpcb(so); 353193323Sed if (inp) 354193323Sed panic("div_attach"); 355193323Sed if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0) 356193323Sed return error; 357193323Sed 358193323Sed s = splnet(); 359193323Sed error = in_pcballoc(so, &divcbinfo, p); 360193323Sed splx(s); 361193323Sed if (error) 362193323Sed return error; 363193323Sed error = soreserve(so, div_sendspace, div_recvspace); 364193323Sed if (error) 365193323Sed return error; 366193323Sed inp = (struct inpcb *)so->so_pcb; 367193323Sed inp->inp_ip_p = proto; 368193323Sed inp->inp_flags |= INP_HDRINCL; 369193323Sed /* The socket is always "connected" because 370193323Sed we always know "where" to send the packet */ 371193323Sed so->so_state |= SS_ISCONNECTED; 372193323Sed return 0; 373193323Sed} 374193323Sed 375193323Sedstatic int 376193323Seddiv_detach(struct socket *so) 377193323Sed{ 378193323Sed struct inpcb *inp; 379193323Sed 380193323Sed inp = sotoinpcb(so); 381193323Sed if (inp == 0) 382193323Sed panic("div_detach"); 383193323Sed in_pcbdetach(inp); 384193323Sed return 0; 385193323Sed} 386193323Sed 387210299Sedstatic int 388210299Seddiv_abort(struct socket *so) 389226633Sdim{ 390193323Sed soisdisconnected(so); 391193323Sed return div_detach(so); 392193323Sed} 393193323Sed 394193323Sedstatic int 395193323Seddiv_disconnect(struct socket *so) 396193323Sed{ 397193323Sed if ((so->so_state & SS_ISCONNECTED) == 0) 398193323Sed return ENOTCONN; 399193323Sed return div_abort(so); 400234353Sdim} 401193323Sed 402193323Sedstatic int 403193323Seddiv_bind(struct socket *so, struct sockaddr *nam, struct 
proc *p) 404193323Sed{ 405193323Sed struct inpcb *inp; 406193323Sed int s; 407193323Sed int error; 408193323Sed 409193323Sed s = splnet(); 410193323Sed inp = sotoinpcb(so); 411193323Sed error = in_pcbbind(inp, nam, p); 412193323Sed splx(s); 413193323Sed return 0; 414193323Sed} 415193323Sed 416193323Sedstatic int 417193323Seddiv_shutdown(struct socket *so) 418193323Sed{ 419193323Sed socantsendmore(so); 420193323Sed return 0; 421193323Sed} 422193323Sed 423193323Sedstatic int 424193323Seddiv_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 425193323Sed struct mbuf *control, struct proc *p) 426193323Sed{ 427193323Sed /* Packet must have a header (but that's about it) */ 428193323Sed if (m->m_len < sizeof (struct ip) || 429263508Sdim (m = m_pullup(m, sizeof (struct ip))) == 0) { 430193323Sed ipstat.ips_toosmall++; 431193323Sed m_freem(m); 432198090Srdivacky return EINVAL; 433198090Srdivacky } 434193323Sed 435210299Sed /* Send packet */ 436210299Sed return div_output(so, m, nam, control); 437226633Sdim} 438210299Sed 439193323Sedstruct pr_usrreqs div_usrreqs = { 440193323Sed div_abort, pru_accept_notsupp, div_attach, div_bind, 441193323Sed pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach, 442193323Sed div_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, 443193323Sed pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown, 444193323Sed in_setsockaddr, sosend, soreceive, sopoll 445193323Sed}; 446193323Sed