ip_output.c revision 196234
133965Sjdp/*- 2104834Sobrien * Copyright (c) 1982, 1986, 1988, 1990, 1993 3218822Sdim * The Regents of the University of California. All rights reserved. 433965Sjdp * 533965Sjdp * Redistribution and use in source and binary forms, with or without 633965Sjdp * modification, are permitted provided that the following conditions 733965Sjdp * are met: 860484Sobrien * 1. Redistributions of source code must retain the above copyright 933965Sjdp * notice, this list of conditions and the following disclaimer. 1033965Sjdp * 2. Redistributions in binary form must reproduce the above copyright 1133965Sjdp * notice, this list of conditions and the following disclaimer in the 1233965Sjdp * documentation and/or other materials provided with the distribution. 1333965Sjdp * 4. Neither the name of the University nor the names of its contributors 1433965Sjdp * may be used to endorse or promote products derived from this software 1533965Sjdp * without specific prior written permission. 1633965Sjdp * 1733965Sjdp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1833965Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1933965Sjdp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2033965Sjdp * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2133965Sjdp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2233965Sjdp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2333965Sjdp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24218822Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25218822Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2633965Sjdp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27218822Sdim * SUCH DAMAGE. 28218822Sdim * 29218822Sdim * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 3033965Sjdp */ 31218822Sdim 32218822Sdim#include <sys/cdefs.h> 33218822Sdim__FBSDID("$FreeBSD: head/sys/netinet/ip_output.c 196234 2009-08-14 23:44:59Z qingli $"); 34218822Sdim 35218822Sdim#include "opt_ipfw.h" 36218822Sdim#include "opt_ipsec.h" 37218822Sdim#include "opt_route.h" 38218822Sdim#include "opt_mbuf_stress_test.h" 39218822Sdim#include "opt_mpath.h" 40218822Sdim#include "opt_sctp.h" 41218822Sdim 42218822Sdim#include <sys/param.h> 43218822Sdim#include <sys/systm.h> 44218822Sdim#include <sys/kernel.h> 45218822Sdim#include <sys/malloc.h> 46218822Sdim#include <sys/mbuf.h> 47218822Sdim#include <sys/priv.h> 48218822Sdim#include <sys/proc.h> 49218822Sdim#include <sys/protosw.h> 50218822Sdim#include <sys/socket.h> 5133965Sjdp#include <sys/socketvar.h> 5233965Sjdp#include <sys/sysctl.h> 5360484Sobrien#include <sys/ucred.h> 5460484Sobrien 5533965Sjdp#include <net/if.h> 5633965Sjdp#include <net/if_llatbl.h> 5733965Sjdp#include <net/netisr.h> 5833965Sjdp#include <net/pfil.h> 5933965Sjdp#include <net/route.h> 6077298Sobrien#include <net/flowtable.h> 6133965Sjdp#ifdef RADIX_MPATH 6233965Sjdp#include <net/radix_mpath.h> 63218822Sdim#endif 6489857Sobrien#include <net/vnet.h> 6533965Sjdp 66130561Sobrien#include <netinet/in.h> 6733965Sjdp#include <netinet/in_systm.h> 68218822Sdim#include <netinet/ip.h> 69218822Sdim#include <netinet/in_pcb.h> 70218822Sdim#include <netinet/in_var.h> 71218822Sdim#include <netinet/ip_var.h> 7260484Sobrien#include <netinet/ip_options.h> 73130561Sobrien#ifdef SCTP 74130561Sobrien#include <netinet/sctp.h> 7533965Sjdp#include <netinet/sctp_crc32.h> 7633965Sjdp#endif 7733965Sjdp 7833965Sjdp#ifdef IPSEC 79130561Sobrien#include <netinet/ip_ipsec.h> 80130561Sobrien#include <netipsec/ipsec.h> 8133965Sjdp#endif /* IPSEC*/ 8233965Sjdp 8333965Sjdp#include <machine/in_cksum.h> 8489857Sobrien 8533965Sjdp#include <security/mac/mac_framework.h> 8633965Sjdp 87130561Sobrien#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 88130561Sobrien x, (ntohl(a.s_addr)>>24)&0xFF,\ 89130561Sobrien (ntohl(a.s_addr)>>16)&0xFF,\ 90130561Sobrien (ntohl(a.s_addr)>>8)&0xFF,\ 91130561Sobrien (ntohl(a.s_addr))&0xFF, y); 92130561Sobrien 93130561SobrienVNET_DEFINE(u_short, ip_id); 94130561Sobrien 95130561Sobrien#ifdef MBUF_STRESS_TEST 96130561Sobrienint mbuf_frag_size = 0; 9733965SjdpSYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 98130561Sobrien &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 99130561Sobrien#endif 10033965Sjdp 101218822Sdimstatic void ip_mloopback 102218822Sdim (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 10333965Sjdp 10433965Sjdp 10533965Sjdpextern int in_mcast_loop; 10677298Sobrienextern struct protosw inetsw[]; 10733965Sjdp 108218822Sdim/* 109218822Sdim * IP output. The packet in mbuf chain m contains a skeletal IP 11033965Sjdp * header (with len, off, ttl, proto, tos, src, dst). 111104834Sobrien * The mbuf chain containing the packet will be freed. 112218822Sdim * The mbuf opt, if present, will not be freed. 113218822Sdim * In the IP forwarding case, the packet will arrive with options already 114218822Sdim * inserted, so must have a NULL opt pointer. 115218822Sdim */ 116218822Sdimint 117218822Sdimip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 118218822Sdim struct ip_moptions *imo, struct inpcb *inp) 119218822Sdim{ 120218822Sdim struct ip *ip; 121218822Sdim struct ifnet *ifp = NULL; /* keep compiler happy */ 122218822Sdim struct mbuf *m0; 123218822Sdim int hlen = sizeof (struct ip); 124218822Sdim int mtu; 125218822Sdim int len, error = 0; 126218822Sdim int nortfree = 0; 127218822Sdim struct sockaddr_in *dst = NULL; /* keep compiler happy */ 128218822Sdim struct in_ifaddr *ia = NULL; 129218822Sdim int isbroadcast, sw_csum; 130218822Sdim struct route iproute; 131218822Sdim struct in_addr odst; 132218822Sdim#ifdef IPFIREWALL_FORWARD 133218822Sdim struct m_tag *fwd_tag = NULL; 134218822Sdim#endif 13560484Sobrien#ifdef IPSEC 13689857Sobrien int no_route_but_check_spd = 0; 13789857Sobrien#endif 13889857Sobrien M_ASSERTPKTHDR(m); 13989857Sobrien 140104834Sobrien if (inp != NULL) { 141104834Sobrien INP_LOCK_ASSERT(inp); 142218822Sdim M_SETFIB(m, inp->inp_inc.inc_fibnum); 14360484Sobrien if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) { 144104834Sobrien m->m_pkthdr.flowid = inp->inp_flowid; 14589857Sobrien m->m_flags |= M_FLOWID; 146130561Sobrien } 14733965Sjdp } 14833965Sjdp 14933965Sjdp if (ro == NULL) { 15033965Sjdp ro = &iproute; 15133965Sjdp bzero(ro, sizeof (*ro)); 152130561Sobrien 15333965Sjdp#ifdef FLOWTABLE 15433965Sjdp /* 15533965Sjdp * The flow table returns route entries valid for up to 30 15633965Sjdp * seconds; we rely on the remainder of ip_output() taking no 15733965Sjdp * longer than that long for the stability of ro_rt. The 15838889Sjdp * flow ID assignment must have happened before this point. 15938889Sjdp */ 16033965Sjdp if (flowtable_lookup(V_ip_ft, m, ro) == 0) 16138889Sjdp nortfree = 1; 16238889Sjdp#endif 16377298Sobrien } 16438889Sjdp 16577298Sobrien if (opt) { 16638889Sjdp len = 0; 16777298Sobrien m = ip_insertoptions(m, opt, &len); 16877298Sobrien if (len != 0) 16977298Sobrien hlen = len; 17077298Sobrien } 17138889Sjdp ip = mtod(m, struct ip *); 17233965Sjdp 17333965Sjdp /* 17433965Sjdp * Fill in IP header. If we are not allowing fragmentation, 17533965Sjdp * then the ip_id field is meaningless, but we don't set it 17633965Sjdp * to zero. Doing so causes various problems when devices along 17733965Sjdp * the path (routers, load balancers, firewalls, etc.) illegally 17877298Sobrien * disable DF on our packet. Note that a 16-bit counter 17977298Sobrien * will wrap around in less than 10 seconds at 100 Mbit/s on a 18033965Sjdp * medium with MTU 1500. See Steven M. Bellovin, "A Technique 18177298Sobrien * for Counting NATted Hosts", Proc. IMW'02, available at 18277298Sobrien * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>. 18333965Sjdp */ 18433965Sjdp if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 18533965Sjdp ip->ip_v = IPVERSION; 186130561Sobrien ip->ip_hl = hlen >> 2; 18733965Sjdp ip->ip_id = ip_newid(); 18877298Sobrien IPSTAT_INC(ips_localout); 18933965Sjdp } else { 19033965Sjdp hlen = ip->ip_hl << 2; 19177298Sobrien } 19233965Sjdp 19333965Sjdp dst = (struct sockaddr_in *)&ro->ro_dst; 19477298Sobrienagain: 19533965Sjdp /* 19633965Sjdp * If there is a cached route, 19777298Sobrien * check that it is to the same destination 19833965Sjdp * and is still up. If not, free it and try again. 19977298Sobrien * The address family should also be checked in case of sharing the 20077298Sobrien * cache with IPv6. 20133965Sjdp */ 20233965Sjdp if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 20333965Sjdp dst->sin_family != AF_INET || 20477298Sobrien dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 20533965Sjdp if (!nortfree) { 20633965Sjdp RTFREE(ro->ro_rt); 20733965Sjdp LLE_FREE(ro->ro_lle); 20833965Sjdp } 20933965Sjdp ro->ro_rt = (struct rtentry *)NULL; 21033965Sjdp ro->ro_lle = (struct llentry *)NULL; 21133965Sjdp } 21233965Sjdp#ifdef IPFIREWALL_FORWARD 21333965Sjdp if (ro->ro_rt == NULL && fwd_tag == NULL) { 21433965Sjdp#else 21533965Sjdp if (ro->ro_rt == NULL) { 21633965Sjdp#endif 21733965Sjdp bzero(dst, sizeof(*dst)); 21833965Sjdp dst->sin_family = AF_INET; 21933965Sjdp dst->sin_len = sizeof(*dst); 22033965Sjdp dst->sin_addr = ip->ip_dst; 22133965Sjdp } 22233965Sjdp /* 22333965Sjdp * If routing to interface only, short circuit routing lookup. 22433965Sjdp * The use of an all-ones broadcast address implies this; an 22533965Sjdp * interface is specified by the broadcast address of an interface, 22633965Sjdp * or the destination address of a ptp interface. 22733965Sjdp */ 22833965Sjdp if (flags & IP_SENDONES) { 22933965Sjdp if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL && 23033965Sjdp (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) { 23138889Sjdp IPSTAT_INC(ips_noroute); 23233965Sjdp error = ENETUNREACH; 23338889Sjdp goto bad; 23433965Sjdp } 23533965Sjdp ip->ip_dst.s_addr = INADDR_BROADCAST; 236130561Sobrien dst->sin_addr = ip->ip_dst; 237130561Sobrien ifp = ia->ia_ifp; 23833965Sjdp ip->ip_ttl = 1; 239130561Sobrien isbroadcast = 1; 24077298Sobrien } else if (flags & IP_ROUTETOIF) { 241130561Sobrien if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 24260484Sobrien (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 24377298Sobrien IPSTAT_INC(ips_noroute); 24477298Sobrien error = ENETUNREACH; 245130561Sobrien goto bad; 246130561Sobrien } 24760484Sobrien ifp = ia->ia_ifp; 248130561Sobrien ip->ip_ttl = 1; 249130561Sobrien isbroadcast = in_broadcast(dst->sin_addr, ifp); 25033965Sjdp } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 25177298Sobrien imo != NULL && imo->imo_multicast_ifp != NULL) { 25233965Sjdp /* 25333965Sjdp * Bypass the normal routing lookup for multicast 25433965Sjdp * packets if the interface is specified. 25533965Sjdp */ 25633965Sjdp ifp = imo->imo_multicast_ifp; 25733965Sjdp IFP_TO_IA(ifp, ia); 25833965Sjdp isbroadcast = 0; /* fool gcc */ 25933965Sjdp } else { 26033965Sjdp /* 26133965Sjdp * We want to do any cloning requested by the link layer, 26233965Sjdp * as this is probably required in all cases for correct 26333965Sjdp * operation (as it is for ARP). 26433965Sjdp */ 26533965Sjdp if (ro->ro_rt == NULL) 26633965Sjdp#ifdef RADIX_MPATH 26733965Sjdp rtalloc_mpath_fib(ro, 26833965Sjdp ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 26933965Sjdp inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); 27033965Sjdp#else 27133965Sjdp in_rtalloc_ign(ro, 0, 27233965Sjdp inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); 273130561Sobrien#endif 27433965Sjdp if (ro->ro_rt == NULL) { 27533965Sjdp#ifdef IPSEC 27633965Sjdp /* 277130561Sobrien * There is no route for this packet, but it is 27833965Sjdp * possible that a matching SPD entry exists. 27933965Sjdp */ 280130561Sobrien no_route_but_check_spd = 1; 28133965Sjdp mtu = 0; /* Silence GCC warning. */ 28233965Sjdp goto sendit; 28333965Sjdp#endif 284130561Sobrien IPSTAT_INC(ips_noroute); 28533965Sjdp error = EHOSTUNREACH; 28633965Sjdp goto bad; 28733965Sjdp } 28833965Sjdp ia = ifatoia(ro->ro_rt->rt_ifa); 28933965Sjdp ifa_ref(&ia->ia_ifa); 29033965Sjdp ifp = ro->ro_rt->rt_ifp; 291130561Sobrien ro->ro_rt->rt_rmx.rmx_pksent++; 29233965Sjdp if (ro->ro_rt->rt_flags & RTF_GATEWAY) 29333965Sjdp dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 294130561Sobrien if (ro->ro_rt->rt_flags & RTF_HOST) 29533965Sjdp isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 29633965Sjdp else 297130561Sobrien isbroadcast = in_broadcast(dst->sin_addr, ifp); 29833965Sjdp } 29933965Sjdp /* 30033965Sjdp * Calculate MTU. If we have a route that is up, use that, 30133965Sjdp * otherwise use the interface's MTU. 30233965Sjdp */ 30333965Sjdp if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) { 30433965Sjdp /* 30533965Sjdp * This case can happen if the user changed the MTU 30633965Sjdp * of an interface after enabling IP on it. Because 30733965Sjdp * most netifs don't keep track of routes pointing to 30877298Sobrien * them, there is no way for one to update all its 30933965Sjdp * routes when the MTU is changed. 31033965Sjdp */ 31133965Sjdp if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu) 31233965Sjdp ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 31333965Sjdp mtu = ro->ro_rt->rt_rmx.rmx_mtu; 31433965Sjdp } else { 31533965Sjdp mtu = ifp->if_mtu; 31633965Sjdp } 31733965Sjdp if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 31833965Sjdp m->m_flags |= M_MCAST; 319130561Sobrien /* 32033965Sjdp * IP destination address is multicast. Make sure "dst" 32133965Sjdp * still points to the address in "ro". (It may have been 32233965Sjdp * changed to point to a gateway address, above.) 32333965Sjdp */ 324130561Sobrien dst = (struct sockaddr_in *)&ro->ro_dst; 32533965Sjdp /* 32633965Sjdp * See if the caller provided any multicast options 327130561Sobrien */ 32833965Sjdp if (imo != NULL) { 32933965Sjdp ip->ip_ttl = imo->imo_multicast_ttl; 330130561Sobrien if (imo->imo_multicast_vif != -1) 33133965Sjdp ip->ip_src.s_addr = 33233965Sjdp ip_mcast_src ? 333130561Sobrien ip_mcast_src(imo->imo_multicast_vif) : 33433965Sjdp INADDR_ANY; 33533965Sjdp } else 33633965Sjdp ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 33733965Sjdp /* 33833965Sjdp * Confirm that the outgoing interface supports multicast. 33933965Sjdp */ 34033965Sjdp if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 34133965Sjdp if ((ifp->if_flags & IFF_MULTICAST) == 0) { 34233965Sjdp IPSTAT_INC(ips_noroute); 34333965Sjdp error = ENETUNREACH; 34433965Sjdp goto bad; 34533965Sjdp } 34633965Sjdp } 34733965Sjdp /* 34833965Sjdp * If source address not specified yet, use address 34933965Sjdp * of outgoing interface. 35033965Sjdp */ 35133965Sjdp if (ip->ip_src.s_addr == INADDR_ANY) { 35233965Sjdp /* Interface may have no addresses. */ 35333965Sjdp if (ia != NULL) 35433965Sjdp ip->ip_src = IA_SIN(ia)->sin_addr; 35533965Sjdp } 35660484Sobrien 35760484Sobrien if ((imo == NULL && in_mcast_loop) || 35877298Sobrien (imo && imo->imo_multicast_loop)) { 35960484Sobrien /* 36060484Sobrien * Loop back multicast datagram if not expressly 36138889Sjdp * forbidden to do so, even if we are not a member 36238889Sjdp * of the group; ip_input() will filter it later, 36338889Sjdp * thus deferring a hash lookup and mutex acquisition 36438889Sjdp * at the expense of a cheap copy using m_copym(). 36538889Sjdp */ 36638889Sjdp ip_mloopback(ifp, m, dst, hlen); 36733965Sjdp } else { 36833965Sjdp /* 369218822Sdim * If we are acting as a multicast router, perform 370218822Sdim * multicast forwarding as if the packet had just 371218822Sdim * arrived on the interface to which we are about 37233965Sjdp * to send. The multicast forwarding function 373218822Sdim * recursively calls this function, using the 374218822Sdim * IP_FORWARDING flag to prevent infinite recursion. 37533965Sjdp * 37633965Sjdp * Multicasts that are looped back by ip_mloopback(), 37733965Sjdp * above, will be forwarded by the ip_input() routine, 37833965Sjdp * if necessary. 37933965Sjdp */ 38033965Sjdp if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 38133965Sjdp /* 38233965Sjdp * If rsvp daemon is not running, do not 38333965Sjdp * set ip_moptions. This ensures that the packet 38433965Sjdp * is multicast and not just sent down one link 38533965Sjdp * as prescribed by rsvpd. 38633965Sjdp */ 38733965Sjdp if (!V_rsvp_on) 388130561Sobrien imo = NULL; 38933965Sjdp if (ip_mforward && 39033965Sjdp ip_mforward(ip, ifp, m, imo) != 0) { 39133965Sjdp m_freem(m); 39260484Sobrien goto done; 39360484Sobrien } 39460484Sobrien } 39560484Sobrien } 39660484Sobrien 39760484Sobrien /* 39889857Sobrien * Multicasts with a time-to-live of zero may be looped- 39960484Sobrien * back, above, but must not be transmitted on a network. 40089857Sobrien * Also, multicasts addressed to the loopback interface 40160484Sobrien * are not sent -- the above call to ip_mloopback() will 40289857Sobrien * loop back a copy. ip_input() will drop the copy if 40389857Sobrien * this host does not belong to the destination group on 40460484Sobrien * the loopback interface. 405130561Sobrien */ 406130561Sobrien if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 407218822Sdim m_freem(m); 408218822Sdim goto done; 409218822Sdim } 410218822Sdim 411218822Sdim goto sendit; 412218822Sdim } 413130561Sobrien 414130561Sobrien /* 415130561Sobrien * If the source address is not specified yet, use the address 416218822Sdim * of the outoing interface. 417218822Sdim */ 418218822Sdim if (ip->ip_src.s_addr == INADDR_ANY) { 419218822Sdim /* Interface may have no addresses. */ 420218822Sdim if (ia != NULL) { 421218822Sdim ip->ip_src = IA_SIN(ia)->sin_addr; 422218822Sdim } 423218822Sdim } 424218822Sdim 425218822Sdim /* 426218822Sdim * Verify that we have any chance at all of being able to queue the 427218822Sdim * packet or packet fragments, unless ALTQ is enabled on the given 428218822Sdim * interface in which case packetdrop should be done by queueing. 429218822Sdim */ 430218822Sdim#ifdef ALTQ 431218822Sdim if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) && 432218822Sdim ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 433218822Sdim ifp->if_snd.ifq_maxlen)) 434218822Sdim#else 435218822Sdim if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 436218822Sdim ifp->if_snd.ifq_maxlen) 437218822Sdim#endif /* ALTQ */ 438218822Sdim { 43989857Sobrien error = ENOBUFS; 44060484Sobrien IPSTAT_INC(ips_odropped); 44189857Sobrien ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1); 44260484Sobrien goto bad; 44360484Sobrien } 44477298Sobrien 44589857Sobrien /* 44660484Sobrien * Look for broadcast address and 447218822Sdim * verify user is allowed to send 44860484Sobrien * such a packet. 44960484Sobrien */ 450130561Sobrien if (isbroadcast) { 45189857Sobrien if ((ifp->if_flags & IFF_BROADCAST) == 0) { 45260484Sobrien error = EADDRNOTAVAIL; 453218822Sdim goto bad; 454218822Sdim } 455218822Sdim if ((flags & IP_ALLOWBROADCAST) == 0) { 456218822Sdim error = EACCES; 457218822Sdim goto bad; 458218822Sdim } 459218822Sdim /* don't allow broadcast messages to be fragmented */ 460104834Sobrien if (ip->ip_len > mtu) { 461104834Sobrien error = EMSGSIZE; 462104834Sobrien goto bad; 463218822Sdim } 464218822Sdim m->m_flags |= M_BCAST; 46560484Sobrien } else { 46660484Sobrien m->m_flags &= ~M_BCAST; 467218822Sdim } 46860484Sobrien 46989857Sobriensendit: 47089857Sobrien#ifdef IPSEC 47189857Sobrien switch(ip_ipsec_output(&m, inp, &flags, &error, &ifp)) { 47260484Sobrien case 1: 47338889Sjdp goto bad; 47438889Sjdp case -1: 475130561Sobrien goto done; 476130561Sobrien case 0: 47738889Sjdp default: 47838889Sjdp break; /* Continue with packet processing. */ 479130561Sobrien } 480130561Sobrien /* 481130561Sobrien * Check if there was a route for this packet; return error if not. 48238889Sjdp */ 48377298Sobrien if (no_route_but_check_spd) { 48438889Sjdp IPSTAT_INC(ips_noroute); 48538889Sjdp error = EHOSTUNREACH; 48638889Sjdp goto bad; 48738889Sjdp } 48838889Sjdp /* Update variables that are affected by ipsec4_output(). */ 48938889Sjdp ip = mtod(m, struct ip *); 49038889Sjdp hlen = ip->ip_hl << 2; 49138889Sjdp#endif /* IPSEC */ 49238889Sjdp 49338889Sjdp /* Jump over all PFIL processing if hooks are not active. */ 49438889Sjdp if (!PFIL_HOOKED(&inet_pfil_hook)) 49560484Sobrien goto passout; 49660484Sobrien 497104834Sobrien /* Run through list of hooks for output packets. */ 498104834Sobrien odst.s_addr = ip->ip_dst.s_addr; 499104834Sobrien error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp); 500104834Sobrien if (error != 0 || m == NULL) 50138889Sjdp goto done; 50238889Sjdp 50338889Sjdp ip = mtod(m, struct ip *); 50438889Sjdp 50538889Sjdp /* See if destination IP address was changed by packet filter. */ 50638889Sjdp if (odst.s_addr != ip->ip_dst.s_addr) { 50738889Sjdp m->m_flags |= M_SKIP_FIREWALL; 50860484Sobrien /* If destination is now ourself drop to ip_input(). */ 509104834Sobrien if (in_localip(ip->ip_dst)) { 510104834Sobrien m->m_flags |= M_FASTFWD_OURS; 51138889Sjdp if (m->m_pkthdr.rcvif == NULL) 51238889Sjdp m->m_pkthdr.rcvif = V_loif; 51360484Sobrien if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 51438889Sjdp m->m_pkthdr.csum_flags |= 51538889Sjdp CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 516218822Sdim m->m_pkthdr.csum_data = 0xffff; 517218822Sdim } 518218822Sdim m->m_pkthdr.csum_flags |= 519218822Sdim CSUM_IP_CHECKED | CSUM_IP_VALID; 520218822Sdim#ifdef SCTP 521218822Sdim if (m->m_pkthdr.csum_flags & CSUM_SCTP) 522218822Sdim m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 523218822Sdim#endif 524218822Sdim error = netisr_queue(NETISR_IP, m); 525218822Sdim goto done; 52633965Sjdp } else 527218822Sdim goto again; /* Redo the routing table lookup. */ 528218822Sdim } 529218822Sdim 53033965Sjdp#ifdef IPFIREWALL_FORWARD 53189857Sobrien /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 532218822Sdim if (m->m_flags & M_FASTFWD_OURS) { 53389857Sobrien if (m->m_pkthdr.rcvif == NULL) 53489857Sobrien m->m_pkthdr.rcvif = V_loif; 53589857Sobrien if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 53660484Sobrien m->m_pkthdr.csum_flags |= 53789857Sobrien CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 53889857Sobrien m->m_pkthdr.csum_data = 0xffff; 53989857Sobrien } 54089857Sobrien#ifdef SCTP 54189857Sobrien if (m->m_pkthdr.csum_flags & CSUM_SCTP) 54289857Sobrien m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 54389857Sobrien#endif 54489857Sobrien m->m_pkthdr.csum_flags |= 545218822Sdim CSUM_IP_CHECKED | CSUM_IP_VALID; 54689857Sobrien 54789857Sobrien error = netisr_queue(NETISR_IP, m); 54889857Sobrien goto done; 54989857Sobrien } 550218822Sdim /* Or forward to some other address? */ 55189857Sobrien fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 55289857Sobrien if (fwd_tag) { 55389857Sobrien dst = (struct sockaddr_in *)&ro->ro_dst; 55489857Sobrien bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 55589857Sobrien m->m_flags |= M_SKIP_FIREWALL; 55689857Sobrien m_tag_delete(m, fwd_tag); 55789857Sobrien goto again; 55860484Sobrien } 559218822Sdim#endif /* IPFIREWALL_FORWARD */ 560218822Sdim 561218822Sdimpassout: 56260484Sobrien /* 127/8 must not appear on wire - RFC1122. */ 56360484Sobrien if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 56460484Sobrien (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 56560484Sobrien if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 56660484Sobrien IPSTAT_INC(ips_badaddr); 56789857Sobrien error = EADDRNOTAVAIL; 56860484Sobrien goto bad; 56977298Sobrien } 57077298Sobrien } 57177298Sobrien 57260484Sobrien m->m_pkthdr.csum_flags |= CSUM_IP; 57360484Sobrien sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 57460484Sobrien if (sw_csum & CSUM_DELAY_DATA) { 57560484Sobrien in_delayed_cksum(m); 57660484Sobrien sw_csum &= ~CSUM_DELAY_DATA; 57760484Sobrien } 57860484Sobrien#ifdef SCTP 579130561Sobrien if (sw_csum & CSUM_SCTP) { 58089857Sobrien sctp_delayed_cksum(m); 58189857Sobrien sw_csum &= ~CSUM_SCTP; 58260484Sobrien } 58360484Sobrien#endif 58460484Sobrien m->m_pkthdr.csum_flags &= ifp->if_hwassist; 58577298Sobrien 58660484Sobrien /* 58760484Sobrien * If small enough for interface, or the interface will take 58860484Sobrien * care of the fragmentation for us, we can just send directly. 58989857Sobrien */ 59089857Sobrien if (ip->ip_len <= mtu || 59189857Sobrien (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || 59289857Sobrien ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { 59389857Sobrien ip->ip_len = htons(ip->ip_len); 594130561Sobrien ip->ip_off = htons(ip->ip_off); 595104834Sobrien ip->ip_sum = 0; 596104834Sobrien if (sw_csum & CSUM_DELAY_IP) 597104834Sobrien ip->ip_sum = in_cksum(m, hlen); 598104834Sobrien 599104834Sobrien /* 600104834Sobrien * Record statistics for this interface address. 601104834Sobrien * With CSUM_TSO the byte/packet count will be slightly 602104834Sobrien * incorrect because we count the IP+TCP headers only 603104834Sobrien * once instead of for every generated packet. 604104834Sobrien */ 605104834Sobrien if (!(flags & IP_FORWARDING) && ia) { 606104834Sobrien if (m->m_pkthdr.csum_flags & CSUM_TSO) 607104834Sobrien ia->ia_ifa.if_opackets += 608104834Sobrien m->m_pkthdr.len / m->m_pkthdr.tso_segsz; 609130561Sobrien else 61060484Sobrien ia->ia_ifa.if_opackets++; 61160484Sobrien ia->ia_ifa.if_obytes += m->m_pkthdr.len; 61289857Sobrien } 61389857Sobrien#ifdef MBUF_STRESS_TEST 61460484Sobrien if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 61589857Sobrien m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 61660484Sobrien#endif 61760484Sobrien /* 61889857Sobrien * Reset layer specific mbuf flags 619104834Sobrien * to avoid confusing lower layers. 620104834Sobrien */ 621104834Sobrien m->m_flags &= ~(M_PROTOFLAGS); 622104834Sobrien error = (*ifp->if_output)(ifp, m, 623104834Sobrien (struct sockaddr *)dst, ro); 624104834Sobrien goto done; 625104834Sobrien } 62689857Sobrien 62789857Sobrien /* Balk when DF bit is set or the interface didn't support TSO. */ 62889857Sobrien if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 62989857Sobrien error = EMSGSIZE; 63060484Sobrien IPSTAT_INC(ips_cantfrag); 63160484Sobrien goto bad; 63260484Sobrien } 633104834Sobrien 634104834Sobrien /* 635104834Sobrien * Too large for interface; fragment if possible. If successful, 636104834Sobrien * on return, m will point to a list of packets to be sent. 63789857Sobrien */ 63889857Sobrien error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum); 63989857Sobrien if (error) 64089857Sobrien goto bad; 64189857Sobrien for (; m; m = m0) { 64289857Sobrien m0 = m->m_nextpkt; 64389857Sobrien m->m_nextpkt = 0; 64489857Sobrien if (error == 0) { 64589857Sobrien /* Record statistics for this interface address. */ 64689857Sobrien if (ia != NULL) { 64789857Sobrien ia->ia_ifa.if_opackets++; 64889857Sobrien ia->ia_ifa.if_obytes += m->m_pkthdr.len; 64989857Sobrien } 65089857Sobrien /* 65189857Sobrien * Reset layer specific mbuf flags 65260484Sobrien * to avoid confusing upper layers. 65389857Sobrien */ 65489857Sobrien m->m_flags &= ~(M_PROTOFLAGS); 65560484Sobrien 656104834Sobrien error = (*ifp->if_output)(ifp, m, 657104834Sobrien (struct sockaddr *)dst, ro); 658104834Sobrien } else 659104834Sobrien m_freem(m); 660104834Sobrien } 661104834Sobrien 662104834Sobrien if (error == 0) 663104834Sobrien IPSTAT_INC(ips_fragmented); 664104834Sobrien 665104834Sobriendone: 666104834Sobrien if (ro == &iproute && ro->ro_rt && !nortfree) { 667104834Sobrien RTFREE(ro->ro_rt); 668104834Sobrien } 669104834Sobrien if (ia != NULL) 670104834Sobrien ifa_free(&ia->ia_ifa); 671104834Sobrien return (error); 672104834Sobrienbad: 673104834Sobrien m_freem(m); 674104834Sobrien goto done; 675218822Sdim} 676104834Sobrien 67789857Sobrien/* 678104834Sobrien * Create a chain of fragments which fit the given mtu. m_frag points to the 679104834Sobrien * mbuf to be fragmented; on return it points to the chain with the fragments. 68089857Sobrien * Return 0 if no error. If error, m_frag may contain a partially built 68160484Sobrien * chain of fragments that should be freed by the caller. 68289857Sobrien * 68389857Sobrien * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 68489857Sobrien * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 68560484Sobrien */ 68689857Sobrienint 68789857Sobrienip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 68860484Sobrien u_long if_hwassist_flags, int sw_csum) 68989857Sobrien{ 69089857Sobrien int error = 0; 69160484Sobrien int hlen = ip->ip_hl << 2; 69289857Sobrien int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 69389857Sobrien int off; 69489857Sobrien struct mbuf *m0 = *m_frag; /* the original packet */ 69589857Sobrien int firstlen; 69689857Sobrien struct mbuf **mnext; 69789857Sobrien int nfrags; 69889857Sobrien 69989857Sobrien if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 70089857Sobrien IPSTAT_INC(ips_cantfrag); 70189857Sobrien return EMSGSIZE; 70260484Sobrien } 70389857Sobrien 70489857Sobrien /* 70589857Sobrien * Must be able to put at least 8 bytes per fragment. 70689857Sobrien */ 70760484Sobrien if (len < 8) 70860484Sobrien return EMSGSIZE; 70960484Sobrien 710218822Sdim /* 711218822Sdim * If the interface will not calculate checksums on 712218822Sdim * fragmented packets, then do it here. 713218822Sdim */ 714218822Sdim if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 715218822Sdim (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 716218822Sdim in_delayed_cksum(m0); 717218822Sdim m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 718218822Sdim } 719218822Sdim#ifdef SCTP 720218822Sdim if (m0->m_pkthdr.csum_flags & CSUM_SCTP && 72160484Sobrien (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 72260484Sobrien sctp_delayed_cksum(m0); 72360484Sobrien m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 724218822Sdim } 72560484Sobrien#endif 72660484Sobrien if (len > PAGE_SIZE) { 72760484Sobrien /* 72860484Sobrien * Fragment large datagrams such that each segment 72960484Sobrien * contains a multiple of PAGE_SIZE amount of data, 73060484Sobrien * plus headers. This enables a receiver to perform 73160484Sobrien * page-flipping zero-copy optimizations. 73260484Sobrien * 733130561Sobrien * XXX When does this help given that sender and receiver 73460484Sobrien * could have different page sizes, and also mtu could 73560484Sobrien * be less than the receiver's page size ? 73660484Sobrien */ 73760484Sobrien int newlen; 73860484Sobrien struct mbuf *m; 73960484Sobrien 74060484Sobrien for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 74160484Sobrien off += m->m_len; 74260484Sobrien 74360484Sobrien /* 74460484Sobrien * firstlen (off - hlen) must be aligned on an 74560484Sobrien * 8-byte boundary 74660484Sobrien */ 74760484Sobrien if (off < hlen) 74860484Sobrien goto smart_frag_failure; 74960484Sobrien off = ((off - hlen) & ~7) + hlen; 75060484Sobrien newlen = (~PAGE_MASK) & mtu; 75160484Sobrien if ((newlen + sizeof (struct ip)) > mtu) { 75260484Sobrien /* we failed, go back the default */ 75360484Sobriensmart_frag_failure: 75460484Sobrien newlen = len; 75560484Sobrien off = hlen + len; 75660484Sobrien } 75760484Sobrien len = newlen; 75860484Sobrien 75960484Sobrien } else { 76060484Sobrien off = hlen + len; 76160484Sobrien } 76260484Sobrien 76360484Sobrien firstlen = off - hlen; 76460484Sobrien mnext = &m0->m_nextpkt; /* pointer to next packet */ 76560484Sobrien 76660484Sobrien /* 76760484Sobrien * Loop through length of segment after first fragment, 76860484Sobrien * make new header and copy data of each part and link onto chain. 76960484Sobrien * Here, m0 is the original packet, m is the fragment being created. 77091041Sobrien * The fragments are linked off the m_nextpkt of the original 77160484Sobrien * packet, which after processing serves as the first fragment. 77260484Sobrien */ 77360484Sobrien for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 774104834Sobrien struct ip *mhip; /* ip header on the fragment */ 775104834Sobrien struct mbuf *m; 776218822Sdim int mhlen = sizeof (struct ip); 77760484Sobrien 77860484Sobrien MGETHDR(m, M_DONTWAIT, MT_DATA); 77960484Sobrien if (m == NULL) { 78089857Sobrien error = ENOBUFS; 78191041Sobrien IPSTAT_INC(ips_odropped); 782104834Sobrien goto done; 783104834Sobrien } 784104834Sobrien m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 785104834Sobrien /* 786104834Sobrien * In the first mbuf, leave room for the link header, then 787104834Sobrien * copy the original IP header including options. The payload 788104834Sobrien * goes into an additional mbuf chain returned by m_copym(). 789104834Sobrien */ 790104834Sobrien m->m_data += max_linkhdr; 791104834Sobrien mhip = mtod(m, struct ip *); 79260484Sobrien *mhip = *ip; 79360484Sobrien if (hlen > sizeof (struct ip)) { 79460484Sobrien mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 79560484Sobrien mhip->ip_v = IPVERSION; 79660484Sobrien mhip->ip_hl = mhlen >> 2; 79777298Sobrien } 79860484Sobrien m->m_len = mhlen; 79960484Sobrien /* XXX do we need to add ip->ip_off below ? */ 80060484Sobrien mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 80160484Sobrien if (off + len >= ip->ip_len) { /* last fragment */ 80260484Sobrien len = ip->ip_len - off; 80360484Sobrien m->m_flags |= M_LASTFRAG; 80433965Sjdp } else 80533965Sjdp mhip->ip_off |= IP_MF; 80633965Sjdp mhip->ip_len = htons((u_short)(len + mhlen)); 807218822Sdim m->m_next = m_copym(m0, off, len, M_DONTWAIT); 808218822Sdim if (m->m_next == NULL) { /* copy failed */ 809218822Sdim m_free(m); 81033965Sjdp error = ENOBUFS; /* ??? */ 81133965Sjdp IPSTAT_INC(ips_odropped); 81233965Sjdp goto done; 81333965Sjdp } 81460484Sobrien m->m_pkthdr.len = mhlen + len; 81560484Sobrien m->m_pkthdr.rcvif = NULL; 81660484Sobrien#ifdef MAC 817130561Sobrien mac_netinet_fragment(m0, m); 81860484Sobrien#endif 81960484Sobrien m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 82060484Sobrien mhip->ip_off = htons(mhip->ip_off); 82160484Sobrien mhip->ip_sum = 0; 82260484Sobrien if (sw_csum & CSUM_DELAY_IP) 823130561Sobrien mhip->ip_sum = in_cksum(m, mhlen); 82433965Sjdp *mnext = m; 82533965Sjdp mnext = &m->m_nextpkt; 82633965Sjdp } 827130561Sobrien IPSTAT_ADD(ips_ofragments, nfrags); 82833965Sjdp 82933965Sjdp /* set first marker for fragment chain */ 83033965Sjdp m0->m_flags |= M_FIRSTFRAG | M_FRAG; 83189857Sobrien m0->m_pkthdr.csum_data = nfrags; 83289857Sobrien 83389857Sobrien /* 83489857Sobrien * Update first fragment by trimming what's been copied out 83589857Sobrien * and updating header. 83633965Sjdp */ 83733965Sjdp m_adj(m0, hlen + firstlen - ip->ip_len); 83833965Sjdp m0->m_pkthdr.len = hlen + firstlen; 83933965Sjdp ip->ip_len = htons((u_short)m0->m_pkthdr.len); 84033965Sjdp ip->ip_off |= IP_MF; 84133965Sjdp ip->ip_off = htons(ip->ip_off); 84233965Sjdp ip->ip_sum = 0; 84333965Sjdp if (sw_csum & CSUM_DELAY_IP) 84460484Sobrien ip->ip_sum = in_cksum(m0, hlen); 84560484Sobrien 84660484Sobriendone: 84760484Sobrien *m_frag = m0; 84860484Sobrien return error; 84960484Sobrien} 850130561Sobrien 851130561Sobrienvoid 85260484Sobrienin_delayed_cksum(struct mbuf *m) 85360484Sobrien{ 85460484Sobrien struct ip *ip; 85560484Sobrien u_short csum, offset; 85660484Sobrien 85760484Sobrien ip = mtod(m, struct ip *); 85860484Sobrien offset = ip->ip_hl << 2 ; 85989857Sobrien csum = in_cksum_skip(m, ip->ip_len, offset); 860104834Sobrien if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 86189857Sobrien csum = 0xffff; 86289857Sobrien offset += m->m_pkthdr.csum_data; /* checksum offset */ 86360484Sobrien 864130561Sobrien if (offset + sizeof(u_short) > m->m_len) { 86589857Sobrien printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 86660484Sobrien m->m_len, offset, ip->ip_p); 86760484Sobrien /* 86860484Sobrien * XXX 86960484Sobrien * this shouldn't happen, but if it does, the 87060484Sobrien * correct behavior may be to insert the checksum 87160484Sobrien * in the appropriate next mbuf in the chain. 87289857Sobrien */ 87389857Sobrien return; 87489857Sobrien } 87589857Sobrien *(u_short *)(m->m_data + offset) = csum; 87689857Sobrien} 87760484Sobrien 87889857Sobrien/* 87960484Sobrien * IP socket option processing. 88060484Sobrien */ 88160484Sobrienint 88289857Sobrienip_ctloutput(struct socket *so, struct sockopt *sopt) 883104834Sobrien{ 88489857Sobrien struct inpcb *inp = sotoinpcb(so); 88560484Sobrien int error, optval; 88689857Sobrien 88789857Sobrien error = optval = 0; 88889857Sobrien if (sopt->sopt_level != IPPROTO_IP) { 88989857Sobrien if ((sopt->sopt_level == SOL_SOCKET) && 89089857Sobrien (sopt->sopt_name == SO_SETFIB)) { 89189857Sobrien inp->inp_inc.inc_fibnum = so->so_fibnum; 89289857Sobrien return (0); 89360484Sobrien } 89489857Sobrien return (EINVAL); 89560484Sobrien } 89689857Sobrien 89789857Sobrien switch (sopt->sopt_dir) { 89889857Sobrien case SOPT_SET: 89989857Sobrien switch (sopt->sopt_name) { 90089857Sobrien case IP_OPTIONS: 90160484Sobrien#ifdef notyet 90289857Sobrien case IP_RETOPTS: 90360484Sobrien#endif 904130561Sobrien { 90589857Sobrien struct mbuf *m; 90689857Sobrien if (sopt->sopt_valsize > MLEN) { 90789857Sobrien error = EMSGSIZE; 90889857Sobrien break; 90989857Sobrien } 91089857Sobrien MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 91189857Sobrien if (m == NULL) { 91260484Sobrien error = ENOBUFS; 91360484Sobrien break; 91460484Sobrien } 91560484Sobrien m->m_len = sopt->sopt_valsize; 91660484Sobrien error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 91760484Sobrien m->m_len); 91889857Sobrien if (error) { 91960484Sobrien m_free(m); 92033965Sjdp break; 92133965Sjdp } 92233965Sjdp INP_WLOCK(inp); 92333965Sjdp error = ip_pcbopts(inp, sopt->sopt_name, m); 92433965Sjdp INP_WUNLOCK(inp); 92533965Sjdp return (error); 92633965Sjdp } 92733965Sjdp 92833965Sjdp case IP_BINDANY: 92933965Sjdp if (sopt->sopt_td != NULL) { 93033965Sjdp error = priv_check(sopt->sopt_td, 93177298Sobrien PRIV_NETINET_BINDANY); 93233965Sjdp if (error) 93333965Sjdp break; 93433965Sjdp } 93533965Sjdp /* FALLTHROUGH */ 93633965Sjdp case IP_TOS: 93733965Sjdp case IP_TTL: 93833965Sjdp case IP_MINTTL: 93933965Sjdp case IP_RECVOPTS: 94033965Sjdp case IP_RECVRETOPTS: 94133965Sjdp case IP_RECVDSTADDR: 94233965Sjdp case IP_RECVTTL: 943130561Sobrien case IP_RECVIF: 94433965Sjdp case IP_FAITH: 94533965Sjdp case IP_ONESBCAST: 94633965Sjdp case IP_DONTFRAG: 94733965Sjdp error = sooptcopyin(sopt, &optval, sizeof optval, 94833965Sjdp sizeof optval); 94933965Sjdp if (error) 95060484Sobrien break; 95133965Sjdp 95233965Sjdp switch (sopt->sopt_name) { 95333965Sjdp case IP_TOS: 95433965Sjdp inp->inp_ip_tos = optval; 95533965Sjdp break; 95633965Sjdp 95733965Sjdp case IP_TTL: 95833965Sjdp inp->inp_ip_ttl = optval; 95933965Sjdp break; 96033965Sjdp 96133965Sjdp case IP_MINTTL: 96233965Sjdp if (optval >= 0 && optval <= MAXTTL) 96333965Sjdp inp->inp_ip_minttl = optval; 96433965Sjdp else 96533965Sjdp error = EINVAL; 96660484Sobrien break; 96760484Sobrien 96860484Sobrien#define OPTSET(bit) do { \ 96960484Sobrien INP_WLOCK(inp); \ 97089857Sobrien if (optval) \ 97189857Sobrien inp->inp_flags |= bit; \ 97289857Sobrien else \ 97360484Sobrien inp->inp_flags &= ~bit; \ 97433965Sjdp INP_WUNLOCK(inp); \ 97533965Sjdp} while (0) 97633965Sjdp 97789857Sobrien case IP_RECVOPTS: 97889857Sobrien OPTSET(INP_RECVOPTS); 97989857Sobrien break; 98033965Sjdp 98160484Sobrien case IP_RECVRETOPTS: 98260484Sobrien OPTSET(INP_RECVRETOPTS); 983130561Sobrien break; 98489857Sobrien 98589857Sobrien case IP_RECVDSTADDR: 98689857Sobrien OPTSET(INP_RECVDSTADDR); 98760484Sobrien break; 98860484Sobrien 98933965Sjdp case IP_RECVTTL: 99033965Sjdp OPTSET(INP_RECVTTL); 99133965Sjdp break; 99233965Sjdp 99333965Sjdp case IP_RECVIF: 99433965Sjdp OPTSET(INP_RECVIF); 995218822Sdim break; 996218822Sdim 997218822Sdim case IP_FAITH: 998218822Sdim OPTSET(INP_FAITH); 99933965Sjdp break; 100033965Sjdp 100133965Sjdp case IP_ONESBCAST: 100233965Sjdp OPTSET(INP_ONESBCAST); 100333965Sjdp break; 100433965Sjdp case IP_DONTFRAG: 100533965Sjdp OPTSET(INP_DONTFRAG); 100633965Sjdp break; 100733965Sjdp case IP_BINDANY: 100833965Sjdp OPTSET(INP_BINDANY); 1009130561Sobrien break; 101033965Sjdp } 101133965Sjdp break; 101233965Sjdp#undef OPTSET 101333965Sjdp 101433965Sjdp /* 101533965Sjdp * Multicast socket options are processed by the in_mcast 101633965Sjdp * module. 101733965Sjdp */ 101833965Sjdp case IP_MULTICAST_IF: 1019130561Sobrien case IP_MULTICAST_VIF: 102033965Sjdp case IP_MULTICAST_TTL: 102133965Sjdp case IP_MULTICAST_LOOP: 102233965Sjdp case IP_ADD_MEMBERSHIP: 1023130561Sobrien case IP_DROP_MEMBERSHIP: 102433965Sjdp case IP_ADD_SOURCE_MEMBERSHIP: 102533965Sjdp case IP_DROP_SOURCE_MEMBERSHIP: 102633965Sjdp case IP_BLOCK_SOURCE: 102733965Sjdp case IP_UNBLOCK_SOURCE: 102833965Sjdp case IP_MSFILTER: 102933965Sjdp case MCAST_JOIN_GROUP: 103033965Sjdp case MCAST_LEAVE_GROUP: 1031130561Sobrien case MCAST_JOIN_SOURCE_GROUP: 103233965Sjdp case MCAST_LEAVE_SOURCE_GROUP: 103333965Sjdp case MCAST_BLOCK_SOURCE: 103433965Sjdp case MCAST_UNBLOCK_SOURCE: 103533965Sjdp error = inp_setmoptions(inp, sopt); 103677298Sobrien break; 103733965Sjdp 103833965Sjdp case IP_PORTRANGE: 103933965Sjdp error = sooptcopyin(sopt, &optval, sizeof optval, 104033965Sjdp sizeof optval); 104177298Sobrien if (error) 104233965Sjdp break; 104333965Sjdp 104477298Sobrien INP_WLOCK(inp); 104533965Sjdp switch (optval) { 104677298Sobrien case IP_PORTRANGE_DEFAULT: 104733965Sjdp inp->inp_flags &= ~(INP_LOWPORT); 104833965Sjdp inp->inp_flags &= ~(INP_HIGHPORT); 104977298Sobrien break; 105033965Sjdp 105177298Sobrien case IP_PORTRANGE_HIGH: 105233965Sjdp inp->inp_flags &= ~(INP_LOWPORT); 105333965Sjdp inp->inp_flags |= INP_HIGHPORT; 105433965Sjdp break; 105533965Sjdp 105633965Sjdp case IP_PORTRANGE_LOW: 105733965Sjdp inp->inp_flags &= ~(INP_HIGHPORT); 105833965Sjdp inp->inp_flags |= INP_LOWPORT; 105933965Sjdp break; 106033965Sjdp 106133965Sjdp default: 106233965Sjdp error = EINVAL; 106333965Sjdp break; 106433965Sjdp } 106533965Sjdp INP_WUNLOCK(inp); 106660484Sobrien break; 106760484Sobrien 106860484Sobrien#ifdef IPSEC 106933965Sjdp case IP_IPSEC_POLICY: 107033965Sjdp { 107133965Sjdp caddr_t req; 107233965Sjdp struct mbuf *m; 1073130561Sobrien 107477298Sobrien if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 107533965Sjdp break; 107633965Sjdp if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 107733965Sjdp break; 107833965Sjdp req = mtod(m, caddr_t); 1079130561Sobrien error = ipsec_set_policy(inp, sopt->sopt_name, req, 108033965Sjdp m->m_len, (sopt->sopt_td != NULL) ? 108133965Sjdp sopt->sopt_td->td_ucred : NULL); 108233965Sjdp m_freem(m); 108333965Sjdp break; 108433965Sjdp } 108533965Sjdp#endif /* IPSEC */ 108633965Sjdp 108733965Sjdp default: 1088130561Sobrien error = ENOPROTOOPT; 108977298Sobrien break; 109033965Sjdp } 109133965Sjdp break; 109277298Sobrien 109333965Sjdp case SOPT_GET: 109433965Sjdp switch (sopt->sopt_name) { 109533965Sjdp case IP_OPTIONS: 109633965Sjdp case IP_RETOPTS: 109733965Sjdp if (inp->inp_options) 109833965Sjdp error = sooptcopyout(sopt, 109933965Sjdp mtod(inp->inp_options, 110033965Sjdp char *), 110133965Sjdp inp->inp_options->m_len); 110233965Sjdp else 110333965Sjdp sopt->sopt_valsize = 0; 1104218822Sdim break; 1105218822Sdim 1106218822Sdim case IP_TOS: 1107218822Sdim case IP_TTL: 110833965Sjdp case IP_MINTTL: 110933965Sjdp case IP_RECVOPTS: 111033965Sjdp case IP_RECVRETOPTS: 1111218822Sdim case IP_RECVDSTADDR: 111233965Sjdp case IP_RECVTTL: 111333965Sjdp case IP_RECVIF: 111433965Sjdp case IP_PORTRANGE: 111533965Sjdp case IP_FAITH: 111633965Sjdp case IP_ONESBCAST: 111733965Sjdp case IP_DONTFRAG: 111833965Sjdp switch (sopt->sopt_name) { 111989857Sobrien 112033965Sjdp case IP_TOS: 112133965Sjdp optval = inp->inp_ip_tos; 112233965Sjdp break; 112360484Sobrien 112433965Sjdp case IP_TTL: 112560484Sobrien optval = inp->inp_ip_ttl; 112677298Sobrien break; 112733965Sjdp 112833965Sjdp case IP_MINTTL: 112960484Sobrien optval = inp->inp_ip_minttl; 113033965Sjdp break; 113133965Sjdp 1132218822Sdim#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1133218822Sdim 113433965Sjdp case IP_RECVOPTS: 113533965Sjdp optval = OPTBIT(INP_RECVOPTS); 1136218822Sdim break; 1137218822Sdim 1138218822Sdim case IP_RECVRETOPTS: 113933965Sjdp optval = OPTBIT(INP_RECVRETOPTS); 114033965Sjdp break; 114133965Sjdp 1142218822Sdim case IP_RECVDSTADDR: 114333965Sjdp optval = OPTBIT(INP_RECVDSTADDR); 1144218822Sdim break; 1145218822Sdim 1146218822Sdim case IP_RECVTTL: 1147218822Sdim optval = OPTBIT(INP_RECVTTL); 1148218822Sdim break; 1149218822Sdim 115033965Sjdp case IP_RECVIF: 1151218822Sdim optval = OPTBIT(INP_RECVIF); 115233965Sjdp break; 115333965Sjdp 115433965Sjdp case IP_PORTRANGE: 115533965Sjdp if (inp->inp_flags & INP_HIGHPORT) 115633965Sjdp optval = IP_PORTRANGE_HIGH; 115733965Sjdp else if (inp->inp_flags & INP_LOWPORT) 115833965Sjdp optval = IP_PORTRANGE_LOW; 115933965Sjdp else 116033965Sjdp optval = 0; 116133965Sjdp break; 116233965Sjdp 116333965Sjdp case IP_FAITH: 116433965Sjdp optval = OPTBIT(INP_FAITH); 116533965Sjdp break; 116633965Sjdp 116733965Sjdp case IP_ONESBCAST: 116860484Sobrien optval = OPTBIT(INP_ONESBCAST); 1169130561Sobrien break; 117060484Sobrien case IP_DONTFRAG: 117189857Sobrien optval = OPTBIT(INP_DONTFRAG); 117289857Sobrien break; 117389857Sobrien } 117433965Sjdp error = sooptcopyout(sopt, &optval, sizeof optval); 117589857Sobrien break; 1176218822Sdim 1177218822Sdim /* 1178218822Sdim * Multicast socket options are processed by the in_mcast 1179218822Sdim * module. 118033965Sjdp */ 118189857Sobrien case IP_MULTICAST_IF: 118233965Sjdp case IP_MULTICAST_VIF: 118333965Sjdp case IP_MULTICAST_TTL: 118433965Sjdp case IP_MULTICAST_LOOP: 118533965Sjdp case IP_MSFILTER: 118633965Sjdp error = inp_getmoptions(inp, sopt); 118733965Sjdp break; 118833965Sjdp 118933965Sjdp#ifdef IPSEC 119033965Sjdp case IP_IPSEC_POLICY: 119133965Sjdp { 119233965Sjdp struct mbuf *m = NULL; 119333965Sjdp caddr_t req = NULL; 1194130561Sobrien size_t len = 0; 119533965Sjdp 119633965Sjdp if (m != 0) { 119733965Sjdp req = mtod(m, caddr_t); 119833965Sjdp len = m->m_len; 119933965Sjdp } 120033965Sjdp error = ipsec_get_policy(sotoinpcb(so), req, len, &m); 120133965Sjdp if (error == 0) 120233965Sjdp error = soopt_mcopyout(sopt, m); /* XXX */ 120333965Sjdp if (error == 0) 120433965Sjdp m_freem(m); 1205218822Sdim break; 1206218822Sdim } 120733965Sjdp#endif /* IPSEC */ 120833965Sjdp 120960484Sobrien default: 121033965Sjdp error = ENOPROTOOPT; 1211218822Sdim break; 121233965Sjdp } 121333965Sjdp break; 121433965Sjdp } 121533965Sjdp return (error); 121660484Sobrien} 121733965Sjdp 121860484Sobrien/* 121933965Sjdp * Routine called from ip_output() to loop back a copy of an IP multicast 122033965Sjdp * packet to the input queue of a specified interface. Note that this 122133965Sjdp * calls the output routine of the loopback "driver", but with an interface 122233965Sjdp * pointer that might NOT be a loopback interface -- evil, but easier than 122333965Sjdp * replicating that code here. 1224218822Sdim */ 122533965Sjdpstatic void 122633965Sjdpip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, 122733965Sjdp int hlen) 122833965Sjdp{ 122933965Sjdp register struct ip *ip; 123033965Sjdp struct mbuf *copym; 123133965Sjdp 123289857Sobrien /* 123333965Sjdp * Make a deep copy of the packet because we're going to 123433965Sjdp * modify the pack in order to generate checksums. 1235218822Sdim */ 123633965Sjdp copym = m_dup(m, M_DONTWAIT); 123733965Sjdp if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 123833965Sjdp copym = m_pullup(copym, hlen); 123933965Sjdp if (copym != NULL) { 124060484Sobrien /* If needed, compute the checksum and mark it as valid. */ 124133965Sjdp if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 124260484Sobrien in_delayed_cksum(copym); 124333965Sjdp copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 124433965Sjdp copym->m_pkthdr.csum_flags |= 124533965Sjdp CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 124633965Sjdp copym->m_pkthdr.csum_data = 0xffff; 124733965Sjdp } 124833965Sjdp /* 124938889Sjdp * We don't bother to fragment if the IP length is greater 125038889Sjdp * than the interface's MTU. Can this possibly matter? 125138889Sjdp */ 125277298Sobrien ip = mtod(copym, struct ip *); 125377298Sobrien ip->ip_len = htons(ip->ip_len); 125477298Sobrien ip->ip_off = htons(ip->ip_off); 125533965Sjdp ip->ip_sum = 0; 125633965Sjdp ip->ip_sum = in_cksum(copym, hlen); 125733965Sjdp#if 1 /* XXX */ 125833965Sjdp if (dst->sin_family != AF_INET) { 125933965Sjdp printf("ip_mloopback: bad address family %d\n", 1260218822Sdim dst->sin_family); 126133965Sjdp dst->sin_family = AF_INET; 126233965Sjdp } 126333965Sjdp#endif 126433965Sjdp if_simloop(ifp, copym, dst->sin_family, 0); 126533965Sjdp } 126633965Sjdp} 126789857Sobrien