ip_output.c revision 193217
1226031Sstas/*- 2226031Sstas * Copyright (c) 1982, 1986, 1988, 1990, 1993 3226031Sstas * The Regents of the University of California. All rights reserved. 4226031Sstas * 5226031Sstas * Redistribution and use in source and binary forms, with or without 6226031Sstas * modification, are permitted provided that the following conditions 7226031Sstas * are met: 8226031Sstas * 1. Redistributions of source code must retain the above copyright 9226031Sstas * notice, this list of conditions and the following disclaimer. 10226031Sstas * 2. Redistributions in binary form must reproduce the above copyright 11226031Sstas * notice, this list of conditions and the following disclaimer in the 12226031Sstas * documentation and/or other materials provided with the distribution. 13226031Sstas * 4. Neither the name of the University nor the names of its contributors 14226031Sstas * may be used to endorse or promote products derived from this software 15226031Sstas * without specific prior written permission. 16226031Sstas * 17226031Sstas * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18226031Sstas * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19226031Sstas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20226031Sstas * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21226031Sstas * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22226031Sstas * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23226031Sstas * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24226031Sstas * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25226031Sstas * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26226031Sstas * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27226031Sstas * SUCH DAMAGE. 28226031Sstas * 29226031Sstas * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30226031Sstas */ 31226031Sstas 32226031Sstas#include <sys/cdefs.h> 33226031Sstas__FBSDID("$FreeBSD: head/sys/netinet/ip_output.c 193217 2009-06-01 10:30:00Z pjd $"); 34226031Sstas 35226031Sstas#include "opt_ipfw.h" 36226031Sstas#include "opt_ipsec.h" 37226031Sstas#include "opt_route.h" 38226031Sstas#include "opt_mac.h" 39226031Sstas#include "opt_mbuf_stress_test.h" 40226031Sstas#include "opt_mpath.h" 41226031Sstas#include "opt_sctp.h" 42226031Sstas 43226031Sstas#include <sys/param.h> 44226031Sstas#include <sys/systm.h> 45226031Sstas#include <sys/kernel.h> 46226031Sstas#include <sys/malloc.h> 47226031Sstas#include <sys/mbuf.h> 48226031Sstas#include <sys/priv.h> 49226031Sstas#include <sys/proc.h> 50226031Sstas#include <sys/protosw.h> 51226031Sstas#include <sys/socket.h> 52226031Sstas#include <sys/socketvar.h> 53226031Sstas#include <sys/sysctl.h> 54226031Sstas#include <sys/ucred.h> 55226031Sstas#include <sys/vimage.h> 56226031Sstas 57226031Sstas#include <net/if.h> 58226031Sstas#include <net/netisr.h> 59226031Sstas#include <net/pfil.h> 60226031Sstas#include <net/route.h> 61226031Sstas#include <net/flowtable.h> 62226031Sstas#ifdef RADIX_MPATH 63226031Sstas#include <net/radix_mpath.h> 64226031Sstas#endif 65226031Sstas#include <net/vnet.h> 66226031Sstas 67226031Sstas#include <netinet/in.h> 68226031Sstas#include <netinet/in_systm.h> 69226031Sstas#include <netinet/ip.h> 70226031Sstas#include <netinet/in_pcb.h> 71226031Sstas#include <netinet/in_var.h> 72226031Sstas#include <netinet/ip_var.h> 73226031Sstas#include <netinet/ip_options.h> 74226031Sstas#include <netinet/vinet.h> 75226031Sstas#ifdef SCTP 76226031Sstas#include <netinet/sctp.h> 77226031Sstas#include <netinet/sctp_crc32.h> 78226031Sstas#endif 79226031Sstas 80226031Sstas#ifdef IPSEC 81226031Sstas#include <netinet/ip_ipsec.h> 82226031Sstas#include <netipsec/ipsec.h> 83226031Sstas#endif /* IPSEC*/ 84226031Sstas 85226031Sstas#include <machine/in_cksum.h> 86226031Sstas 87226031Sstas#include <security/mac/mac_framework.h> 88226031Sstas 89226031Sstas#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 90226031Sstas x, (ntohl(a.s_addr)>>24)&0xFF,\ 91226031Sstas (ntohl(a.s_addr)>>16)&0xFF,\ 92226031Sstas (ntohl(a.s_addr)>>8)&0xFF,\ 93226031Sstas (ntohl(a.s_addr))&0xFF, y); 94226031Sstas 95226031Sstas#ifdef VIMAGE_GLOBALS 96226031Sstasu_short ip_id; 97226031Sstas#endif 98226031Sstas 99226031Sstas#ifdef MBUF_STRESS_TEST 100226031Sstasint mbuf_frag_size = 0; 101226031SstasSYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 102226031Sstas &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 103226031Sstas#endif 104226031Sstas 105226031Sstasstatic void ip_mloopback 106226031Sstas (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 107226031Sstas 108226031Sstas 109226031Sstasextern int in_mcast_loop; 110226031Sstasextern struct protosw inetsw[]; 111226031Sstas 112226031Sstas/* 113226031Sstas * IP output. The packet in mbuf chain m contains a skeletal IP 114226031Sstas * header (with len, off, ttl, proto, tos, src, dst). 115226031Sstas * The mbuf chain containing the packet will be freed. 116226031Sstas * The mbuf opt, if present, will not be freed. 117226031Sstas * In the IP forwarding case, the packet will arrive with options already 118226031Sstas * inserted, so must have a NULL opt pointer. 119226031Sstas */ 120226031Sstasint 121226031Sstasip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 122226031Sstas struct ip_moptions *imo, struct inpcb *inp) 123226031Sstas{ 124226031Sstas INIT_VNET_NET(curvnet); 125226031Sstas INIT_VNET_INET(curvnet); 126226031Sstas struct ip *ip; 127226031Sstas struct ifnet *ifp = NULL; /* keep compiler happy */ 128226031Sstas struct mbuf *m0; 129226031Sstas int hlen = sizeof (struct ip); 130226031Sstas int mtu; 131226031Sstas int len, error = 0; 132226031Sstas int nortfree = 0; 133226031Sstas struct sockaddr_in *dst = NULL; /* keep compiler happy */ 134226031Sstas struct in_ifaddr *ia = NULL; 135226031Sstas int isbroadcast, sw_csum; 136226031Sstas struct route iproute; 137226031Sstas struct in_addr odst; 138226031Sstas#ifdef IPFIREWALL_FORWARD 139226031Sstas struct m_tag *fwd_tag = NULL; 140226031Sstas#endif 141226031Sstas#ifdef IPSEC 142226031Sstas int no_route_but_check_spd = 0; 143226031Sstas#endif 144226031Sstas M_ASSERTPKTHDR(m); 145226031Sstas 146226031Sstas if (inp != NULL) { 147226031Sstas INP_LOCK_ASSERT(inp); 148226031Sstas M_SETFIB(m, inp->inp_inc.inc_fibnum); 149226031Sstas if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) { 150226031Sstas m->m_pkthdr.flowid = inp->inp_flowid; 151226031Sstas m->m_flags |= M_FLOWID; 152226031Sstas } 153226031Sstas } 154226031Sstas 155226031Sstas if (ro == NULL) { 156226031Sstas ro = &iproute; 157226031Sstas bzero(ro, sizeof (*ro)); 158226031Sstas 159226031Sstas /* 160226031Sstas * The flow table returns route entries valid for up to 30 161226031Sstas * seconds; we rely on the remainder of ip_output() taking no 162226031Sstas * longer than that long for the stability of ro_rt. The 163226031Sstas * flow ID assignment must have happened before this point. 164226031Sstas */ 165226031Sstas if (flowtable_lookup(ip_ft, m, ro) == 0) 166226031Sstas nortfree = 1; 167226031Sstas } 168226031Sstas 169226031Sstas if (opt) { 170226031Sstas len = 0; 171226031Sstas m = ip_insertoptions(m, opt, &len); 172226031Sstas if (len != 0) 173226031Sstas hlen = len; 174226031Sstas } 175226031Sstas ip = mtod(m, struct ip *); 176226031Sstas 177226031Sstas /* 178226031Sstas * Fill in IP header. If we are not allowing fragmentation, 179226031Sstas * then the ip_id field is meaningless, but we don't set it 180226031Sstas * to zero. Doing so causes various problems when devices along 181226031Sstas * the path (routers, load balancers, firewalls, etc.) illegally 182226031Sstas * disable DF on our packet. Note that a 16-bit counter 183226031Sstas * will wrap around in less than 10 seconds at 100 Mbit/s on a 184226031Sstas * medium with MTU 1500. See Steven M. Bellovin, "A Technique 185226031Sstas * for Counting NATted Hosts", Proc. IMW'02, available at 186226031Sstas * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>. 187226031Sstas */ 188226031Sstas if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 189226031Sstas ip->ip_v = IPVERSION; 190226031Sstas ip->ip_hl = hlen >> 2; 191226031Sstas ip->ip_id = ip_newid(); 192226031Sstas IPSTAT_INC(ips_localout); 193226031Sstas } else { 194226031Sstas hlen = ip->ip_hl << 2; 195226031Sstas } 196226031Sstas 197226031Sstas dst = (struct sockaddr_in *)&ro->ro_dst; 198226031Sstasagain: 199226031Sstas /* 200226031Sstas * If there is a cached route, 201226031Sstas * check that it is to the same destination 202226031Sstas * and is still up. If not, free it and try again. 203226031Sstas * The address family should also be checked in case of sharing the 204226031Sstas * cache with IPv6. 205226031Sstas */ 206226031Sstas if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 207226031Sstas dst->sin_family != AF_INET || 208226031Sstas dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 209226031Sstas if (!nortfree) 210226031Sstas RTFREE(ro->ro_rt); 211226031Sstas ro->ro_rt = (struct rtentry *)NULL; 212226031Sstas } 213226031Sstas#ifdef IPFIREWALL_FORWARD 214226031Sstas if (ro->ro_rt == NULL && fwd_tag == NULL) { 215226031Sstas#else 216226031Sstas if (ro->ro_rt == NULL) { 217226031Sstas#endif 218226031Sstas bzero(dst, sizeof(*dst)); 219226031Sstas dst->sin_family = AF_INET; 220226031Sstas dst->sin_len = sizeof(*dst); 221226031Sstas dst->sin_addr = ip->ip_dst; 222226031Sstas } 223226031Sstas /* 224226031Sstas * If routing to interface only, short circuit routing lookup. 225226031Sstas * The use of an all-ones broadcast address implies this; an 226226031Sstas * interface is specified by the broadcast address of an interface, 227226031Sstas * or the destination address of a ptp interface. 228226031Sstas */ 229226031Sstas if (flags & IP_SENDONES) { 230226031Sstas if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL && 231226031Sstas (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) { 232226031Sstas IPSTAT_INC(ips_noroute); 233226031Sstas error = ENETUNREACH; 234226031Sstas goto bad; 235226031Sstas } 236226031Sstas ip->ip_dst.s_addr = INADDR_BROADCAST; 237226031Sstas dst->sin_addr = ip->ip_dst; 238226031Sstas ifp = ia->ia_ifp; 239226031Sstas ip->ip_ttl = 1; 240226031Sstas isbroadcast = 1; 241226031Sstas } else if (flags & IP_ROUTETOIF) { 242226031Sstas if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 243226031Sstas (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 244226031Sstas IPSTAT_INC(ips_noroute); 245226031Sstas error = ENETUNREACH; 246226031Sstas goto bad; 247226031Sstas } 248226031Sstas ifp = ia->ia_ifp; 249226031Sstas ip->ip_ttl = 1; 250226031Sstas isbroadcast = in_broadcast(dst->sin_addr, ifp); 251226031Sstas } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 252226031Sstas imo != NULL && imo->imo_multicast_ifp != NULL) { 253226031Sstas /* 254226031Sstas * Bypass the normal routing lookup for multicast 255226031Sstas * packets if the interface is specified. 256226031Sstas */ 257226031Sstas ifp = imo->imo_multicast_ifp; 258226031Sstas IFP_TO_IA(ifp, ia); 259226031Sstas isbroadcast = 0; /* fool gcc */ 260226031Sstas } else { 261226031Sstas /* 262226031Sstas * We want to do any cloning requested by the link layer, 263226031Sstas * as this is probably required in all cases for correct 264226031Sstas * operation (as it is for ARP). 265226031Sstas */ 266226031Sstas if (ro->ro_rt == NULL) 267226031Sstas#ifdef RADIX_MPATH 268226031Sstas rtalloc_mpath_fib(ro, 269226031Sstas ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 270226031Sstas inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); 271226031Sstas#else 272226031Sstas in_rtalloc_ign(ro, 0, 273226031Sstas inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); 274226031Sstas#endif 275226031Sstas if (ro->ro_rt == NULL) { 276226031Sstas#ifdef IPSEC 277226031Sstas /* 278226031Sstas * There is no route for this packet, but it is 279226031Sstas * possible that a matching SPD entry exists. 280226031Sstas */ 281226031Sstas no_route_but_check_spd = 1; 282226031Sstas mtu = 0; /* Silence GCC warning. */ 283226031Sstas goto sendit; 284226031Sstas#endif 285226031Sstas IPSTAT_INC(ips_noroute); 286226031Sstas error = EHOSTUNREACH; 287226031Sstas goto bad; 288226031Sstas } 289226031Sstas ia = ifatoia(ro->ro_rt->rt_ifa); 290226031Sstas ifp = ro->ro_rt->rt_ifp; 291226031Sstas ro->ro_rt->rt_rmx.rmx_pksent++; 292226031Sstas if (ro->ro_rt->rt_flags & RTF_GATEWAY) 293226031Sstas dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 294226031Sstas if (ro->ro_rt->rt_flags & RTF_HOST) 295226031Sstas isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 296226031Sstas else 297226031Sstas isbroadcast = in_broadcast(dst->sin_addr, ifp); 298226031Sstas } 299226031Sstas /* 300226031Sstas * Calculate MTU. If we have a route that is up, use that, 301226031Sstas * otherwise use the interface's MTU. 302226031Sstas */ 303226031Sstas if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) { 304226031Sstas /* 305226031Sstas * This case can happen if the user changed the MTU 306226031Sstas * of an interface after enabling IP on it. Because 307226031Sstas * most netifs don't keep track of routes pointing to 308226031Sstas * them, there is no way for one to update all its 309226031Sstas * routes when the MTU is changed. 310226031Sstas */ 311226031Sstas if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu) 312226031Sstas ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 313226031Sstas mtu = ro->ro_rt->rt_rmx.rmx_mtu; 314226031Sstas } else { 315226031Sstas mtu = ifp->if_mtu; 316226031Sstas } 317226031Sstas if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 318226031Sstas m->m_flags |= M_MCAST; 319226031Sstas /* 320226031Sstas * IP destination address is multicast. Make sure "dst" 321226031Sstas * still points to the address in "ro". (It may have been 322226031Sstas * changed to point to a gateway address, above.) 323226031Sstas */ 324226031Sstas dst = (struct sockaddr_in *)&ro->ro_dst; 325226031Sstas /* 326226031Sstas * See if the caller provided any multicast options 327226031Sstas */ 328226031Sstas if (imo != NULL) { 329226031Sstas ip->ip_ttl = imo->imo_multicast_ttl; 330226031Sstas if (imo->imo_multicast_vif != -1) 331226031Sstas ip->ip_src.s_addr = 332226031Sstas ip_mcast_src ? 333226031Sstas ip_mcast_src(imo->imo_multicast_vif) : 334226031Sstas INADDR_ANY; 335226031Sstas } else 336226031Sstas ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 337226031Sstas /* 338226031Sstas * Confirm that the outgoing interface supports multicast. 339226031Sstas */ 340226031Sstas if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 341226031Sstas if ((ifp->if_flags & IFF_MULTICAST) == 0) { 342226031Sstas IPSTAT_INC(ips_noroute); 343226031Sstas error = ENETUNREACH; 344226031Sstas goto bad; 345226031Sstas } 346226031Sstas } 347226031Sstas /* 348226031Sstas * If source address not specified yet, use address 349226031Sstas * of outgoing interface. 350226031Sstas */ 351226031Sstas if (ip->ip_src.s_addr == INADDR_ANY) { 352226031Sstas /* Interface may have no addresses. */ 353226031Sstas if (ia != NULL) 354226031Sstas ip->ip_src = IA_SIN(ia)->sin_addr; 355226031Sstas } 356226031Sstas 357226031Sstas if ((imo == NULL && in_mcast_loop) || 358226031Sstas (imo && imo->imo_multicast_loop)) { 359226031Sstas /* 360226031Sstas * Loop back multicast datagram if not expressly 361226031Sstas * forbidden to do so, even if we are not a member 362226031Sstas * of the group; ip_input() will filter it later, 363226031Sstas * thus deferring a hash lookup and mutex acquisition 364226031Sstas * at the expense of a cheap copy using m_copym(). 365226031Sstas */ 366226031Sstas ip_mloopback(ifp, m, dst, hlen); 367226031Sstas } else { 368226031Sstas /* 369226031Sstas * If we are acting as a multicast router, perform 370226031Sstas * multicast forwarding as if the packet had just 371226031Sstas * arrived on the interface to which we are about 372226031Sstas * to send. The multicast forwarding function 373226031Sstas * recursively calls this function, using the 374226031Sstas * IP_FORWARDING flag to prevent infinite recursion. 375226031Sstas * 376226031Sstas * Multicasts that are looped back by ip_mloopback(), 377226031Sstas * above, will be forwarded by the ip_input() routine, 378226031Sstas * if necessary. 379226031Sstas */ 380226031Sstas if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 381226031Sstas /* 382226031Sstas * If rsvp daemon is not running, do not 383226031Sstas * set ip_moptions. This ensures that the packet 384226031Sstas * is multicast and not just sent down one link 385226031Sstas * as prescribed by rsvpd. 386226031Sstas */ 387226031Sstas if (!V_rsvp_on) 388226031Sstas imo = NULL; 389226031Sstas if (ip_mforward && 390226031Sstas ip_mforward(ip, ifp, m, imo) != 0) { 391226031Sstas m_freem(m); 392226031Sstas goto done; 393226031Sstas } 394226031Sstas } 395226031Sstas } 396226031Sstas 397226031Sstas /* 398226031Sstas * Multicasts with a time-to-live of zero may be looped- 399226031Sstas * back, above, but must not be transmitted on a network. 400226031Sstas * Also, multicasts addressed to the loopback interface 401226031Sstas * are not sent -- the above call to ip_mloopback() will 402226031Sstas * loop back a copy. ip_input() will drop the copy if 403226031Sstas * this host does not belong to the destination group on 404226031Sstas * the loopback interface. 405226031Sstas */ 406226031Sstas if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 407226031Sstas m_freem(m); 408226031Sstas goto done; 409226031Sstas } 410226031Sstas 411226031Sstas goto sendit; 412226031Sstas } 413226031Sstas 414226031Sstas /* 415226031Sstas * If the source address is not specified yet, use the address 416226031Sstas * of the outoing interface. 417226031Sstas */ 418226031Sstas if (ip->ip_src.s_addr == INADDR_ANY) { 419226031Sstas /* Interface may have no addresses. */ 420226031Sstas if (ia != NULL) { 421226031Sstas ip->ip_src = IA_SIN(ia)->sin_addr; 422226031Sstas } 423226031Sstas } 424226031Sstas 425226031Sstas /* 426226031Sstas * Verify that we have any chance at all of being able to queue the 427226031Sstas * packet or packet fragments, unless ALTQ is enabled on the given 428226031Sstas * interface in which case packetdrop should be done by queueing. 429226031Sstas */ 430226031Sstas#ifdef ALTQ 431226031Sstas if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) && 432226031Sstas ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 433226031Sstas ifp->if_snd.ifq_maxlen)) 434226031Sstas#else 435226031Sstas if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 436226031Sstas ifp->if_snd.ifq_maxlen) 437226031Sstas#endif /* ALTQ */ 438226031Sstas { 439226031Sstas error = ENOBUFS; 440226031Sstas IPSTAT_INC(ips_odropped); 441226031Sstas ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1); 442226031Sstas goto bad; 443226031Sstas } 444226031Sstas 445226031Sstas /* 446226031Sstas * Look for broadcast address and 447226031Sstas * verify user is allowed to send 448226031Sstas * such a packet. 449226031Sstas */ 450226031Sstas if (isbroadcast) { 451226031Sstas if ((ifp->if_flags & IFF_BROADCAST) == 0) { 452226031Sstas error = EADDRNOTAVAIL; 453226031Sstas goto bad; 454226031Sstas } 455226031Sstas if ((flags & IP_ALLOWBROADCAST) == 0) { 456226031Sstas error = EACCES; 457226031Sstas goto bad; 458226031Sstas } 459226031Sstas /* don't allow broadcast messages to be fragmented */ 460226031Sstas if (ip->ip_len > mtu) { 461226031Sstas error = EMSGSIZE; 462226031Sstas goto bad; 463226031Sstas } 464226031Sstas m->m_flags |= M_BCAST; 465226031Sstas } else { 466226031Sstas m->m_flags &= ~M_BCAST; 467226031Sstas } 468226031Sstas 469226031Sstassendit: 470226031Sstas#ifdef IPSEC 471226031Sstas switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) { 472226031Sstas case 1: 473226031Sstas goto bad; 474226031Sstas case -1: 475226031Sstas goto done; 476226031Sstas case 0: 477226031Sstas default: 478226031Sstas break; /* Continue with packet processing. */ 479226031Sstas } 480226031Sstas /* 481226031Sstas * Check if there was a route for this packet; return error if not. 482226031Sstas */ 483226031Sstas if (no_route_but_check_spd) { 484226031Sstas IPSTAT_INC(ips_noroute); 485226031Sstas error = EHOSTUNREACH; 486226031Sstas goto bad; 487226031Sstas } 488226031Sstas /* Update variables that are affected by ipsec4_output(). */ 489226031Sstas ip = mtod(m, struct ip *); 490226031Sstas hlen = ip->ip_hl << 2; 491226031Sstas#endif /* IPSEC */ 492226031Sstas 493226031Sstas /* Jump over all PFIL processing if hooks are not active. */ 494226031Sstas if (!PFIL_HOOKED(&inet_pfil_hook)) 495226031Sstas goto passout; 496226031Sstas 497226031Sstas /* Run through list of hooks for output packets. */ 498226031Sstas odst.s_addr = ip->ip_dst.s_addr; 499226031Sstas error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp); 500226031Sstas if (error != 0 || m == NULL) 501226031Sstas goto done; 502226031Sstas 503226031Sstas ip = mtod(m, struct ip *); 504226031Sstas 505226031Sstas /* See if destination IP address was changed by packet filter. */ 506226031Sstas if (odst.s_addr != ip->ip_dst.s_addr) { 507226031Sstas m->m_flags |= M_SKIP_FIREWALL; 508226031Sstas /* If destination is now ourself drop to ip_input(). */ 509226031Sstas if (in_localip(ip->ip_dst)) { 510226031Sstas m->m_flags |= M_FASTFWD_OURS; 511226031Sstas if (m->m_pkthdr.rcvif == NULL) 512226031Sstas m->m_pkthdr.rcvif = V_loif; 513226031Sstas if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 514226031Sstas m->m_pkthdr.csum_flags |= 515226031Sstas CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 516226031Sstas m->m_pkthdr.csum_data = 0xffff; 517226031Sstas } 518226031Sstas m->m_pkthdr.csum_flags |= 519226031Sstas CSUM_IP_CHECKED | CSUM_IP_VALID; 520226031Sstas#ifdef SCTP 521226031Sstas if (m->m_pkthdr.csum_flags & CSUM_SCTP) 522226031Sstas m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 523226031Sstas#endif 524226031Sstas error = netisr_queue(NETISR_IP, m); 525226031Sstas goto done; 526226031Sstas } else 527226031Sstas goto again; /* Redo the routing table lookup. */ 528226031Sstas } 529226031Sstas 530226031Sstas#ifdef IPFIREWALL_FORWARD 531226031Sstas /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 532226031Sstas if (m->m_flags & M_FASTFWD_OURS) { 533226031Sstas if (m->m_pkthdr.rcvif == NULL) 534226031Sstas m->m_pkthdr.rcvif = V_loif; 535226031Sstas if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 536226031Sstas m->m_pkthdr.csum_flags |= 537226031Sstas CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 538226031Sstas m->m_pkthdr.csum_data = 0xffff; 539226031Sstas } 540226031Sstas#ifdef SCTP 541226031Sstas if (m->m_pkthdr.csum_flags & CSUM_SCTP) 542226031Sstas m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 543226031Sstas#endif 544226031Sstas m->m_pkthdr.csum_flags |= 545226031Sstas CSUM_IP_CHECKED | CSUM_IP_VALID; 546226031Sstas 547226031Sstas error = netisr_queue(NETISR_IP, m); 548226031Sstas goto done; 549226031Sstas } 550226031Sstas /* Or forward to some other address? */ 551226031Sstas fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 552226031Sstas if (fwd_tag) { 553226031Sstas dst = (struct sockaddr_in *)&ro->ro_dst; 554226031Sstas bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 555226031Sstas m->m_flags |= M_SKIP_FIREWALL; 556226031Sstas m_tag_delete(m, fwd_tag); 557226031Sstas goto again; 558226031Sstas } 559226031Sstas#endif /* IPFIREWALL_FORWARD */ 560226031Sstas 561226031Sstaspassout: 562226031Sstas /* 127/8 must not appear on wire - RFC1122. */ 563226031Sstas if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 564226031Sstas (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 565226031Sstas if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 566226031Sstas IPSTAT_INC(ips_badaddr); 567226031Sstas error = EADDRNOTAVAIL; 568226031Sstas goto bad; 569226031Sstas } 570226031Sstas } 571226031Sstas 572226031Sstas m->m_pkthdr.csum_flags |= CSUM_IP; 573226031Sstas sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 574226031Sstas if (sw_csum & CSUM_DELAY_DATA) { 575226031Sstas in_delayed_cksum(m); 576226031Sstas sw_csum &= ~CSUM_DELAY_DATA; 577226031Sstas } 578226031Sstas#ifdef SCTP 579226031Sstas if (sw_csum & CSUM_SCTP) { 580226031Sstas sctp_delayed_cksum(m); 581226031Sstas sw_csum &= ~CSUM_SCTP; 582226031Sstas } 583226031Sstas#endif 584226031Sstas m->m_pkthdr.csum_flags &= ifp->if_hwassist; 585226031Sstas 586226031Sstas /* 587226031Sstas * If small enough for interface, or the interface will take 588226031Sstas * care of the fragmentation for us, we can just send directly. 589226031Sstas */ 590226031Sstas if (ip->ip_len <= mtu || 591226031Sstas (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || 592226031Sstas ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { 593226031Sstas ip->ip_len = htons(ip->ip_len); 594226031Sstas ip->ip_off = htons(ip->ip_off); 595226031Sstas ip->ip_sum = 0; 596226031Sstas if (sw_csum & CSUM_DELAY_IP) 597226031Sstas ip->ip_sum = in_cksum(m, hlen); 598226031Sstas 599226031Sstas /* 600226031Sstas * Record statistics for this interface address. 601226031Sstas * With CSUM_TSO the byte/packet count will be slightly 602226031Sstas * incorrect because we count the IP+TCP headers only 603226031Sstas * once instead of for every generated packet. 604226031Sstas */ 605226031Sstas if (!(flags & IP_FORWARDING) && ia) { 606226031Sstas if (m->m_pkthdr.csum_flags & CSUM_TSO) 607226031Sstas ia->ia_ifa.if_opackets += 608226031Sstas m->m_pkthdr.len / m->m_pkthdr.tso_segsz; 609226031Sstas else 610226031Sstas ia->ia_ifa.if_opackets++; 611226031Sstas ia->ia_ifa.if_obytes += m->m_pkthdr.len; 612226031Sstas } 613226031Sstas#ifdef MBUF_STRESS_TEST 614226031Sstas if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 615226031Sstas m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 616226031Sstas#endif 617226031Sstas /* 618226031Sstas * Reset layer specific mbuf flags 619226031Sstas * to avoid confusing lower layers. 620226031Sstas */ 621226031Sstas m->m_flags &= ~(M_PROTOFLAGS); 622226031Sstas error = (*ifp->if_output)(ifp, m, 623226031Sstas (struct sockaddr *)dst, ro); 624226031Sstas goto done; 625226031Sstas } 626226031Sstas 627226031Sstas /* Balk when DF bit is set or the interface didn't support TSO. */ 628226031Sstas if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 629226031Sstas error = EMSGSIZE; 630226031Sstas IPSTAT_INC(ips_cantfrag); 631226031Sstas goto bad; 632226031Sstas } 633226031Sstas 634226031Sstas /* 635226031Sstas * Too large for interface; fragment if possible. If successful, 636226031Sstas * on return, m will point to a list of packets to be sent. 637226031Sstas */ 638226031Sstas error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum); 639226031Sstas if (error) 640226031Sstas goto bad; 641226031Sstas for (; m; m = m0) { 642226031Sstas m0 = m->m_nextpkt; 643226031Sstas m->m_nextpkt = 0; 644226031Sstas if (error == 0) { 645226031Sstas /* Record statistics for this interface address. */ 646226031Sstas if (ia != NULL) { 647226031Sstas ia->ia_ifa.if_opackets++; 648226031Sstas ia->ia_ifa.if_obytes += m->m_pkthdr.len; 649226031Sstas } 650226031Sstas /* 651226031Sstas * Reset layer specific mbuf flags 652226031Sstas * to avoid confusing upper layers. 653226031Sstas */ 654226031Sstas m->m_flags &= ~(M_PROTOFLAGS); 655226031Sstas 656226031Sstas error = (*ifp->if_output)(ifp, m, 657226031Sstas (struct sockaddr *)dst, ro); 658226031Sstas } else 659226031Sstas m_freem(m); 660226031Sstas } 661226031Sstas 662226031Sstas if (error == 0) 663226031Sstas IPSTAT_INC(ips_fragmented); 664226031Sstas 665226031Sstasdone: 666226031Sstas if (ro == &iproute && ro->ro_rt && !nortfree) { 667226031Sstas RTFREE(ro->ro_rt); 668226031Sstas } 669226031Sstas return (error); 670226031Sstasbad: 671226031Sstas m_freem(m); 672226031Sstas goto done; 673226031Sstas} 674226031Sstas 675226031Sstas/* 676226031Sstas * Create a chain of fragments which fit the given mtu. m_frag points to the 677226031Sstas * mbuf to be fragmented; on return it points to the chain with the fragments. 678226031Sstas * Return 0 if no error. If error, m_frag may contain a partially built 679226031Sstas * chain of fragments that should be freed by the caller. 680226031Sstas * 681226031Sstas * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 682226031Sstas * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 683226031Sstas */ 684226031Sstasint 685226031Sstasip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 686226031Sstas u_long if_hwassist_flags, int sw_csum) 687226031Sstas{ 688226031Sstas INIT_VNET_INET(curvnet); 689226031Sstas int error = 0; 690226031Sstas int hlen = ip->ip_hl << 2; 691226031Sstas int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 692226031Sstas int off; 693226031Sstas struct mbuf *m0 = *m_frag; /* the original packet */ 694226031Sstas int firstlen; 695226031Sstas struct mbuf **mnext; 696226031Sstas int nfrags; 697226031Sstas 698226031Sstas if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 699226031Sstas IPSTAT_INC(ips_cantfrag); 700226031Sstas return EMSGSIZE; 701226031Sstas } 702226031Sstas 703226031Sstas /* 704226031Sstas * Must be able to put at least 8 bytes per fragment. 705226031Sstas */ 706226031Sstas if (len < 8) 707226031Sstas return EMSGSIZE; 708226031Sstas 709226031Sstas /* 710226031Sstas * If the interface will not calculate checksums on 711226031Sstas * fragmented packets, then do it here. 712226031Sstas */ 713226031Sstas if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 714226031Sstas (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 715226031Sstas in_delayed_cksum(m0); 716226031Sstas m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 717226031Sstas } 718226031Sstas#ifdef SCTP 719226031Sstas if (m0->m_pkthdr.csum_flags & CSUM_SCTP && 720226031Sstas (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 721226031Sstas sctp_delayed_cksum(m0); 722226031Sstas m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 723226031Sstas } 724226031Sstas#endif 725226031Sstas if (len > PAGE_SIZE) { 726226031Sstas /* 727226031Sstas * Fragment large datagrams such that each segment 728226031Sstas * contains a multiple of PAGE_SIZE amount of data, 729226031Sstas * plus headers. This enables a receiver to perform 730226031Sstas * page-flipping zero-copy optimizations. 731226031Sstas * 732226031Sstas * XXX When does this help given that sender and receiver 733226031Sstas * could have different page sizes, and also mtu could 734226031Sstas * be less than the receiver's page size ? 735226031Sstas */ 736226031Sstas int newlen; 737226031Sstas struct mbuf *m; 738226031Sstas 739226031Sstas for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 740226031Sstas off += m->m_len; 741226031Sstas 742226031Sstas /* 743226031Sstas * firstlen (off - hlen) must be aligned on an 744226031Sstas * 8-byte boundary 745226031Sstas */ 746226031Sstas if (off < hlen) 747226031Sstas goto smart_frag_failure; 748226031Sstas off = ((off - hlen) & ~7) + hlen; 749226031Sstas newlen = (~PAGE_MASK) & mtu; 750226031Sstas if ((newlen + sizeof (struct ip)) > mtu) { 751226031Sstas /* we failed, go back the default */ 752226031Sstassmart_frag_failure: 753226031Sstas newlen = len; 754226031Sstas off = hlen + len; 755226031Sstas } 756226031Sstas len = newlen; 757226031Sstas 758226031Sstas } else { 759226031Sstas off = hlen + len; 760226031Sstas } 761226031Sstas 762226031Sstas firstlen = off - hlen; 763226031Sstas mnext = &m0->m_nextpkt; /* pointer to next packet */ 764226031Sstas 765226031Sstas /* 766226031Sstas * Loop through length of segment after first fragment, 767226031Sstas * make new header and copy data of each part and link onto chain. 768226031Sstas * Here, m0 is the original packet, m is the fragment being created. 769226031Sstas * The fragments are linked off the m_nextpkt of the original 770226031Sstas * packet, which after processing serves as the first fragment. 771226031Sstas */ 772226031Sstas for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 773226031Sstas struct ip *mhip; /* ip header on the fragment */ 774226031Sstas struct mbuf *m; 775226031Sstas int mhlen = sizeof (struct ip); 776226031Sstas 777226031Sstas MGETHDR(m, M_DONTWAIT, MT_DATA); 778226031Sstas if (m == NULL) { 779226031Sstas error = ENOBUFS; 780226031Sstas IPSTAT_INC(ips_odropped); 781226031Sstas goto done; 782226031Sstas } 783226031Sstas m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 784226031Sstas /* 785226031Sstas * In the first mbuf, leave room for the link header, then 786226031Sstas * copy the original IP header including options. The payload 787226031Sstas * goes into an additional mbuf chain returned by m_copym(). 788226031Sstas */ 789226031Sstas m->m_data += max_linkhdr; 790226031Sstas mhip = mtod(m, struct ip *); 791226031Sstas *mhip = *ip; 792226031Sstas if (hlen > sizeof (struct ip)) { 793226031Sstas mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 794226031Sstas mhip->ip_v = IPVERSION; 795226031Sstas mhip->ip_hl = mhlen >> 2; 796226031Sstas } 797226031Sstas m->m_len = mhlen; 798226031Sstas /* XXX do we need to add ip->ip_off below ? */ 799226031Sstas mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 800226031Sstas if (off + len >= ip->ip_len) { /* last fragment */ 801226031Sstas len = ip->ip_len - off; 802226031Sstas m->m_flags |= M_LASTFRAG; 803226031Sstas } else 804226031Sstas mhip->ip_off |= IP_MF; 805226031Sstas mhip->ip_len = htons((u_short)(len + mhlen)); 806226031Sstas m->m_next = m_copym(m0, off, len, M_DONTWAIT); 807226031Sstas if (m->m_next == NULL) { /* copy failed */ 808226031Sstas m_free(m); 809226031Sstas error = ENOBUFS; /* ??? */ 810226031Sstas IPSTAT_INC(ips_odropped); 811226031Sstas goto done; 812226031Sstas } 813226031Sstas m->m_pkthdr.len = mhlen + len; 814226031Sstas m->m_pkthdr.rcvif = NULL; 815226031Sstas#ifdef MAC 816226031Sstas mac_netinet_fragment(m0, m); 817226031Sstas#endif 818226031Sstas m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 819226031Sstas mhip->ip_off = htons(mhip->ip_off); 820226031Sstas mhip->ip_sum = 0; 821226031Sstas if (sw_csum & CSUM_DELAY_IP) 822226031Sstas mhip->ip_sum = in_cksum(m, mhlen); 823226031Sstas *mnext = m; 824226031Sstas mnext = &m->m_nextpkt; 825226031Sstas } 826226031Sstas IPSTAT_ADD(ips_ofragments, nfrags); 827226031Sstas 828226031Sstas /* set first marker for fragment chain */ 829226031Sstas m0->m_flags |= M_FIRSTFRAG | M_FRAG; 830226031Sstas m0->m_pkthdr.csum_data = nfrags; 831226031Sstas 832226031Sstas /* 833226031Sstas * Update first fragment by trimming what's been copied out 834226031Sstas * and updating header. 835226031Sstas */ 836226031Sstas m_adj(m0, hlen + firstlen - ip->ip_len); 837226031Sstas m0->m_pkthdr.len = hlen + firstlen; 838226031Sstas ip->ip_len = htons((u_short)m0->m_pkthdr.len); 839226031Sstas ip->ip_off |= IP_MF; 840226031Sstas ip->ip_off = htons(ip->ip_off); 841226031Sstas ip->ip_sum = 0; 842226031Sstas if (sw_csum & CSUM_DELAY_IP) 843226031Sstas ip->ip_sum = in_cksum(m0, hlen); 844226031Sstas 845226031Sstasdone: 846226031Sstas *m_frag = m0; 847226031Sstas return error; 848226031Sstas} 849226031Sstas 850226031Sstasvoid 851226031Sstasin_delayed_cksum(struct mbuf *m) 852226031Sstas{ 853226031Sstas struct ip *ip; 854226031Sstas u_short csum, offset; 855226031Sstas 856226031Sstas ip = mtod(m, struct ip *); 857226031Sstas offset = ip->ip_hl << 2 ; 858226031Sstas csum = in_cksum_skip(m, ip->ip_len, offset); 859226031Sstas if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 860226031Sstas csum = 0xffff; 861226031Sstas offset += m->m_pkthdr.csum_data; /* checksum offset */ 862226031Sstas 863226031Sstas if (offset + sizeof(u_short) > m->m_len) { 864226031Sstas printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 865226031Sstas m->m_len, offset, ip->ip_p); 866226031Sstas /* 867226031Sstas * XXX 868226031Sstas * this shouldn't happen, but if it does, the 869226031Sstas * correct behavior may be to insert the checksum 870226031Sstas * in the appropriate next mbuf in the chain. 871226031Sstas */ 872226031Sstas return; 873226031Sstas } 874226031Sstas *(u_short *)(m->m_data + offset) = csum; 875226031Sstas} 876226031Sstas 877226031Sstas/* 878226031Sstas * IP socket option processing. 879226031Sstas */ 880226031Sstasint 881226031Sstasip_ctloutput(struct socket *so, struct sockopt *sopt) 882226031Sstas{ 883226031Sstas struct inpcb *inp = sotoinpcb(so); 884226031Sstas int error, optval; 885226031Sstas 886226031Sstas error = optval = 0; 887226031Sstas if (sopt->sopt_level != IPPROTO_IP) { 888226031Sstas if ((sopt->sopt_level == SOL_SOCKET) && 889226031Sstas (sopt->sopt_name == SO_SETFIB)) { 890226031Sstas inp->inp_inc.inc_fibnum = so->so_fibnum; 891226031Sstas return (0); 892226031Sstas } 893226031Sstas return (EINVAL); 894226031Sstas } 895226031Sstas 896226031Sstas switch (sopt->sopt_dir) { 897226031Sstas case SOPT_SET: 898226031Sstas switch (sopt->sopt_name) { 899226031Sstas case IP_OPTIONS: 900226031Sstas#ifdef notyet 901226031Sstas case IP_RETOPTS: 902226031Sstas#endif 903226031Sstas { 904226031Sstas struct mbuf *m; 905226031Sstas if (sopt->sopt_valsize > MLEN) { 906226031Sstas error = EMSGSIZE; 907226031Sstas break; 908226031Sstas } 909226031Sstas MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 910226031Sstas if (m == NULL) { 911226031Sstas error = ENOBUFS; 912226031Sstas break; 913226031Sstas } 914226031Sstas m->m_len = sopt->sopt_valsize; 915226031Sstas error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 916226031Sstas m->m_len); 917226031Sstas if (error) { 918226031Sstas m_free(m); 919226031Sstas break; 920226031Sstas } 921226031Sstas INP_WLOCK(inp); 922226031Sstas error = ip_pcbopts(inp, sopt->sopt_name, m); 923226031Sstas INP_WUNLOCK(inp); 924226031Sstas return (error); 925226031Sstas } 926226031Sstas 927226031Sstas case IP_BINDANY: 928226031Sstas if (sopt->sopt_td != NULL) { 929226031Sstas error = priv_check(sopt->sopt_td, 930226031Sstas PRIV_NETINET_BINDANY); 931226031Sstas if (error) 932226031Sstas break; 933226031Sstas } 934226031Sstas /* FALLTHROUGH */ 935226031Sstas case IP_TOS: 936226031Sstas case IP_TTL: 937226031Sstas case IP_MINTTL: 938226031Sstas case IP_RECVOPTS: 939226031Sstas case IP_RECVRETOPTS: 940226031Sstas case IP_RECVDSTADDR: 941226031Sstas case IP_RECVTTL: 942226031Sstas case IP_RECVIF: 943226031Sstas case IP_FAITH: 944226031Sstas case IP_ONESBCAST: 945226031Sstas case IP_DONTFRAG: 946226031Sstas error = sooptcopyin(sopt, &optval, sizeof optval, 947226031Sstas sizeof optval); 948226031Sstas if (error) 949226031Sstas break; 950226031Sstas 951226031Sstas switch (sopt->sopt_name) { 952226031Sstas case IP_TOS: 953226031Sstas inp->inp_ip_tos = optval; 954226031Sstas break; 955226031Sstas 956226031Sstas case IP_TTL: 957226031Sstas inp->inp_ip_ttl = optval; 958226031Sstas break; 959226031Sstas 960226031Sstas case IP_MINTTL: 961226031Sstas if (optval >= 0 && optval <= MAXTTL) 962226031Sstas inp->inp_ip_minttl = optval; 963226031Sstas else 964226031Sstas error = EINVAL; 965226031Sstas break; 966226031Sstas 967226031Sstas#define OPTSET(bit) do { \ 968226031Sstas INP_WLOCK(inp); \ 969226031Sstas if (optval) \ 970226031Sstas inp->inp_flags |= bit; \ 971226031Sstas else \ 972226031Sstas inp->inp_flags &= ~bit; \ 973226031Sstas INP_WUNLOCK(inp); \ 974226031Sstas} while (0) 975226031Sstas 976226031Sstas case IP_RECVOPTS: 977226031Sstas OPTSET(INP_RECVOPTS); 978226031Sstas break; 979226031Sstas 980226031Sstas case IP_RECVRETOPTS: 981226031Sstas OPTSET(INP_RECVRETOPTS); 982226031Sstas break; 983226031Sstas 984226031Sstas case IP_RECVDSTADDR: 985226031Sstas OPTSET(INP_RECVDSTADDR); 986226031Sstas break; 987226031Sstas 988226031Sstas case IP_RECVTTL: 989226031Sstas OPTSET(INP_RECVTTL); 990226031Sstas break; 991226031Sstas 992226031Sstas case IP_RECVIF: 993226031Sstas OPTSET(INP_RECVIF); 994226031Sstas break; 995226031Sstas 996226031Sstas case IP_FAITH: 997226031Sstas OPTSET(INP_FAITH); 998226031Sstas break; 999226031Sstas 1000226031Sstas case IP_ONESBCAST: 1001226031Sstas OPTSET(INP_ONESBCAST); 1002226031Sstas break; 1003226031Sstas case IP_DONTFRAG: 1004226031Sstas OPTSET(INP_DONTFRAG); 1005226031Sstas break; 1006226031Sstas case IP_BINDANY: 1007226031Sstas OPTSET(INP_BINDANY); 1008226031Sstas break; 1009226031Sstas } 1010226031Sstas break; 1011226031Sstas#undef OPTSET 1012226031Sstas 1013226031Sstas /* 1014226031Sstas * Multicast socket options are processed by the in_mcast 1015226031Sstas * module. 1016226031Sstas */ 1017226031Sstas case IP_MULTICAST_IF: 1018226031Sstas case IP_MULTICAST_VIF: 1019226031Sstas case IP_MULTICAST_TTL: 1020226031Sstas case IP_MULTICAST_LOOP: 1021226031Sstas case IP_ADD_MEMBERSHIP: 1022226031Sstas case IP_DROP_MEMBERSHIP: 1023226031Sstas case IP_ADD_SOURCE_MEMBERSHIP: 1024226031Sstas case IP_DROP_SOURCE_MEMBERSHIP: 1025226031Sstas case IP_BLOCK_SOURCE: 1026226031Sstas case IP_UNBLOCK_SOURCE: 1027226031Sstas case IP_MSFILTER: 1028226031Sstas case MCAST_JOIN_GROUP: 1029226031Sstas case MCAST_LEAVE_GROUP: 1030226031Sstas case MCAST_JOIN_SOURCE_GROUP: 1031226031Sstas case MCAST_LEAVE_SOURCE_GROUP: 1032226031Sstas case MCAST_BLOCK_SOURCE: 1033226031Sstas case MCAST_UNBLOCK_SOURCE: 1034226031Sstas error = inp_setmoptions(inp, sopt); 1035226031Sstas break; 1036226031Sstas 1037226031Sstas case IP_PORTRANGE: 1038226031Sstas error = sooptcopyin(sopt, &optval, sizeof optval, 1039226031Sstas sizeof optval); 1040226031Sstas if (error) 1041226031Sstas break; 1042226031Sstas 1043226031Sstas INP_WLOCK(inp); 1044226031Sstas switch (optval) { 1045226031Sstas case IP_PORTRANGE_DEFAULT: 1046226031Sstas inp->inp_flags &= ~(INP_LOWPORT); 1047226031Sstas inp->inp_flags &= ~(INP_HIGHPORT); 1048226031Sstas break; 1049226031Sstas 1050226031Sstas case IP_PORTRANGE_HIGH: 1051226031Sstas inp->inp_flags &= ~(INP_LOWPORT); 1052226031Sstas inp->inp_flags |= INP_HIGHPORT; 1053226031Sstas break; 1054226031Sstas 1055226031Sstas case IP_PORTRANGE_LOW: 1056226031Sstas inp->inp_flags &= ~(INP_HIGHPORT); 1057226031Sstas inp->inp_flags |= INP_LOWPORT; 1058226031Sstas break; 1059226031Sstas 1060226031Sstas default: 1061226031Sstas error = EINVAL; 1062226031Sstas break; 1063226031Sstas } 1064226031Sstas INP_WUNLOCK(inp); 1065226031Sstas break; 1066226031Sstas 1067226031Sstas#ifdef IPSEC 1068226031Sstas case IP_IPSEC_POLICY: 1069226031Sstas { 1070226031Sstas caddr_t req; 1071226031Sstas struct mbuf *m; 1072226031Sstas 1073226031Sstas if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1074226031Sstas break; 1075226031Sstas if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1076226031Sstas break; 1077226031Sstas req = mtod(m, caddr_t); 1078226031Sstas error = ipsec_set_policy(inp, sopt->sopt_name, req, 1079226031Sstas m->m_len, (sopt->sopt_td != NULL) ? 1080226031Sstas sopt->sopt_td->td_ucred : NULL); 1081226031Sstas m_freem(m); 1082226031Sstas break; 1083226031Sstas } 1084226031Sstas#endif /* IPSEC */ 1085226031Sstas 1086226031Sstas default: 1087226031Sstas error = ENOPROTOOPT; 1088226031Sstas break; 1089226031Sstas } 1090226031Sstas break; 1091226031Sstas 1092226031Sstas case SOPT_GET: 1093226031Sstas switch (sopt->sopt_name) { 1094226031Sstas case IP_OPTIONS: 1095226031Sstas case IP_RETOPTS: 1096226031Sstas if (inp->inp_options) 1097226031Sstas error = sooptcopyout(sopt, 1098226031Sstas mtod(inp->inp_options, 1099226031Sstas char *), 1100226031Sstas inp->inp_options->m_len); 1101226031Sstas else 1102226031Sstas sopt->sopt_valsize = 0; 1103226031Sstas break; 1104226031Sstas 1105226031Sstas case IP_TOS: 1106226031Sstas case IP_TTL: 1107226031Sstas case IP_MINTTL: 1108226031Sstas case IP_RECVOPTS: 1109226031Sstas case IP_RECVRETOPTS: 1110226031Sstas case IP_RECVDSTADDR: 1111226031Sstas case IP_RECVTTL: 1112226031Sstas case IP_RECVIF: 1113226031Sstas case IP_PORTRANGE: 1114226031Sstas case IP_FAITH: 1115226031Sstas case IP_ONESBCAST: 1116226031Sstas case IP_DONTFRAG: 1117226031Sstas switch (sopt->sopt_name) { 1118226031Sstas 1119226031Sstas case IP_TOS: 1120226031Sstas optval = inp->inp_ip_tos; 1121226031Sstas break; 1122226031Sstas 1123226031Sstas case IP_TTL: 1124226031Sstas optval = inp->inp_ip_ttl; 1125226031Sstas break; 1126226031Sstas 1127226031Sstas case IP_MINTTL: 1128226031Sstas optval = inp->inp_ip_minttl; 1129226031Sstas break; 1130226031Sstas 1131226031Sstas#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1132226031Sstas 1133226031Sstas case IP_RECVOPTS: 1134226031Sstas optval = OPTBIT(INP_RECVOPTS); 1135226031Sstas break; 1136226031Sstas 1137226031Sstas case IP_RECVRETOPTS: 1138226031Sstas optval = OPTBIT(INP_RECVRETOPTS); 1139226031Sstas break; 1140226031Sstas 1141226031Sstas case IP_RECVDSTADDR: 1142226031Sstas optval = OPTBIT(INP_RECVDSTADDR); 1143226031Sstas break; 1144226031Sstas 1145226031Sstas case IP_RECVTTL: 1146226031Sstas optval = OPTBIT(INP_RECVTTL); 1147226031Sstas break; 1148226031Sstas 1149226031Sstas case IP_RECVIF: 1150226031Sstas optval = OPTBIT(INP_RECVIF); 1151226031Sstas break; 1152226031Sstas 1153226031Sstas case IP_PORTRANGE: 1154226031Sstas if (inp->inp_flags & INP_HIGHPORT) 1155226031Sstas optval = IP_PORTRANGE_HIGH; 1156226031Sstas else if (inp->inp_flags & INP_LOWPORT) 1157226031Sstas optval = IP_PORTRANGE_LOW; 1158226031Sstas else 1159226031Sstas optval = 0; 1160226031Sstas break; 1161226031Sstas 1162226031Sstas case IP_FAITH: 1163226031Sstas optval = OPTBIT(INP_FAITH); 1164226031Sstas break; 1165226031Sstas 1166226031Sstas case IP_ONESBCAST: 1167226031Sstas optval = OPTBIT(INP_ONESBCAST); 1168226031Sstas break; 1169226031Sstas case IP_DONTFRAG: 1170226031Sstas optval = OPTBIT(INP_DONTFRAG); 1171226031Sstas break; 1172226031Sstas } 1173226031Sstas error = sooptcopyout(sopt, &optval, sizeof optval); 1174226031Sstas break; 1175226031Sstas 1176226031Sstas /* 1177226031Sstas * Multicast socket options are processed by the in_mcast 1178226031Sstas * module. 1179226031Sstas */ 1180226031Sstas case IP_MULTICAST_IF: 1181226031Sstas case IP_MULTICAST_VIF: 1182226031Sstas case IP_MULTICAST_TTL: 1183226031Sstas case IP_MULTICAST_LOOP: 1184226031Sstas case IP_MSFILTER: 1185226031Sstas error = inp_getmoptions(inp, sopt); 1186226031Sstas break; 1187226031Sstas 1188226031Sstas#ifdef IPSEC 1189226031Sstas case IP_IPSEC_POLICY: 1190226031Sstas { 1191226031Sstas struct mbuf *m = NULL; 1192226031Sstas caddr_t req = NULL; 1193226031Sstas size_t len = 0; 1194226031Sstas 1195226031Sstas if (m != 0) { 1196226031Sstas req = mtod(m, caddr_t); 1197226031Sstas len = m->m_len; 1198226031Sstas } 1199226031Sstas error = ipsec_get_policy(sotoinpcb(so), req, len, &m); 1200226031Sstas if (error == 0) 1201226031Sstas error = soopt_mcopyout(sopt, m); /* XXX */ 1202226031Sstas if (error == 0) 1203226031Sstas m_freem(m); 1204226031Sstas break; 1205226031Sstas } 1206226031Sstas#endif /* IPSEC */ 1207226031Sstas 1208226031Sstas default: 1209226031Sstas error = ENOPROTOOPT; 1210226031Sstas break; 1211226031Sstas } 1212226031Sstas break; 1213226031Sstas } 1214226031Sstas return (error); 1215226031Sstas} 1216226031Sstas 1217226031Sstas/* 1218226031Sstas * Routine called from ip_output() to loop back a copy of an IP multicast 1219226031Sstas * packet to the input queue of a specified interface. Note that this 1220226031Sstas * calls the output routine of the loopback "driver", but with an interface 1221226031Sstas * pointer that might NOT be a loopback interface -- evil, but easier than 1222226031Sstas * replicating that code here. 1223226031Sstas */ 1224226031Sstasstatic void 1225226031Sstasip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, 1226226031Sstas int hlen) 1227226031Sstas{ 1228226031Sstas register struct ip *ip; 1229226031Sstas struct mbuf *copym; 1230226031Sstas 1231226031Sstas /* 1232226031Sstas * Make a deep copy of the packet because we're going to 1233226031Sstas * modify the pack in order to generate checksums. 1234226031Sstas */ 1235226031Sstas copym = m_dup(m, M_DONTWAIT); 1236226031Sstas if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1237226031Sstas copym = m_pullup(copym, hlen); 1238226031Sstas if (copym != NULL) { 1239226031Sstas /* If needed, compute the checksum and mark it as valid. */ 1240226031Sstas if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1241226031Sstas in_delayed_cksum(copym); 1242226031Sstas copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1243226031Sstas copym->m_pkthdr.csum_flags |= 1244226031Sstas CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1245226031Sstas copym->m_pkthdr.csum_data = 0xffff; 1246226031Sstas } 1247226031Sstas /* 1248226031Sstas * We don't bother to fragment if the IP length is greater 1249226031Sstas * than the interface's MTU. Can this possibly matter? 1250226031Sstas */ 1251226031Sstas ip = mtod(copym, struct ip *); 1252226031Sstas ip->ip_len = htons(ip->ip_len); 1253226031Sstas ip->ip_off = htons(ip->ip_off); 1254226031Sstas ip->ip_sum = 0; 1255226031Sstas ip->ip_sum = in_cksum(copym, hlen); 1256226031Sstas#if 1 /* XXX */ 1257226031Sstas if (dst->sin_family != AF_INET) { 1258226031Sstas printf("ip_mloopback: bad address family %d\n", 1259226031Sstas dst->sin_family); 1260226031Sstas dst->sin_family = AF_INET; 1261226031Sstas } 1262226031Sstas#endif 1263226031Sstas if_simloop(ifp, copym, dst->sin_family, 0); 1264226031Sstas } 1265226031Sstas} 1266226031Sstas