ip_output.c revision 191148
/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28233176Sae * 29233176Sae * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30233176Sae */ 31233176Sae 32233176Sae#include <sys/cdefs.h> 33233176Sae__FBSDID("$FreeBSD: head/sys/netinet/ip_output.c 191148 2009-04-16 20:30:28Z kmacy $"); 34233176Sae 35233176Sae#include "opt_ipfw.h" 36233176Sae#include "opt_inet.h" 37233176Sae#include "opt_ipsec.h" 38233176Sae#include "opt_route.h" 39233176Sae#include "opt_mac.h" 40233176Sae#include "opt_mbuf_stress_test.h" 41233176Sae#include "opt_mpath.h" 42233176Sae#include "opt_sctp.h" 43233176Sae 44233176Sae#include <sys/param.h> 45233176Sae#include <sys/systm.h> 46233176Sae#include <sys/kernel.h> 47233176Sae#include <sys/malloc.h> 48233176Sae#include <sys/mbuf.h> 49233176Sae#include <sys/priv.h> 50233176Sae#include <sys/proc.h> 51233176Sae#include <sys/protosw.h> 52233176Sae#include <sys/socket.h> 53233176Sae#include <sys/socketvar.h> 54233176Sae#include <sys/sysctl.h> 55233176Sae#include <sys/ucred.h> 56233176Sae#include <sys/vimage.h> 57233176Sae 58233176Sae#include <net/if.h> 59233181Sae#include <net/netisr.h> 60233181Sae#include <net/pfil.h> 61233176Sae#include <net/route.h> 62233176Sae#ifdef RADIX_MPATH 63233176Sae#include <net/radix_mpath.h> 64233176Sae#endif 65233176Sae#include <net/vnet.h> 66233176Sae 67233176Sae#include <netinet/in.h> 68233181Sae#include <netinet/in_systm.h> 69233181Sae#include <netinet/ip.h> 70233176Sae#include <netinet/in_pcb.h> 71233176Sae#include <netinet/in_var.h> 72233176Sae#include <netinet/ip_var.h> 73233176Sae#include <netinet/ip_options.h> 74233176Sae#include <netinet/vinet.h> 75233176Sae#ifdef SCTP 76233176Sae#include <netinet/sctp.h> 77233176Sae#include <netinet/sctp_crc32.h> 78233176Sae#endif 79233176Sae 80233176Sae#ifdef IPSEC 81233176Sae#include <netinet/ip_ipsec.h> 82233176Sae#include <netipsec/ipsec.h> 83233176Sae#endif /* IPSEC*/ 84233176Sae 85233176Sae#include <machine/in_cksum.h> 86233176Sae 87233176Sae#include <security/mac/mac_framework.h> 88233176Sae 89233176Sae#define print_ip(x, a, y) 
printf("%s %d.%d.%d.%d%s",\ 90233176Sae x, (ntohl(a.s_addr)>>24)&0xFF,\ 91233176Sae (ntohl(a.s_addr)>>16)&0xFF,\ 92233176Sae (ntohl(a.s_addr)>>8)&0xFF,\ 93233176Sae (ntohl(a.s_addr))&0xFF, y); 94233176Sae 95233176Sae#ifdef VIMAGE_GLOBALS 96233176Saeu_short ip_id; 97233176Sae#endif 98233176Sae 99233176Sae#ifdef MBUF_STRESS_TEST 100233176Saeint mbuf_frag_size = 0; 101233176SaeSYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 102233176Sae &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 103233176Sae#endif 104233176Sae 105233176Sae#if defined(IP_NONLOCALBIND) 106233176Saestatic int ip_nonlocalok = 0; 107233176SaeSYSCTL_INT(_net_inet_ip, OID_AUTO, nonlocalok, 108233176Sae CTLFLAG_RW|CTLFLAG_SECURE, &ip_nonlocalok, 0, ""); 109233176Sae#endif 110233176Sae 111233176Saestatic void ip_mloopback 112233176Sae (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 113233176Sae 114233176Sae 115233176Saeextern int in_mcast_loop; 116233176Saeextern struct protosw inetsw[]; 117233176Sae 118233176Sae/* 119233176Sae * IP output. The packet in mbuf chain m contains a skeletal IP 120233176Sae * header (with len, off, ttl, proto, tos, src, dst). 121233176Sae * The mbuf chain containing the packet will be freed. 122233176Sae * The mbuf opt, if present, will not be freed. 123233176Sae * In the IP forwarding case, the packet will arrive with options already 124233176Sae * inserted, so must have a NULL opt pointer. 
125233176Sae */ 126233176Saeint 127233176Saeip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 128233176Sae struct ip_moptions *imo, struct inpcb *inp) 129233176Sae{ 130233176Sae INIT_VNET_NET(curvnet); 131233176Sae INIT_VNET_INET(curvnet); 132233176Sae struct ip *ip; 133233176Sae struct ifnet *ifp = NULL; /* keep compiler happy */ 134233176Sae struct mbuf *m0; 135233176Sae int hlen = sizeof (struct ip); 136233176Sae int mtu; 137233176Sae int len, error = 0; 138233176Sae struct sockaddr_in *dst = NULL; /* keep compiler happy */ 139233176Sae struct in_ifaddr *ia = NULL; 140233176Sae int isbroadcast, sw_csum; 141233176Sae struct route iproute; 142233176Sae struct in_addr odst; 143233176Sae#ifdef IPFIREWALL_FORWARD 144233176Sae struct m_tag *fwd_tag = NULL; 145233176Sae#endif 146233176Sae M_ASSERTPKTHDR(m); 147233176Sae 148233176Sae if (ro == NULL) { 149233176Sae ro = &iproute; 150233176Sae bzero(ro, sizeof (*ro)); 151233176Sae } 152233176Sae 153233176Sae if (inp != NULL) { 154233176Sae M_SETFIB(m, inp->inp_inc.inc_fibnum); 155233176Sae INP_LOCK_ASSERT(inp); 156233176Sae if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) { 157233176Sae m->m_pkthdr.flowid = inp->inp_flowid; 158233176Sae m->m_flags |= M_FLOWID; 159233176Sae } 160233176Sae } 161233176Sae 162233176Sae if (opt) { 163233176Sae len = 0; 164233176Sae m = ip_insertoptions(m, opt, &len); 165233176Sae if (len != 0) 166233176Sae hlen = len; 167233176Sae } 168233176Sae ip = mtod(m, struct ip *); 169233176Sae 170233176Sae /* 171233176Sae * Fill in IP header. If we are not allowing fragmentation, 172233176Sae * then the ip_id field is meaningless, but we don't set it 173233176Sae * to zero. Doing so causes various problems when devices along 174233176Sae * the path (routers, load balancers, firewalls, etc.) illegally 175233176Sae * disable DF on our packet. Note that a 16-bit counter 176233176Sae * will wrap around in less than 10 seconds at 100 Mbit/s on a 177233176Sae * medium with MTU 1500. 
See Steven M. Bellovin, "A Technique 178233176Sae * for Counting NATted Hosts", Proc. IMW'02, available at 179233176Sae * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>. 180233176Sae */ 181233176Sae if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 182233176Sae ip->ip_v = IPVERSION; 183233176Sae ip->ip_hl = hlen >> 2; 184233176Sae ip->ip_id = ip_newid(); 185233176Sae IPSTAT_INC(ips_localout); 186233176Sae } else { 187233176Sae hlen = ip->ip_hl << 2; 188233176Sae } 189233176Sae 190233176Sae dst = (struct sockaddr_in *)&ro->ro_dst; 191233176Saeagain: 192233176Sae /* 193233176Sae * If there is a cached route, 194233176Sae * check that it is to the same destination 195233176Sae * and is still up. If not, free it and try again. 196233176Sae * The address family should also be checked in case of sharing the 197233176Sae * cache with IPv6. 198233176Sae */ 199233176Sae if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 200233176Sae dst->sin_family != AF_INET || 201233176Sae dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 202233176Sae RTFREE(ro->ro_rt); 203233176Sae ro->ro_rt = (struct rtentry *)NULL; 204233176Sae } 205233176Sae#ifdef IPFIREWALL_FORWARD 206233176Sae if (ro->ro_rt == NULL && fwd_tag == NULL) { 207233176Sae#else 208233176Sae if (ro->ro_rt == NULL) { 209233176Sae#endif 210233176Sae bzero(dst, sizeof(*dst)); 211233176Sae dst->sin_family = AF_INET; 212233176Sae dst->sin_len = sizeof(*dst); 213233176Sae dst->sin_addr = ip->ip_dst; 214233176Sae } 215233176Sae /* 216233176Sae * If routing to interface only, short circuit routing lookup. 217233176Sae * The use of an all-ones broadcast address implies this; an 218233176Sae * interface is specified by the broadcast address of an interface, 219233176Sae * or the destination address of a ptp interface. 
220233176Sae */ 221233176Sae if (flags & IP_SENDONES) { 222233176Sae if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL && 223233176Sae (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) { 224233176Sae IPSTAT_INC(ips_noroute); 225233176Sae error = ENETUNREACH; 226233176Sae goto bad; 227233176Sae } 228233176Sae ip->ip_dst.s_addr = INADDR_BROADCAST; 229233176Sae dst->sin_addr = ip->ip_dst; 230233176Sae ifp = ia->ia_ifp; 231233176Sae ip->ip_ttl = 1; 232233176Sae isbroadcast = 1; 233233176Sae } else if (flags & IP_ROUTETOIF) { 234233176Sae if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 235233176Sae (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 236233176Sae IPSTAT_INC(ips_noroute); 237233176Sae error = ENETUNREACH; 238233176Sae goto bad; 239233176Sae } 240233176Sae ifp = ia->ia_ifp; 241233176Sae ip->ip_ttl = 1; 242233176Sae isbroadcast = in_broadcast(dst->sin_addr, ifp); 243233176Sae } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 244233176Sae imo != NULL && imo->imo_multicast_ifp != NULL) { 245233176Sae /* 246233176Sae * Bypass the normal routing lookup for multicast 247233176Sae * packets if the interface is specified. 248233176Sae */ 249233176Sae ifp = imo->imo_multicast_ifp; 250233176Sae IFP_TO_IA(ifp, ia); 251233176Sae isbroadcast = 0; /* fool gcc */ 252233176Sae } else { 253233176Sae /* 254233176Sae * We want to do any cloning requested by the link layer, 255233176Sae * as this is probably required in all cases for correct 256233176Sae * operation (as it is for ARP). 257233176Sae */ 258233176Sae if (ro->ro_rt == NULL) 259233176Sae#ifdef RADIX_MPATH 260233176Sae rtalloc_mpath_fib(ro, 261233176Sae ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 262233176Sae inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); 263233176Sae#else 264233176Sae in_rtalloc_ign(ro, 0, 265233176Sae inp ? 
inp->inp_inc.inc_fibnum : M_GETFIB(m)); 266233176Sae#endif 267233176Sae if (ro->ro_rt == NULL) { 268233176Sae IPSTAT_INC(ips_noroute); 269233176Sae error = EHOSTUNREACH; 270233176Sae goto bad; 271233176Sae } 272233176Sae ia = ifatoia(ro->ro_rt->rt_ifa); 273233176Sae ifp = ro->ro_rt->rt_ifp; 274233176Sae ro->ro_rt->rt_rmx.rmx_pksent++; 275233176Sae if (ro->ro_rt->rt_flags & RTF_GATEWAY) 276233176Sae dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 277233176Sae if (ro->ro_rt->rt_flags & RTF_HOST) 278233176Sae isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 279233176Sae else 280233176Sae isbroadcast = in_broadcast(dst->sin_addr, ifp); 281233176Sae } 282233176Sae /* 283233176Sae * Calculate MTU. If we have a route that is up, use that, 284233176Sae * otherwise use the interface's MTU. 285233176Sae */ 286233176Sae if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) { 287233176Sae /* 288233176Sae * This case can happen if the user changed the MTU 289233176Sae * of an interface after enabling IP on it. Because 290233176Sae * most netifs don't keep track of routes pointing to 291233176Sae * them, there is no way for one to update all its 292233176Sae * routes when the MTU is changed. 293233176Sae */ 294233176Sae if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu) 295233176Sae ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 296233176Sae mtu = ro->ro_rt->rt_rmx.rmx_mtu; 297233176Sae } else { 298233176Sae mtu = ifp->if_mtu; 299233176Sae } 300233176Sae if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 301233176Sae m->m_flags |= M_MCAST; 302233176Sae /* 303233176Sae * IP destination address is multicast. Make sure "dst" 304233176Sae * still points to the address in "ro". (It may have been 305233176Sae * changed to point to a gateway address, above.) 
306233176Sae */ 307233176Sae dst = (struct sockaddr_in *)&ro->ro_dst; 308233176Sae /* 309233176Sae * See if the caller provided any multicast options 310233176Sae */ 311233176Sae if (imo != NULL) { 312233176Sae ip->ip_ttl = imo->imo_multicast_ttl; 313233176Sae if (imo->imo_multicast_vif != -1) 314233176Sae ip->ip_src.s_addr = 315233176Sae ip_mcast_src ? 316233176Sae ip_mcast_src(imo->imo_multicast_vif) : 317233176Sae INADDR_ANY; 318233176Sae } else 319233176Sae ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 320233176Sae /* 321233176Sae * Confirm that the outgoing interface supports multicast. 322233176Sae */ 323233176Sae if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 324233176Sae if ((ifp->if_flags & IFF_MULTICAST) == 0) { 325233176Sae IPSTAT_INC(ips_noroute); 326233176Sae error = ENETUNREACH; 327233176Sae goto bad; 328233176Sae } 329233176Sae } 330233176Sae /* 331233176Sae * If source address not specified yet, use address 332233176Sae * of outgoing interface. 333233176Sae */ 334233176Sae if (ip->ip_src.s_addr == INADDR_ANY) { 335233176Sae /* Interface may have no addresses. */ 336233176Sae if (ia != NULL) 337233176Sae ip->ip_src = IA_SIN(ia)->sin_addr; 338233176Sae } 339233176Sae 340233176Sae if ((imo == NULL && in_mcast_loop) || 341233176Sae (imo && imo->imo_multicast_loop)) { 342233176Sae /* 343233176Sae * Loop back multicast datagram if not expressly 344233176Sae * forbidden to do so, even if we are not a member 345233176Sae * of the group; ip_input() will filter it later, 346233176Sae * thus deferring a hash lookup and mutex acquisition 347233176Sae * at the expense of a cheap copy using m_copym(). 348233176Sae */ 349233176Sae ip_mloopback(ifp, m, dst, hlen); 350233176Sae } else { 351233176Sae /* 352233176Sae * If we are acting as a multicast router, perform 353233176Sae * multicast forwarding as if the packet had just 354233176Sae * arrived on the interface to which we are about 355233176Sae * to send. 
The multicast forwarding function 356233176Sae * recursively calls this function, using the 357233176Sae * IP_FORWARDING flag to prevent infinite recursion. 358233176Sae * 359233176Sae * Multicasts that are looped back by ip_mloopback(), 360233176Sae * above, will be forwarded by the ip_input() routine, 361233176Sae * if necessary. 362233176Sae */ 363233176Sae if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 364233176Sae /* 365233176Sae * If rsvp daemon is not running, do not 366233176Sae * set ip_moptions. This ensures that the packet 367233176Sae * is multicast and not just sent down one link 368233176Sae * as prescribed by rsvpd. 369233176Sae */ 370233176Sae if (!V_rsvp_on) 371233176Sae imo = NULL; 372233176Sae if (ip_mforward && 373233176Sae ip_mforward(ip, ifp, m, imo) != 0) { 374233176Sae m_freem(m); 375233176Sae goto done; 376233176Sae } 377233176Sae } 378233176Sae } 379233176Sae 380233176Sae /* 381233176Sae * Multicasts with a time-to-live of zero may be looped- 382233176Sae * back, above, but must not be transmitted on a network. 383233176Sae * Also, multicasts addressed to the loopback interface 384233176Sae * are not sent -- the above call to ip_mloopback() will 385233176Sae * loop back a copy. ip_input() will drop the copy if 386233176Sae * this host does not belong to the destination group on 387233176Sae * the loopback interface. 388233176Sae */ 389233176Sae if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 390233176Sae m_freem(m); 391233176Sae goto done; 392233176Sae } 393233176Sae 394233176Sae goto sendit; 395233176Sae } 396233176Sae 397233176Sae /* 398233176Sae * If the source address is not specified yet, use the address 399233176Sae * of the outoing interface. 400233176Sae */ 401233176Sae if (ip->ip_src.s_addr == INADDR_ANY) { 402233176Sae /* Interface may have no addresses. 
*/ 403233176Sae if (ia != NULL) { 404233176Sae ip->ip_src = IA_SIN(ia)->sin_addr; 405233176Sae } 406233176Sae } 407233176Sae 408233176Sae /* 409233176Sae * Verify that we have any chance at all of being able to queue the 410233176Sae * packet or packet fragments, unless ALTQ is enabled on the given 411233176Sae * interface in which case packetdrop should be done by queueing. 412233176Sae */ 413233176Sae#ifdef ALTQ 414233176Sae if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) && 415233176Sae ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 416233176Sae ifp->if_snd.ifq_maxlen)) 417233176Sae#else 418233176Sae if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 419233176Sae ifp->if_snd.ifq_maxlen) 420233176Sae#endif /* ALTQ */ 421233176Sae { 422233176Sae error = ENOBUFS; 423233176Sae IPSTAT_INC(ips_odropped); 424233176Sae ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1); 425233176Sae goto bad; 426233176Sae } 427233176Sae 428233176Sae /* 429233176Sae * Look for broadcast address and 430233176Sae * verify user is allowed to send 431233176Sae * such a packet. 432233176Sae */ 433233176Sae if (isbroadcast) { 434233176Sae if ((ifp->if_flags & IFF_BROADCAST) == 0) { 435233176Sae error = EADDRNOTAVAIL; 436233176Sae goto bad; 437233176Sae } 438233176Sae if ((flags & IP_ALLOWBROADCAST) == 0) { 439233176Sae error = EACCES; 440233176Sae goto bad; 441233176Sae } 442233176Sae /* don't allow broadcast messages to be fragmented */ 443233176Sae if (ip->ip_len > mtu) { 444233176Sae error = EMSGSIZE; 445233176Sae goto bad; 446233176Sae } 447233176Sae m->m_flags |= M_BCAST; 448233176Sae } else { 449233176Sae m->m_flags &= ~M_BCAST; 450233176Sae } 451233176Sae 452233176Saesendit: 453233176Sae#ifdef IPSEC 454233176Sae switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) { 455233176Sae case 1: 456233176Sae goto bad; 457233176Sae case -1: 458233176Sae goto done; 459233176Sae case 0: 460233176Sae default: 461233176Sae break; /* Continue with packet processing. 
*/ 462233176Sae } 463233176Sae /* Update variables that are affected by ipsec4_output(). */ 464233176Sae ip = mtod(m, struct ip *); 465233176Sae hlen = ip->ip_hl << 2; 466233176Sae#endif /* IPSEC */ 467233176Sae 468233176Sae /* Jump over all PFIL processing if hooks are not active. */ 469233176Sae if (!PFIL_HOOKED(&inet_pfil_hook)) 470233176Sae goto passout; 471233176Sae 472233176Sae /* Run through list of hooks for output packets. */ 473233176Sae odst.s_addr = ip->ip_dst.s_addr; 474233176Sae error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp); 475233176Sae if (error != 0 || m == NULL) 476233176Sae goto done; 477233176Sae 478233176Sae ip = mtod(m, struct ip *); 479233176Sae 480233176Sae /* See if destination IP address was changed by packet filter. */ 481233176Sae if (odst.s_addr != ip->ip_dst.s_addr) { 482233176Sae m->m_flags |= M_SKIP_FIREWALL; 483233176Sae /* If destination is now ourself drop to ip_input(). */ 484233176Sae if (in_localip(ip->ip_dst)) { 485233176Sae m->m_flags |= M_FASTFWD_OURS; 486233176Sae if (m->m_pkthdr.rcvif == NULL) 487233176Sae m->m_pkthdr.rcvif = V_loif; 488233176Sae if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 489233176Sae m->m_pkthdr.csum_flags |= 490233176Sae CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 491233176Sae m->m_pkthdr.csum_data = 0xffff; 492233652Sae } 493233176Sae m->m_pkthdr.csum_flags |= 494233176Sae CSUM_IP_CHECKED | CSUM_IP_VALID; 495233176Sae#ifdef SCTP 496233176Sae if (m->m_pkthdr.csum_flags & CSUM_SCTP) 497233176Sae m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 498233176Sae#endif 499233176Sae error = netisr_queue(NETISR_IP, m); 500233176Sae goto done; 501233176Sae } else 502233176Sae goto again; /* Redo the routing table lookup. */ 503233176Sae } 504233176Sae 505233176Sae#ifdef IPFIREWALL_FORWARD 506233176Sae /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. 
*/ 507233176Sae if (m->m_flags & M_FASTFWD_OURS) { 508233176Sae if (m->m_pkthdr.rcvif == NULL) 509233176Sae m->m_pkthdr.rcvif = V_loif; 510233176Sae if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 511233176Sae m->m_pkthdr.csum_flags |= 512233176Sae CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 513233176Sae m->m_pkthdr.csum_data = 0xffff; 514233176Sae } 515233176Sae#ifdef SCTP 516233176Sae if (m->m_pkthdr.csum_flags & CSUM_SCTP) 517233176Sae m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 518233176Sae#endif 519233176Sae m->m_pkthdr.csum_flags |= 520233176Sae CSUM_IP_CHECKED | CSUM_IP_VALID; 521233176Sae 522233176Sae error = netisr_queue(NETISR_IP, m); 523233176Sae goto done; 524233176Sae } 525233176Sae /* Or forward to some other address? */ 526233176Sae fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 527233176Sae if (fwd_tag) { 528233176Sae dst = (struct sockaddr_in *)&ro->ro_dst; 529233176Sae bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 530233176Sae m->m_flags |= M_SKIP_FIREWALL; 531233176Sae m_tag_delete(m, fwd_tag); 532233176Sae goto again; 533233176Sae } 534233176Sae#endif /* IPFIREWALL_FORWARD */ 535233176Sae 536233176Saepassout: 537233176Sae /* 127/8 must not appear on wire - RFC1122. 
*/ 538233176Sae if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 539233176Sae (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 540233176Sae if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 541233176Sae IPSTAT_INC(ips_badaddr); 542233176Sae error = EADDRNOTAVAIL; 543233176Sae goto bad; 544233176Sae } 545233176Sae } 546233176Sae 547233176Sae m->m_pkthdr.csum_flags |= CSUM_IP; 548233176Sae sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 549233176Sae if (sw_csum & CSUM_DELAY_DATA) { 550233176Sae in_delayed_cksum(m); 551233176Sae sw_csum &= ~CSUM_DELAY_DATA; 552233176Sae } 553233176Sae#ifdef SCTP 554233176Sae if (sw_csum & CSUM_SCTP) { 555233176Sae sctp_delayed_cksum(m); 556233176Sae sw_csum &= ~CSUM_SCTP; 557233176Sae } 558233176Sae#endif 559233176Sae m->m_pkthdr.csum_flags &= ifp->if_hwassist; 560233176Sae 561233176Sae /* 562233176Sae * If small enough for interface, or the interface will take 563233176Sae * care of the fragmentation for us, we can just send directly. 564233176Sae */ 565233176Sae if (ip->ip_len <= mtu || 566233176Sae (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || 567233176Sae ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { 568233176Sae ip->ip_len = htons(ip->ip_len); 569233176Sae ip->ip_off = htons(ip->ip_off); 570233176Sae ip->ip_sum = 0; 571233176Sae if (sw_csum & CSUM_DELAY_IP) 572233176Sae ip->ip_sum = in_cksum(m, hlen); 573233176Sae 574233176Sae /* 575233176Sae * Record statistics for this interface address. 576233176Sae * With CSUM_TSO the byte/packet count will be slightly 577233176Sae * incorrect because we count the IP+TCP headers only 578233176Sae * once instead of for every generated packet. 
579233176Sae */ 580233176Sae if (!(flags & IP_FORWARDING) && ia) { 581233176Sae if (m->m_pkthdr.csum_flags & CSUM_TSO) 582233176Sae ia->ia_ifa.if_opackets += 583233176Sae m->m_pkthdr.len / m->m_pkthdr.tso_segsz; 584233176Sae else 585233176Sae ia->ia_ifa.if_opackets++; 586233176Sae ia->ia_ifa.if_obytes += m->m_pkthdr.len; 587233176Sae } 588233176Sae#ifdef MBUF_STRESS_TEST 589233176Sae if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 590233176Sae m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 591233176Sae#endif 592233176Sae /* 593233176Sae * Reset layer specific mbuf flags 594233176Sae * to avoid confusing lower layers. 595233176Sae */ 596233176Sae m->m_flags &= ~(M_PROTOFLAGS); 597233176Sae error = (*ifp->if_output)(ifp, m, 598233176Sae (struct sockaddr *)dst, ro); 599233176Sae goto done; 600233176Sae } 601233176Sae 602233176Sae /* Balk when DF bit is set or the interface didn't support TSO. */ 603233176Sae if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 604233176Sae error = EMSGSIZE; 605233176Sae IPSTAT_INC(ips_cantfrag); 606233176Sae goto bad; 607233176Sae } 608233176Sae 609233176Sae /* 610233176Sae * Too large for interface; fragment if possible. If successful, 611233176Sae * on return, m will point to a list of packets to be sent. 612233176Sae */ 613233176Sae error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum); 614233176Sae if (error) 615233176Sae goto bad; 616233176Sae for (; m; m = m0) { 617233176Sae m0 = m->m_nextpkt; 618233176Sae m->m_nextpkt = 0; 619233176Sae if (error == 0) { 620233176Sae /* Record statistics for this interface address. */ 621233176Sae if (ia != NULL) { 622233176Sae ia->ia_ifa.if_opackets++; 623233176Sae ia->ia_ifa.if_obytes += m->m_pkthdr.len; 624233176Sae } 625233176Sae /* 626233176Sae * Reset layer specific mbuf flags 627233176Sae * to avoid confusing upper layers. 
628233176Sae */ 629233176Sae m->m_flags &= ~(M_PROTOFLAGS); 630233176Sae 631233176Sae error = (*ifp->if_output)(ifp, m, 632233176Sae (struct sockaddr *)dst, ro); 633233176Sae } else 634233176Sae m_freem(m); 635233176Sae } 636233176Sae 637233176Sae if (error == 0) 638233176Sae IPSTAT_INC(ips_fragmented); 639233176Sae 640233176Saedone: 641233176Sae if (ro == &iproute && ro->ro_rt) { 642233176Sae RTFREE(ro->ro_rt); 643233176Sae } 644233176Sae return (error); 645233176Saebad: 646233176Sae m_freem(m); 647233176Sae goto done; 648233176Sae} 649233176Sae 650233176Sae/* 651233176Sae * Create a chain of fragments which fit the given mtu. m_frag points to the 652233176Sae * mbuf to be fragmented; on return it points to the chain with the fragments. 653233176Sae * Return 0 if no error. If error, m_frag may contain a partially built 654233176Sae * chain of fragments that should be freed by the caller. 655233176Sae * 656233176Sae * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 657233176Sae * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 658233176Sae */ 659233176Saeint 660233176Saeip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 661233176Sae u_long if_hwassist_flags, int sw_csum) 662233176Sae{ 663233176Sae INIT_VNET_INET(curvnet); 664233176Sae int error = 0; 665233176Sae int hlen = ip->ip_hl << 2; 666233176Sae int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 667233176Sae int off; 668233176Sae struct mbuf *m0 = *m_frag; /* the original packet */ 669233176Sae int firstlen; 670233176Sae struct mbuf **mnext; 671233176Sae int nfrags; 672233176Sae 673233176Sae if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 674233176Sae IPSTAT_INC(ips_cantfrag); 675233176Sae return EMSGSIZE; 676233176Sae } 677233176Sae 678233176Sae /* 679233176Sae * Must be able to put at least 8 bytes per fragment. 
680233176Sae */ 681233176Sae if (len < 8) 682233176Sae return EMSGSIZE; 683233176Sae 684233176Sae /* 685233176Sae * If the interface will not calculate checksums on 686233176Sae * fragmented packets, then do it here. 687233176Sae */ 688233176Sae if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 689233176Sae (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 690233176Sae in_delayed_cksum(m0); 691233176Sae m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 692233176Sae } 693233176Sae#ifdef SCTP 694233176Sae if (m0->m_pkthdr.csum_flags & CSUM_SCTP && 695233176Sae (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 696233176Sae sctp_delayed_cksum(m0); 697233176Sae m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 698233176Sae } 699233176Sae#endif 700233176Sae if (len > PAGE_SIZE) { 701233176Sae /* 702233176Sae * Fragment large datagrams such that each segment 703233176Sae * contains a multiple of PAGE_SIZE amount of data, 704233176Sae * plus headers. This enables a receiver to perform 705233176Sae * page-flipping zero-copy optimizations. 706233176Sae * 707233176Sae * XXX When does this help given that sender and receiver 708233176Sae * could have different page sizes, and also mtu could 709233176Sae * be less than the receiver's page size ? 
710233176Sae */ 711233176Sae int newlen; 712233176Sae struct mbuf *m; 713233176Sae 714233176Sae for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 715233176Sae off += m->m_len; 716233176Sae 717233176Sae /* 718233176Sae * firstlen (off - hlen) must be aligned on an 719233176Sae * 8-byte boundary 720233176Sae */ 721233176Sae if (off < hlen) 722233176Sae goto smart_frag_failure; 723233176Sae off = ((off - hlen) & ~7) + hlen; 724233176Sae newlen = (~PAGE_MASK) & mtu; 725233176Sae if ((newlen + sizeof (struct ip)) > mtu) { 726233176Sae /* we failed, go back the default */ 727233176Saesmart_frag_failure: 728233176Sae newlen = len; 729233176Sae off = hlen + len; 730233176Sae } 731233176Sae len = newlen; 732233176Sae 733233176Sae } else { 734233176Sae off = hlen + len; 735233176Sae } 736233176Sae 737233176Sae firstlen = off - hlen; 738233176Sae mnext = &m0->m_nextpkt; /* pointer to next packet */ 739233176Sae 740233176Sae /* 741233176Sae * Loop through length of segment after first fragment, 742233176Sae * make new header and copy data of each part and link onto chain. 743233176Sae * Here, m0 is the original packet, m is the fragment being created. 744233176Sae * The fragments are linked off the m_nextpkt of the original 745233176Sae * packet, which after processing serves as the first fragment. 746233176Sae */ 747233176Sae for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 748233176Sae struct ip *mhip; /* ip header on the fragment */ 749233176Sae struct mbuf *m; 750233176Sae int mhlen = sizeof (struct ip); 751233176Sae 752233176Sae MGETHDR(m, M_DONTWAIT, MT_DATA); 753233176Sae if (m == NULL) { 754233176Sae error = ENOBUFS; 755233176Sae IPSTAT_INC(ips_odropped); 756233176Sae goto done; 757233176Sae } 758233176Sae m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 759233176Sae /* 760233176Sae * In the first mbuf, leave room for the link header, then 761233176Sae * copy the original IP header including options. 
The payload 762233176Sae * goes into an additional mbuf chain returned by m_copym(). 763233176Sae */ 764233176Sae m->m_data += max_linkhdr; 765233176Sae mhip = mtod(m, struct ip *); 766233176Sae *mhip = *ip; 767233176Sae if (hlen > sizeof (struct ip)) { 768233176Sae mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 769233176Sae mhip->ip_v = IPVERSION; 770233176Sae mhip->ip_hl = mhlen >> 2; 771233176Sae } 772233176Sae m->m_len = mhlen; 773233176Sae /* XXX do we need to add ip->ip_off below ? */ 774233176Sae mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 775233176Sae if (off + len >= ip->ip_len) { /* last fragment */ 776233176Sae len = ip->ip_len - off; 777233176Sae m->m_flags |= M_LASTFRAG; 778233176Sae } else 779233176Sae mhip->ip_off |= IP_MF; 780233176Sae mhip->ip_len = htons((u_short)(len + mhlen)); 781233176Sae m->m_next = m_copym(m0, off, len, M_DONTWAIT); 782233176Sae if (m->m_next == NULL) { /* copy failed */ 783233176Sae m_free(m); 784233176Sae error = ENOBUFS; /* ??? */ 785233176Sae IPSTAT_INC(ips_odropped); 786233176Sae goto done; 787233176Sae } 788233176Sae m->m_pkthdr.len = mhlen + len; 789233176Sae m->m_pkthdr.rcvif = NULL; 790233176Sae#ifdef MAC 791233176Sae mac_netinet_fragment(m0, m); 792233176Sae#endif 793233176Sae m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 794233176Sae mhip->ip_off = htons(mhip->ip_off); 795233176Sae mhip->ip_sum = 0; 796233176Sae if (sw_csum & CSUM_DELAY_IP) 797233176Sae mhip->ip_sum = in_cksum(m, mhlen); 798233176Sae *mnext = m; 799233176Sae mnext = &m->m_nextpkt; 800233176Sae } 801233176Sae IPSTAT_ADD(ips_ofragments, nfrags); 802233176Sae 803233176Sae /* set first marker for fragment chain */ 804233176Sae m0->m_flags |= M_FIRSTFRAG | M_FRAG; 805233176Sae m0->m_pkthdr.csum_data = nfrags; 806233176Sae 807233176Sae /* 808233176Sae * Update first fragment by trimming what's been copied out 809233176Sae * and updating header. 
810233176Sae */ 811233176Sae m_adj(m0, hlen + firstlen - ip->ip_len); 812233176Sae m0->m_pkthdr.len = hlen + firstlen; 813233176Sae ip->ip_len = htons((u_short)m0->m_pkthdr.len); 814233176Sae ip->ip_off |= IP_MF; 815233176Sae ip->ip_off = htons(ip->ip_off); 816233176Sae ip->ip_sum = 0; 817233176Sae if (sw_csum & CSUM_DELAY_IP) 818233176Sae ip->ip_sum = in_cksum(m0, hlen); 819233176Sae 820233176Saedone: 821233176Sae *m_frag = m0; 822233176Sae return error; 823233176Sae} 824233176Sae 825233176Saevoid 826233176Saein_delayed_cksum(struct mbuf *m) 827233176Sae{ 828233176Sae struct ip *ip; 829233176Sae u_short csum, offset; 830233176Sae 831233176Sae ip = mtod(m, struct ip *); 832233176Sae offset = ip->ip_hl << 2 ; 833233176Sae csum = in_cksum_skip(m, ip->ip_len, offset); 834233176Sae if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 835233176Sae csum = 0xffff; 836233176Sae offset += m->m_pkthdr.csum_data; /* checksum offset */ 837233176Sae 838233176Sae if (offset + sizeof(u_short) > m->m_len) { 839233176Sae printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 840233176Sae m->m_len, offset, ip->ip_p); 841233176Sae /* 842233176Sae * XXX 843233176Sae * this shouldn't happen, but if it does, the 844233176Sae * correct behavior may be to insert the checksum 845233176Sae * in the appropriate next mbuf in the chain. 846233176Sae */ 847233176Sae return; 848233176Sae } 849233176Sae *(u_short *)(m->m_data + offset) = csum; 850233176Sae} 851233176Sae 852233176Sae/* 853233176Sae * IP socket option processing. 
854233176Sae */ 855233176Saeint 856233176Saeip_ctloutput(struct socket *so, struct sockopt *sopt) 857233176Sae{ 858233176Sae struct inpcb *inp = sotoinpcb(so); 859233176Sae int error, optval; 860233176Sae 861233176Sae error = optval = 0; 862233176Sae if (sopt->sopt_level != IPPROTO_IP) { 863233176Sae if ((sopt->sopt_level == SOL_SOCKET) && 864233176Sae (sopt->sopt_name == SO_SETFIB)) { 865233176Sae inp->inp_inc.inc_fibnum = so->so_fibnum; 866233176Sae return (0); 867233176Sae } 868233176Sae return (EINVAL); 869233176Sae } 870233176Sae 871233176Sae switch (sopt->sopt_dir) { 872233176Sae case SOPT_SET: 873233176Sae switch (sopt->sopt_name) { 874233176Sae case IP_OPTIONS: 875233176Sae#ifdef notyet 876233176Sae case IP_RETOPTS: 877233176Sae#endif 878233176Sae { 879233176Sae struct mbuf *m; 880233176Sae if (sopt->sopt_valsize > MLEN) { 881233176Sae error = EMSGSIZE; 882233176Sae break; 883233176Sae } 884233176Sae MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 885233176Sae if (m == NULL) { 886233176Sae error = ENOBUFS; 887233176Sae break; 888233176Sae } 889233176Sae m->m_len = sopt->sopt_valsize; 890233176Sae error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 891233176Sae m->m_len); 892233176Sae if (error) { 893233176Sae m_free(m); 894233176Sae break; 895233176Sae } 896233176Sae INP_WLOCK(inp); 897233176Sae error = ip_pcbopts(inp, sopt->sopt_name, m); 898233176Sae INP_WUNLOCK(inp); 899233176Sae return (error); 900233176Sae } 901233176Sae 902233176Sae#if defined(IP_NONLOCALBIND) 903233176Sae case IP_NONLOCALOK: 904233176Sae if (! 
ip_nonlocalok) { 905233176Sae error = ENOPROTOOPT; 906233176Sae break; 907233176Sae } 908233176Sae /* FALLTHROUGH */ 909233176Sae#endif 910233176Sae case IP_TOS: 911233176Sae case IP_TTL: 912233176Sae case IP_MINTTL: 913233176Sae case IP_RECVOPTS: 914233176Sae case IP_RECVRETOPTS: 915233176Sae case IP_RECVDSTADDR: 916233176Sae case IP_RECVTTL: 917233176Sae case IP_RECVIF: 918233176Sae case IP_FAITH: 919233176Sae case IP_ONESBCAST: 920233176Sae case IP_DONTFRAG: 921233176Sae error = sooptcopyin(sopt, &optval, sizeof optval, 922233176Sae sizeof optval); 923233176Sae if (error) 924233176Sae break; 925233176Sae 926233176Sae switch (sopt->sopt_name) { 927233176Sae case IP_TOS: 928233176Sae inp->inp_ip_tos = optval; 929233176Sae break; 930233176Sae 931233176Sae case IP_TTL: 932233176Sae inp->inp_ip_ttl = optval; 933233176Sae break; 934233176Sae 935233176Sae case IP_MINTTL: 936233176Sae if (optval >= 0 && optval <= MAXTTL) 937233176Sae inp->inp_ip_minttl = optval; 938233176Sae else 939233176Sae error = EINVAL; 940233176Sae break; 941233176Sae 942233176Sae#define OPTSET(bit) do { \ 943233176Sae INP_WLOCK(inp); \ 944233176Sae if (optval) \ 945233176Sae inp->inp_flags |= bit; \ 946233176Sae else \ 947233176Sae inp->inp_flags &= ~bit; \ 948233176Sae INP_WUNLOCK(inp); \ 949233176Sae} while (0) 950233176Sae 951233176Sae case IP_RECVOPTS: 952233176Sae OPTSET(INP_RECVOPTS); 953233176Sae break; 954233176Sae 955233176Sae case IP_RECVRETOPTS: 956233176Sae OPTSET(INP_RECVRETOPTS); 957233176Sae break; 958233176Sae 959233176Sae case IP_RECVDSTADDR: 960233176Sae OPTSET(INP_RECVDSTADDR); 961233176Sae break; 962233176Sae 963233176Sae case IP_RECVTTL: 964233176Sae OPTSET(INP_RECVTTL); 965233176Sae break; 966233176Sae 967233176Sae case IP_RECVIF: 968233176Sae OPTSET(INP_RECVIF); 969233176Sae break; 970233176Sae 971233176Sae case IP_FAITH: 972233176Sae OPTSET(INP_FAITH); 973233176Sae break; 974233176Sae 975233176Sae case IP_ONESBCAST: 976233176Sae OPTSET(INP_ONESBCAST); 977233176Sae break; 
978233176Sae case IP_DONTFRAG: 979233176Sae OPTSET(INP_DONTFRAG); 980233176Sae break; 981233176Sae#if defined(IP_NONLOCALBIND) 982233176Sae case IP_NONLOCALOK: 983233176Sae OPTSET(INP_NONLOCALOK); 984233176Sae break; 985233176Sae#endif 986233176Sae } 987233176Sae break; 988233176Sae#undef OPTSET 989233176Sae 990233176Sae /* 991233176Sae * Multicast socket options are processed by the in_mcast 992233176Sae * module. 993233176Sae */ 994233176Sae case IP_MULTICAST_IF: 995233176Sae case IP_MULTICAST_VIF: 996233176Sae case IP_MULTICAST_TTL: 997233176Sae case IP_MULTICAST_LOOP: 998233176Sae case IP_ADD_MEMBERSHIP: 999233176Sae case IP_DROP_MEMBERSHIP: 1000233176Sae case IP_ADD_SOURCE_MEMBERSHIP: 1001233176Sae case IP_DROP_SOURCE_MEMBERSHIP: 1002233176Sae case IP_BLOCK_SOURCE: 1003233176Sae case IP_UNBLOCK_SOURCE: 1004233176Sae case IP_MSFILTER: 1005233176Sae case MCAST_JOIN_GROUP: 1006233176Sae case MCAST_LEAVE_GROUP: 1007233176Sae case MCAST_JOIN_SOURCE_GROUP: 1008233176Sae case MCAST_LEAVE_SOURCE_GROUP: 1009233176Sae case MCAST_BLOCK_SOURCE: 1010233176Sae case MCAST_UNBLOCK_SOURCE: 1011233176Sae error = inp_setmoptions(inp, sopt); 1012233176Sae break; 1013233176Sae 1014233176Sae case IP_PORTRANGE: 1015233176Sae error = sooptcopyin(sopt, &optval, sizeof optval, 1016233176Sae sizeof optval); 1017233176Sae if (error) 1018233176Sae break; 1019233176Sae 1020233176Sae INP_WLOCK(inp); 1021233176Sae switch (optval) { 1022233176Sae case IP_PORTRANGE_DEFAULT: 1023233176Sae inp->inp_flags &= ~(INP_LOWPORT); 1024233176Sae inp->inp_flags &= ~(INP_HIGHPORT); 1025233176Sae break; 1026233176Sae 1027233176Sae case IP_PORTRANGE_HIGH: 1028233176Sae inp->inp_flags &= ~(INP_LOWPORT); 1029233176Sae inp->inp_flags |= INP_HIGHPORT; 1030233176Sae break; 1031233176Sae 1032233176Sae case IP_PORTRANGE_LOW: 1033233176Sae inp->inp_flags &= ~(INP_HIGHPORT); 1034233176Sae inp->inp_flags |= INP_LOWPORT; 1035233176Sae break; 1036233176Sae 1037233176Sae default: 1038233176Sae error = EINVAL; 
1039233176Sae break; 1040233176Sae } 1041233176Sae INP_WUNLOCK(inp); 1042233176Sae break; 1043233176Sae 1044233176Sae#ifdef IPSEC 1045233176Sae case IP_IPSEC_POLICY: 1046233176Sae { 1047233176Sae caddr_t req; 1048233176Sae struct mbuf *m; 1049233176Sae 1050233176Sae if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1051233176Sae break; 1052233176Sae if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1053233176Sae break; 1054233176Sae req = mtod(m, caddr_t); 1055233176Sae error = ipsec_set_policy(inp, sopt->sopt_name, req, 1056233176Sae m->m_len, (sopt->sopt_td != NULL) ? 1057233176Sae sopt->sopt_td->td_ucred : NULL); 1058233176Sae m_freem(m); 1059233176Sae break; 1060233176Sae } 1061233176Sae#endif /* IPSEC */ 1062233176Sae 1063233176Sae default: 1064233176Sae error = ENOPROTOOPT; 1065233176Sae break; 1066233176Sae } 1067233176Sae break; 1068233176Sae 1069233176Sae case SOPT_GET: 1070233176Sae switch (sopt->sopt_name) { 1071233176Sae case IP_OPTIONS: 1072233176Sae case IP_RETOPTS: 1073233176Sae if (inp->inp_options) 1074233176Sae error = sooptcopyout(sopt, 1075233176Sae mtod(inp->inp_options, 1076233176Sae char *), 1077233176Sae inp->inp_options->m_len); 1078233176Sae else 1079233176Sae sopt->sopt_valsize = 0; 1080233176Sae break; 1081233176Sae 1082233176Sae case IP_TOS: 1083233176Sae case IP_TTL: 1084233176Sae case IP_MINTTL: 1085233176Sae case IP_RECVOPTS: 1086233176Sae case IP_RECVRETOPTS: 1087233176Sae case IP_RECVDSTADDR: 1088233176Sae case IP_RECVTTL: 1089233176Sae case IP_RECVIF: 1090233176Sae case IP_PORTRANGE: 1091233176Sae case IP_FAITH: 1092233176Sae case IP_ONESBCAST: 1093233176Sae case IP_DONTFRAG: 1094233176Sae switch (sopt->sopt_name) { 1095233176Sae 1096233176Sae case IP_TOS: 1097233176Sae optval = inp->inp_ip_tos; 1098233176Sae break; 1099233176Sae 1100233176Sae case IP_TTL: 1101233176Sae optval = inp->inp_ip_ttl; 1102233176Sae break; 1103233176Sae 1104233176Sae case IP_MINTTL: 1105233176Sae optval = inp->inp_ip_minttl; 1106233176Sae break; 
1107233176Sae 1108233176Sae#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1109233176Sae 1110233176Sae case IP_RECVOPTS: 1111233176Sae optval = OPTBIT(INP_RECVOPTS); 1112233176Sae break; 1113233176Sae 1114233176Sae case IP_RECVRETOPTS: 1115233176Sae optval = OPTBIT(INP_RECVRETOPTS); 1116233176Sae break; 1117233176Sae 1118233176Sae case IP_RECVDSTADDR: 1119233176Sae optval = OPTBIT(INP_RECVDSTADDR); 1120233176Sae break; 1121233176Sae 1122233176Sae case IP_RECVTTL: 1123233176Sae optval = OPTBIT(INP_RECVTTL); 1124233176Sae break; 1125233176Sae 1126233176Sae case IP_RECVIF: 1127233176Sae optval = OPTBIT(INP_RECVIF); 1128233176Sae break; 1129233176Sae 1130233176Sae case IP_PORTRANGE: 1131233176Sae if (inp->inp_flags & INP_HIGHPORT) 1132233176Sae optval = IP_PORTRANGE_HIGH; 1133233176Sae else if (inp->inp_flags & INP_LOWPORT) 1134233176Sae optval = IP_PORTRANGE_LOW; 1135233176Sae else 1136233176Sae optval = 0; 1137233176Sae break; 1138233176Sae 1139233176Sae case IP_FAITH: 1140233176Sae optval = OPTBIT(INP_FAITH); 1141233176Sae break; 1142233176Sae 1143233176Sae case IP_ONESBCAST: 1144233176Sae optval = OPTBIT(INP_ONESBCAST); 1145233176Sae break; 1146233176Sae case IP_DONTFRAG: 1147233176Sae optval = OPTBIT(INP_DONTFRAG); 1148233176Sae break; 1149233176Sae } 1150233176Sae error = sooptcopyout(sopt, &optval, sizeof optval); 1151233176Sae break; 1152233176Sae 1153233176Sae /* 1154233176Sae * Multicast socket options are processed by the in_mcast 1155233176Sae * module. 
1156233176Sae */ 1157233176Sae case IP_MULTICAST_IF: 1158233176Sae case IP_MULTICAST_VIF: 1159233176Sae case IP_MULTICAST_TTL: 1160233176Sae case IP_MULTICAST_LOOP: 1161233176Sae case IP_MSFILTER: 1162233176Sae error = inp_getmoptions(inp, sopt); 1163233176Sae break; 1164233176Sae 1165233176Sae#ifdef IPSEC 1166233176Sae case IP_IPSEC_POLICY: 1167233176Sae { 1168233176Sae struct mbuf *m = NULL; 1169233176Sae caddr_t req = NULL; 1170233176Sae size_t len = 0; 1171233176Sae 1172233176Sae if (m != 0) { 1173233176Sae req = mtod(m, caddr_t); 1174233176Sae len = m->m_len; 1175233176Sae } 1176233176Sae error = ipsec_get_policy(sotoinpcb(so), req, len, &m); 1177233176Sae if (error == 0) 1178233176Sae error = soopt_mcopyout(sopt, m); /* XXX */ 1179233176Sae if (error == 0) 1180233176Sae m_freem(m); 1181233176Sae break; 1182233176Sae } 1183233176Sae#endif /* IPSEC */ 1184233176Sae 1185233176Sae default: 1186233176Sae error = ENOPROTOOPT; 1187233176Sae break; 1188233176Sae } 1189233176Sae break; 1190233176Sae } 1191233176Sae return (error); 1192233176Sae} 1193233176Sae 1194233176Sae/* 1195233176Sae * Routine called from ip_output() to loop back a copy of an IP multicast 1196233176Sae * packet to the input queue of a specified interface. Note that this 1197233176Sae * calls the output routine of the loopback "driver", but with an interface 1198233176Sae * pointer that might NOT be a loopback interface -- evil, but easier than 1199233176Sae * replicating that code here. 1200233176Sae */ 1201233176Saestatic void 1202233176Saeip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, 1203233176Sae int hlen) 1204233176Sae{ 1205233176Sae register struct ip *ip; 1206233176Sae struct mbuf *copym; 1207233176Sae 1208233176Sae /* 1209233176Sae * Make a deep copy of the packet because we're going to 1210233176Sae * modify the pack in order to generate checksums. 
1211233176Sae */ 1212233176Sae copym = m_dup(m, M_DONTWAIT); 1213233176Sae if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1214233176Sae copym = m_pullup(copym, hlen); 1215233176Sae if (copym != NULL) { 1216233176Sae /* If needed, compute the checksum and mark it as valid. */ 1217233176Sae if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1218233176Sae in_delayed_cksum(copym); 1219233176Sae copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1220233176Sae copym->m_pkthdr.csum_flags |= 1221233176Sae CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1222233176Sae copym->m_pkthdr.csum_data = 0xffff; 1223233176Sae } 1224233176Sae /* 1225233176Sae * We don't bother to fragment if the IP length is greater 1226233176Sae * than the interface's MTU. Can this possibly matter? 1227233176Sae */ 1228233176Sae ip = mtod(copym, struct ip *); 1229233176Sae ip->ip_len = htons(ip->ip_len); 1230233176Sae ip->ip_off = htons(ip->ip_off); 1231233176Sae ip->ip_sum = 0; 1232233176Sae ip->ip_sum = in_cksum(copym, hlen); 1233233176Sae#if 1 /* XXX */ 1234233176Sae if (dst->sin_family != AF_INET) { 1235233176Sae printf("ip_mloopback: bad address family %d\n", 1236233176Sae dst->sin_family); 1237233176Sae dst->sin_family = AF_INET; 1238233176Sae } 1239233176Sae#endif 1240233176Sae if_simloop(ifp, copym, dst->sin_family, 0); 1241233176Sae } 1242233176Sae} 1243233176Sae