ip_output.c revision 162798
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30 * $FreeBSD: head/sys/netinet/ip_output.c 162798 2006-09-29 16:44:45Z andre $ 31 */ 32 33#include "opt_ipfw.h" 34#include "opt_ipsec.h" 35#include "opt_mac.h" 36#include "opt_mbuf_stress_test.h" 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/kernel.h> 41#include <sys/mac.h> 42#include <sys/malloc.h> 43#include <sys/mbuf.h> 44#include <sys/protosw.h> 45#include <sys/socket.h> 46#include <sys/socketvar.h> 47#include <sys/sysctl.h> 48 49#include <net/if.h> 50#include <net/netisr.h> 51#include <net/pfil.h> 52#include <net/route.h> 53 54#include <netinet/in.h> 55#include <netinet/in_systm.h> 56#include <netinet/ip.h> 57#include <netinet/in_pcb.h> 58#include <netinet/in_var.h> 59#include <netinet/ip_var.h> 60#include <netinet/ip_options.h> 61 62#if defined(IPSEC) || defined(FAST_IPSEC) 63#include <netinet/ip_ipsec.h> 64#ifdef IPSEC 65#include <netinet6/ipsec.h> 66#endif 67#ifdef FAST_IPSEC 68#include <netipsec/ipsec.h> 69#endif 70#endif /*IPSEC*/ 71 72#include <machine/in_cksum.h> 73 74static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 75 76#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 77 x, (ntohl(a.s_addr)>>24)&0xFF,\ 78 (ntohl(a.s_addr)>>16)&0xFF,\ 79 (ntohl(a.s_addr)>>8)&0xFF,\ 80 (ntohl(a.s_addr))&0xFF, y); 81 82u_short ip_id; 83 84#ifdef MBUF_STRESS_TEST 85int mbuf_frag_size = 0; 86SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 87 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 88#endif 89 90static struct ifnet *ip_multicast_if(struct in_addr *, int *); 91static void ip_mloopback 92 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 93static int ip_getmoptions(struct inpcb *, struct sockopt *); 94static int ip_setmoptions(struct inpcb *, struct sockopt *); 95 96 97extern struct protosw inetsw[]; 98 99/* 100 * IP output. The packet in mbuf chain m contains a skeletal IP 101 * header (with len, off, ttl, proto, tos, src, dst). 102 * The mbuf chain containing the packet will be freed. 103 * The mbuf opt, if present, will not be freed. 104 * In the IP forwarding case, the packet will arrive with options already 105 * inserted, so must have a NULL opt pointer. 106 */ 107int 108ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, 109 int flags, struct ip_moptions *imo, struct inpcb *inp) 110{ 111 struct ip *ip; 112 struct ifnet *ifp = NULL; /* keep compiler happy */ 113 struct mbuf *m0; 114 int hlen = sizeof (struct ip); 115 int mtu; 116 int len, error = 0; 117 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 118 struct in_ifaddr *ia = NULL; 119 struct in_ifaddr *sia = NULL; 120 int isbroadcast, sw_csum; 121 struct route iproute; 122 struct in_addr odst; 123#ifdef IPFIREWALL_FORWARD 124 struct m_tag *fwd_tag = NULL; 125#endif 126 M_ASSERTPKTHDR(m); 127 128 if (ro == NULL) { 129 ro = &iproute; 130 bzero(ro, sizeof (*ro)); 131 } 132 133 if (inp != NULL) 134 INP_LOCK_ASSERT(inp); 135 136 if (opt) { 137 len = 0; 138 m = ip_insertoptions(m, opt, &len); 139 if (len != 0) 140 hlen = len; 141 } 142 ip = mtod(m, struct ip *); 143 144 /* 145 * Fill in IP header. If we are not allowing fragmentation, 146 * then the ip_id field is meaningless, but we don't set it 147 * to zero. Doing so causes various problems when devices along 148 * the path (routers, load balancers, firewalls, etc.) illegally 149 * disable DF on our packet. Note that a 16-bit counter 150 * will wrap around in less than 10 seconds at 100 Mbit/s on a 151 * medium with MTU 1500. See Steven M. Bellovin, "A Technique 152 * for Counting NATted Hosts", Proc. IMW'02, available at 153 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>. 154 */ 155 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 156 ip->ip_v = IPVERSION; 157 ip->ip_hl = hlen >> 2; 158 ip->ip_id = ip_newid(); 159 ipstat.ips_localout++; 160 } else { 161 hlen = ip->ip_hl << 2; 162 } 163 164 dst = (struct sockaddr_in *)&ro->ro_dst; 165again: 166 /* 167 * If there is a cached route, 168 * check that it is to the same destination 169 * and is still up. If not, free it and try again. 170 * The address family should also be checked in case of sharing the 171 * cache with IPv6. 172 */ 173 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 174 dst->sin_family != AF_INET || 175 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 176 RTFREE(ro->ro_rt); 177 ro->ro_rt = (struct rtentry *)NULL; 178 } 179#ifdef IPFIREWALL_FORWARD 180 if (ro->ro_rt == NULL && fwd_tag == NULL) { 181#else 182 if (ro->ro_rt == NULL) { 183#endif 184 bzero(dst, sizeof(*dst)); 185 dst->sin_family = AF_INET; 186 dst->sin_len = sizeof(*dst); 187 dst->sin_addr = ip->ip_dst; 188 } 189 /* 190 * If routing to interface only, 191 * short circuit routing lookup. 192 */ 193 if (flags & IP_ROUTETOIF) { 194 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 195 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 196 ipstat.ips_noroute++; 197 error = ENETUNREACH; 198 goto bad; 199 } 200 ifp = ia->ia_ifp; 201 ip->ip_ttl = 1; 202 isbroadcast = in_broadcast(dst->sin_addr, ifp); 203 } else if (flags & IP_SENDONES) { 204 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL) { 205 ipstat.ips_noroute++; 206 error = ENETUNREACH; 207 goto bad; 208 } 209 ifp = ia->ia_ifp; 210 ip->ip_dst.s_addr = INADDR_BROADCAST; 211 dst->sin_addr = ip->ip_dst; 212 ip->ip_ttl = 1; 213 isbroadcast = 1; 214 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 215 imo != NULL && imo->imo_multicast_ifp != NULL) { 216 /* 217 * Bypass the normal routing lookup for multicast 218 * packets if the interface is specified. 219 */ 220 ifp = imo->imo_multicast_ifp; 221 IFP_TO_IA(ifp, ia); 222 isbroadcast = 0; /* fool gcc */ 223 } else { 224 /* 225 * We want to do any cloning requested by the link layer, 226 * as this is probably required in all cases for correct 227 * operation (as it is for ARP). 228 */ 229 if (ro->ro_rt == NULL) 230 rtalloc_ign(ro, 0); 231 if (ro->ro_rt == NULL) { 232 ipstat.ips_noroute++; 233 error = EHOSTUNREACH; 234 goto bad; 235 } 236 ia = ifatoia(ro->ro_rt->rt_ifa); 237 ifp = ro->ro_rt->rt_ifp; 238 ro->ro_rt->rt_rmx.rmx_pksent++; 239 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 240 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 241 if (ro->ro_rt->rt_flags & RTF_HOST) 242 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 243 else 244 isbroadcast = in_broadcast(dst->sin_addr, ifp); 245 } 246 /* 247 * Calculate MTU. If we have a route that is up, use that, 248 * otherwise use the interface's MTU. 249 */ 250 if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) { 251 /* 252 * This case can happen if the user changed the MTU 253 * of an interface after enabling IP on it. Because 254 * most netifs don't keep track of routes pointing to 255 * them, there is no way for one to update all its 256 * routes when the MTU is changed. 257 */ 258 if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu) 259 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 260 mtu = ro->ro_rt->rt_rmx.rmx_mtu; 261 } else { 262 mtu = ifp->if_mtu; 263 } 264 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 265 struct in_multi *inm; 266 267 m->m_flags |= M_MCAST; 268 /* 269 * IP destination address is multicast. Make sure "dst" 270 * still points to the address in "ro". (It may have been 271 * changed to point to a gateway address, above.) 272 */ 273 dst = (struct sockaddr_in *)&ro->ro_dst; 274 /* 275 * See if the caller provided any multicast options 276 */ 277 if (imo != NULL) { 278 ip->ip_ttl = imo->imo_multicast_ttl; 279 if (imo->imo_multicast_vif != -1) 280 ip->ip_src.s_addr = 281 ip_mcast_src ? 282 ip_mcast_src(imo->imo_multicast_vif) : 283 INADDR_ANY; 284 } else 285 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 286 /* 287 * Confirm that the outgoing interface supports multicast. 288 */ 289 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 290 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 291 ipstat.ips_noroute++; 292 error = ENETUNREACH; 293 goto bad; 294 } 295 } 296 /* 297 * If source address not specified yet, use address 298 * of outgoing interface. 299 */ 300 if (ip->ip_src.s_addr == INADDR_ANY) { 301 /* Interface may have no addresses. */ 302 if (ia != NULL) 303 ip->ip_src = IA_SIN(ia)->sin_addr; 304 } 305 306 IN_MULTI_LOCK(); 307 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 308 if (inm != NULL && 309 (imo == NULL || imo->imo_multicast_loop)) { 310 IN_MULTI_UNLOCK(); 311 /* 312 * If we belong to the destination multicast group 313 * on the outgoing interface, and the caller did not 314 * forbid loopback, loop back a copy. 315 */ 316 ip_mloopback(ifp, m, dst, hlen); 317 } 318 else { 319 IN_MULTI_UNLOCK(); 320 /* 321 * If we are acting as a multicast router, perform 322 * multicast forwarding as if the packet had just 323 * arrived on the interface to which we are about 324 * to send. The multicast forwarding function 325 * recursively calls this function, using the 326 * IP_FORWARDING flag to prevent infinite recursion. 327 * 328 * Multicasts that are looped back by ip_mloopback(), 329 * above, will be forwarded by the ip_input() routine, 330 * if necessary. 331 */ 332 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 333 /* 334 * If rsvp daemon is not running, do not 335 * set ip_moptions. This ensures that the packet 336 * is multicast and not just sent down one link 337 * as prescribed by rsvpd. 338 */ 339 if (!rsvp_on) 340 imo = NULL; 341 if (ip_mforward && 342 ip_mforward(ip, ifp, m, imo) != 0) { 343 m_freem(m); 344 goto done; 345 } 346 } 347 } 348 349 /* 350 * Multicasts with a time-to-live of zero may be looped- 351 * back, above, but must not be transmitted on a network. 352 * Also, multicasts addressed to the loopback interface 353 * are not sent -- the above call to ip_mloopback() will 354 * loop back a copy if this host actually belongs to the 355 * destination group on the loopback interface. 356 */ 357 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 358 m_freem(m); 359 goto done; 360 } 361 362 goto sendit; 363 } 364 365 /* 366 * If the source address is not specified yet, use the address 367 * of the outoing interface. 368 */ 369 if (ip->ip_src.s_addr == INADDR_ANY) { 370 /* Interface may have no addresses. */ 371 if (ia != NULL) { 372 ip->ip_src = IA_SIN(ia)->sin_addr; 373 } 374 } 375 376 /* 377 * Verify that we have any chance at all of being able to queue the 378 * packet or packet fragments, unless ALTQ is enabled on the given 379 * interface in which case packetdrop should be done by queueing. 380 */ 381#ifdef ALTQ 382 if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) && 383 ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 384 ifp->if_snd.ifq_maxlen)) 385#else 386 if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >= 387 ifp->if_snd.ifq_maxlen) 388#endif /* ALTQ */ 389 { 390 error = ENOBUFS; 391 ipstat.ips_odropped++; 392 ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1); 393 goto bad; 394 } 395 396 /* 397 * Look for broadcast address and 398 * verify user is allowed to send 399 * such a packet. 400 */ 401 if (isbroadcast) { 402 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 403 error = EADDRNOTAVAIL; 404 goto bad; 405 } 406 if ((flags & IP_ALLOWBROADCAST) == 0) { 407 error = EACCES; 408 goto bad; 409 } 410 /* don't allow broadcast messages to be fragmented */ 411 if (ip->ip_len > mtu) { 412 error = EMSGSIZE; 413 goto bad; 414 } 415 m->m_flags |= M_BCAST; 416 } else { 417 m->m_flags &= ~M_BCAST; 418 } 419 420sendit: 421#if defined(IPSEC) || defined(FAST_IPSEC) 422 switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) { 423 case 1: 424 goto bad; 425 case -1: 426 goto done; 427 case 0: 428 default: 429 break; /* Continue with packet processing. */ 430 } 431 /* Update variables that are affected by ipsec4_output(). */ 432 ip = mtod(m, struct ip *); 433 hlen = ip->ip_hl << 2; 434#endif /* IPSEC */ 435 436 /* Jump over all PFIL processing if hooks are not active. */ 437 if (!PFIL_HOOKED(&inet_pfil_hook)) 438 goto passout; 439 440 /* Run through list of hooks for output packets. */ 441 odst.s_addr = ip->ip_dst.s_addr; 442 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp); 443 if (error != 0 || m == NULL) 444 goto done; 445 446 ip = mtod(m, struct ip *); 447 448 /* See if destination IP address was changed by packet filter. */ 449 if (odst.s_addr != ip->ip_dst.s_addr) { 450 m->m_flags |= M_SKIP_FIREWALL; 451 /* If destination is now ourself drop to ip_input(). */ 452 if (in_localip(ip->ip_dst)) { 453 m->m_flags |= M_FASTFWD_OURS; 454 if (m->m_pkthdr.rcvif == NULL) 455 m->m_pkthdr.rcvif = loif; 456 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 457 m->m_pkthdr.csum_flags |= 458 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 459 m->m_pkthdr.csum_data = 0xffff; 460 } 461 m->m_pkthdr.csum_flags |= 462 CSUM_IP_CHECKED | CSUM_IP_VALID; 463 464 error = netisr_queue(NETISR_IP, m); 465 goto done; 466 } else 467 goto again; /* Redo the routing table lookup. */ 468 } 469 470#ifdef IPFIREWALL_FORWARD 471 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 472 if (m->m_flags & M_FASTFWD_OURS) { 473 if (m->m_pkthdr.rcvif == NULL) 474 m->m_pkthdr.rcvif = loif; 475 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 476 m->m_pkthdr.csum_flags |= 477 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 478 m->m_pkthdr.csum_data = 0xffff; 479 } 480 m->m_pkthdr.csum_flags |= 481 CSUM_IP_CHECKED | CSUM_IP_VALID; 482 483 error = netisr_queue(NETISR_IP, m); 484 goto done; 485 } 486 /* Or forward to some other address? */ 487 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 488 if (fwd_tag) { 489 dst = (struct sockaddr_in *)&ro->ro_dst; 490 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 491 m->m_flags |= M_SKIP_FIREWALL; 492 m_tag_delete(m, fwd_tag); 493 goto again; 494 } 495#endif /* IPFIREWALL_FORWARD */ 496 497passout: 498 /* 127/8 must not appear on wire - RFC1122. */ 499 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 500 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 501 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 502 ipstat.ips_badaddr++; 503 error = EADDRNOTAVAIL; 504 goto bad; 505 } 506 } 507 508 m->m_pkthdr.csum_flags |= CSUM_IP; 509 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 510 if (sw_csum & CSUM_DELAY_DATA) { 511 in_delayed_cksum(m); 512 sw_csum &= ~CSUM_DELAY_DATA; 513 } 514 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 515 516 /* 517 * If small enough for interface, or the interface will take 518 * care of the fragmentation for us, we can just send directly. 519 */ 520 if (ip->ip_len <= mtu || 521 (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || 522 ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { 523 ip->ip_len = htons(ip->ip_len); 524 ip->ip_off = htons(ip->ip_off); 525 ip->ip_sum = 0; 526 if (sw_csum & CSUM_DELAY_IP) 527 ip->ip_sum = in_cksum(m, hlen); 528 529 /* 530 * Record statistics for this interface address. 531 * With CSUM_TSO the byte/packet count will be slightly 532 * incorrect because we count the IP+TCP headers only 533 * once instead of for every generated packet. 534 */ 535 if (!(flags & IP_FORWARDING) && ia) { 536 INADDR_TO_IFADDR(ip->ip_src, sia); 537 if (sia == NULL) 538 sia = ia; 539 if (m->m_pkthdr.csum_flags & CSUM_TSO) 540 sia->ia_ifa.if_opackets += 541 m->m_pkthdr.len / m->m_pkthdr.tso_segsz; 542 else 543 sia->ia_ifa.if_opackets++; 544 sia->ia_ifa.if_obytes += m->m_pkthdr.len; 545 } 546#ifdef IPSEC 547 /* clean ipsec history once it goes out of the node */ 548 ipsec_delaux(m); 549#endif 550#ifdef MBUF_STRESS_TEST 551 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 552 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 553#endif 554 /* 555 * Reset layer specific mbuf flags 556 * to avoid confusing lower layers. 557 */ 558 m->m_flags &= ~(M_PROTOFLAGS); 559 560 error = (*ifp->if_output)(ifp, m, 561 (struct sockaddr *)dst, ro->ro_rt); 562 goto done; 563 } 564 565 /* Balk when DF bit is set or the interface didn't support TSO. */ 566 if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 567 error = EMSGSIZE; 568 ipstat.ips_cantfrag++; 569 goto bad; 570 } 571 572 /* 573 * Too large for interface; fragment if possible. If successful, 574 * on return, m will point to a list of packets to be sent. 575 */ 576 error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum); 577 if (error) 578 goto bad; 579 for (; m; m = m0) { 580 m0 = m->m_nextpkt; 581 m->m_nextpkt = 0; 582#ifdef IPSEC 583 /* clean ipsec history once it goes out of the node */ 584 ipsec_delaux(m); 585#endif 586 if (error == 0) { 587 /* Record statistics for this interface address. */ 588 if (ia != NULL) { 589 INADDR_TO_IFADDR(ip->ip_src, sia); 590 if (sia == NULL) 591 sia = ia; 592 sia->ia_ifa.if_opackets++; 593 sia->ia_ifa.if_obytes += m->m_pkthdr.len; 594 } 595 /* 596 * Reset layer specific mbuf flags 597 * to avoid confusing upper layers. 598 */ 599 m->m_flags &= ~(M_PROTOFLAGS); 600 601 error = (*ifp->if_output)(ifp, m, 602 (struct sockaddr *)dst, ro->ro_rt); 603 } else 604 m_freem(m); 605 } 606 607 if (error == 0) 608 ipstat.ips_fragmented++; 609 610done: 611 if (ro == &iproute && ro->ro_rt) { 612 RTFREE(ro->ro_rt); 613 } 614 return (error); 615bad: 616 m_freem(m); 617 goto done; 618} 619 620/* 621 * Create a chain of fragments which fit the given mtu. m_frag points to the 622 * mbuf to be fragmented; on return it points to the chain with the fragments. 623 * Return 0 if no error. If error, m_frag may contain a partially built 624 * chain of fragments that should be freed by the caller. 625 * 626 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 627 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 628 */ 629int 630ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 631 u_long if_hwassist_flags, int sw_csum) 632{ 633 int error = 0; 634 int hlen = ip->ip_hl << 2; 635 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 636 int off; 637 struct mbuf *m0 = *m_frag; /* the original packet */ 638 int firstlen; 639 struct mbuf **mnext; 640 int nfrags; 641 642 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 643 ipstat.ips_cantfrag++; 644 return EMSGSIZE; 645 } 646 647 /* 648 * Must be able to put at least 8 bytes per fragment. 649 */ 650 if (len < 8) 651 return EMSGSIZE; 652 653 /* 654 * If the interface will not calculate checksums on 655 * fragmented packets, then do it here. 656 */ 657 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 658 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 659 in_delayed_cksum(m0); 660 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 661 } 662 663 if (len > PAGE_SIZE) { 664 /* 665 * Fragment large datagrams such that each segment 666 * contains a multiple of PAGE_SIZE amount of data, 667 * plus headers. This enables a receiver to perform 668 * page-flipping zero-copy optimizations. 669 * 670 * XXX When does this help given that sender and receiver 671 * could have different page sizes, and also mtu could 672 * be less than the receiver's page size ? 673 */ 674 int newlen; 675 struct mbuf *m; 676 677 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 678 off += m->m_len; 679 680 /* 681 * firstlen (off - hlen) must be aligned on an 682 * 8-byte boundary 683 */ 684 if (off < hlen) 685 goto smart_frag_failure; 686 off = ((off - hlen) & ~7) + hlen; 687 newlen = (~PAGE_MASK) & mtu; 688 if ((newlen + sizeof (struct ip)) > mtu) { 689 /* we failed, go back the default */ 690smart_frag_failure: 691 newlen = len; 692 off = hlen + len; 693 } 694 len = newlen; 695 696 } else { 697 off = hlen + len; 698 } 699 700 firstlen = off - hlen; 701 mnext = &m0->m_nextpkt; /* pointer to next packet */ 702 703 /* 704 * Loop through length of segment after first fragment, 705 * make new header and copy data of each part and link onto chain. 706 * Here, m0 is the original packet, m is the fragment being created. 707 * The fragments are linked off the m_nextpkt of the original 708 * packet, which after processing serves as the first fragment. 709 */ 710 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 711 struct ip *mhip; /* ip header on the fragment */ 712 struct mbuf *m; 713 int mhlen = sizeof (struct ip); 714 715 MGETHDR(m, M_DONTWAIT, MT_DATA); 716 if (m == NULL) { 717 error = ENOBUFS; 718 ipstat.ips_odropped++; 719 goto done; 720 } 721 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 722 /* 723 * In the first mbuf, leave room for the link header, then 724 * copy the original IP header including options. The payload 725 * goes into an additional mbuf chain returned by m_copy(). 726 */ 727 m->m_data += max_linkhdr; 728 mhip = mtod(m, struct ip *); 729 *mhip = *ip; 730 if (hlen > sizeof (struct ip)) { 731 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 732 mhip->ip_v = IPVERSION; 733 mhip->ip_hl = mhlen >> 2; 734 } 735 m->m_len = mhlen; 736 /* XXX do we need to add ip->ip_off below ? */ 737 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 738 if (off + len >= ip->ip_len) { /* last fragment */ 739 len = ip->ip_len - off; 740 m->m_flags |= M_LASTFRAG; 741 } else 742 mhip->ip_off |= IP_MF; 743 mhip->ip_len = htons((u_short)(len + mhlen)); 744 m->m_next = m_copy(m0, off, len); 745 if (m->m_next == NULL) { /* copy failed */ 746 m_free(m); 747 error = ENOBUFS; /* ??? */ 748 ipstat.ips_odropped++; 749 goto done; 750 } 751 m->m_pkthdr.len = mhlen + len; 752 m->m_pkthdr.rcvif = NULL; 753#ifdef MAC 754 mac_create_fragment(m0, m); 755#endif 756 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 757 mhip->ip_off = htons(mhip->ip_off); 758 mhip->ip_sum = 0; 759 if (sw_csum & CSUM_DELAY_IP) 760 mhip->ip_sum = in_cksum(m, mhlen); 761 *mnext = m; 762 mnext = &m->m_nextpkt; 763 } 764 ipstat.ips_ofragments += nfrags; 765 766 /* set first marker for fragment chain */ 767 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 768 m0->m_pkthdr.csum_data = nfrags; 769 770 /* 771 * Update first fragment by trimming what's been copied out 772 * and updating header. 773 */ 774 m_adj(m0, hlen + firstlen - ip->ip_len); 775 m0->m_pkthdr.len = hlen + firstlen; 776 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 777 ip->ip_off |= IP_MF; 778 ip->ip_off = htons(ip->ip_off); 779 ip->ip_sum = 0; 780 if (sw_csum & CSUM_DELAY_IP) 781 ip->ip_sum = in_cksum(m0, hlen); 782 783done: 784 *m_frag = m0; 785 return error; 786} 787 788void 789in_delayed_cksum(struct mbuf *m) 790{ 791 struct ip *ip; 792 u_short csum, offset; 793 794 ip = mtod(m, struct ip *); 795 offset = ip->ip_hl << 2 ; 796 csum = in_cksum_skip(m, ip->ip_len, offset); 797 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 798 csum = 0xffff; 799 offset += m->m_pkthdr.csum_data; /* checksum offset */ 800 801 if (offset + sizeof(u_short) > m->m_len) { 802 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 803 m->m_len, offset, ip->ip_p); 804 /* 805 * XXX 806 * this shouldn't happen, but if it does, the 807 * correct behavior may be to insert the checksum 808 * in the appropriate next mbuf in the chain. 809 */ 810 return; 811 } 812 *(u_short *)(m->m_data + offset) = csum; 813} 814 815/* 816 * IP socket option processing. 817 */ 818int 819ip_ctloutput(so, sopt) 820 struct socket *so; 821 struct sockopt *sopt; 822{ 823 struct inpcb *inp = sotoinpcb(so); 824 int error, optval; 825 826 error = optval = 0; 827 if (sopt->sopt_level != IPPROTO_IP) { 828 return (EINVAL); 829 } 830 831 switch (sopt->sopt_dir) { 832 case SOPT_SET: 833 switch (sopt->sopt_name) { 834 case IP_OPTIONS: 835#ifdef notyet 836 case IP_RETOPTS: 837#endif 838 { 839 struct mbuf *m; 840 if (sopt->sopt_valsize > MLEN) { 841 error = EMSGSIZE; 842 break; 843 } 844 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA); 845 if (m == NULL) { 846 error = ENOBUFS; 847 break; 848 } 849 m->m_len = sopt->sopt_valsize; 850 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 851 m->m_len); 852 if (error) { 853 m_free(m); 854 break; 855 } 856 INP_LOCK(inp); 857 error = ip_pcbopts(inp, sopt->sopt_name, m); 858 INP_UNLOCK(inp); 859 return (error); 860 } 861 862 case IP_TOS: 863 case IP_TTL: 864 case IP_MINTTL: 865 case IP_RECVOPTS: 866 case IP_RECVRETOPTS: 867 case IP_RECVDSTADDR: 868 case IP_RECVTTL: 869 case IP_RECVIF: 870 case IP_FAITH: 871 case IP_ONESBCAST: 872 case IP_DONTFRAG: 873 error = sooptcopyin(sopt, &optval, sizeof optval, 874 sizeof optval); 875 if (error) 876 break; 877 878 switch (sopt->sopt_name) { 879 case IP_TOS: 880 inp->inp_ip_tos = optval; 881 break; 882 883 case IP_TTL: 884 inp->inp_ip_ttl = optval; 885 break; 886 887 case IP_MINTTL: 888 if (optval > 0 && optval <= MAXTTL) 889 inp->inp_ip_minttl = optval; 890 else 891 error = EINVAL; 892 break; 893 894#define OPTSET(bit) do { \ 895 INP_LOCK(inp); \ 896 if (optval) \ 897 inp->inp_flags |= bit; \ 898 else \ 899 inp->inp_flags &= ~bit; \ 900 INP_UNLOCK(inp); \ 901} while (0) 902 903 case IP_RECVOPTS: 904 OPTSET(INP_RECVOPTS); 905 break; 906 907 case IP_RECVRETOPTS: 908 OPTSET(INP_RECVRETOPTS); 909 break; 910 911 case IP_RECVDSTADDR: 912 OPTSET(INP_RECVDSTADDR); 913 break; 914 915 case IP_RECVTTL: 916 OPTSET(INP_RECVTTL); 917 break; 918 919 case IP_RECVIF: 920 OPTSET(INP_RECVIF); 921 break; 922 923 case IP_FAITH: 924 OPTSET(INP_FAITH); 925 break; 926 927 case IP_ONESBCAST: 928 OPTSET(INP_ONESBCAST); 929 break; 930 case IP_DONTFRAG: 931 OPTSET(INP_DONTFRAG); 932 break; 933 } 934 break; 935#undef OPTSET 936 937 case IP_MULTICAST_IF: 938 case IP_MULTICAST_VIF: 939 case IP_MULTICAST_TTL: 940 case IP_MULTICAST_LOOP: 941 case IP_ADD_MEMBERSHIP: 942 case IP_DROP_MEMBERSHIP: 943 error = ip_setmoptions(inp, sopt); 944 break; 945 946 case IP_PORTRANGE: 947 error = sooptcopyin(sopt, &optval, sizeof optval, 948 sizeof optval); 949 if (error) 950 break; 951 952 INP_LOCK(inp); 953 switch (optval) { 954 case IP_PORTRANGE_DEFAULT: 955 inp->inp_flags &= ~(INP_LOWPORT); 956 inp->inp_flags &= ~(INP_HIGHPORT); 957 break; 958 959 case IP_PORTRANGE_HIGH: 960 inp->inp_flags &= ~(INP_LOWPORT); 961 inp->inp_flags |= INP_HIGHPORT; 962 break; 963 964 case IP_PORTRANGE_LOW: 965 inp->inp_flags &= ~(INP_HIGHPORT); 966 inp->inp_flags |= INP_LOWPORT; 967 break; 968 969 default: 970 error = EINVAL; 971 break; 972 } 973 INP_UNLOCK(inp); 974 break; 975 976#if defined(IPSEC) || defined(FAST_IPSEC) 977 case IP_IPSEC_POLICY: 978 { 979 caddr_t req; 980 size_t len = 0; 981 int priv; 982 struct mbuf *m; 983 int optname; 984 985 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 986 break; 987 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 988 break; 989 priv = (sopt->sopt_td != NULL && 990 suser(sopt->sopt_td) != 0) ? 0 : 1; 991 req = mtod(m, caddr_t); 992 len = m->m_len; 993 optname = sopt->sopt_name; 994 error = ipsec4_set_policy(inp, optname, req, len, priv); 995 m_freem(m); 996 break; 997 } 998#endif /*IPSEC*/ 999 1000 default: 1001 error = ENOPROTOOPT; 1002 break; 1003 } 1004 break; 1005 1006 case SOPT_GET: 1007 switch (sopt->sopt_name) { 1008 case IP_OPTIONS: 1009 case IP_RETOPTS: 1010 if (inp->inp_options) 1011 error = sooptcopyout(sopt, 1012 mtod(inp->inp_options, 1013 char *), 1014 inp->inp_options->m_len); 1015 else 1016 sopt->sopt_valsize = 0; 1017 break; 1018 1019 case IP_TOS: 1020 case IP_TTL: 1021 case IP_MINTTL: 1022 case IP_RECVOPTS: 1023 case IP_RECVRETOPTS: 1024 case IP_RECVDSTADDR: 1025 case IP_RECVTTL: 1026 case IP_RECVIF: 1027 case IP_PORTRANGE: 1028 case IP_FAITH: 1029 case IP_ONESBCAST: 1030 case IP_DONTFRAG: 1031 switch (sopt->sopt_name) { 1032 1033 case IP_TOS: 1034 optval = inp->inp_ip_tos; 1035 break; 1036 1037 case IP_TTL: 1038 optval = inp->inp_ip_ttl; 1039 break; 1040 1041 case IP_MINTTL: 1042 optval = inp->inp_ip_minttl; 1043 break; 1044 1045#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1046 1047 case IP_RECVOPTS: 1048 optval = OPTBIT(INP_RECVOPTS); 1049 break; 1050 1051 case IP_RECVRETOPTS: 1052 optval = OPTBIT(INP_RECVRETOPTS); 1053 break; 1054 1055 case IP_RECVDSTADDR: 1056 optval = OPTBIT(INP_RECVDSTADDR); 1057 break; 1058 1059 case IP_RECVTTL: 1060 optval = OPTBIT(INP_RECVTTL); 1061 break; 1062 1063 case IP_RECVIF: 1064 optval = OPTBIT(INP_RECVIF); 1065 break; 1066 1067 case IP_PORTRANGE: 1068 if (inp->inp_flags & INP_HIGHPORT) 1069 optval = IP_PORTRANGE_HIGH; 1070 else if (inp->inp_flags & INP_LOWPORT) 1071 optval = IP_PORTRANGE_LOW; 1072 else 1073 optval = 0; 1074 break; 1075 1076 case IP_FAITH: 1077 optval = OPTBIT(INP_FAITH); 1078 break; 1079 1080 case IP_ONESBCAST: 1081 optval = OPTBIT(INP_ONESBCAST); 1082 break; 1083 case IP_DONTFRAG: 1084 optval = OPTBIT(INP_DONTFRAG); 1085 break; 1086 } 1087 error = sooptcopyout(sopt, &optval, sizeof optval); 1088 break; 1089 1090 case IP_MULTICAST_IF: 1091 case IP_MULTICAST_VIF: 1092 case IP_MULTICAST_TTL: 1093 case IP_MULTICAST_LOOP: 1094 case IP_ADD_MEMBERSHIP: 1095 case IP_DROP_MEMBERSHIP: 1096 error = ip_getmoptions(inp, sopt); 1097 break; 1098 1099#if defined(IPSEC) || defined(FAST_IPSEC) 1100 case IP_IPSEC_POLICY: 1101 { 1102 struct mbuf *m = NULL; 1103 caddr_t req = NULL; 1104 size_t len = 0; 1105 1106 if (m != 0) { 1107 req = mtod(m, caddr_t); 1108 len = m->m_len; 1109 } 1110 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1111 if (error == 0) 1112 error = soopt_mcopyout(sopt, m); /* XXX */ 1113 if (error == 0) 1114 m_freem(m); 1115 break; 1116 } 1117#endif /*IPSEC*/ 1118 1119 default: 1120 error = ENOPROTOOPT; 1121 break; 1122 } 1123 break; 1124 } 1125 return (error); 1126} 1127 1128/* 1129 * XXX 1130 * The whole multicast option thing needs to be re-thought. 1131 * Several of these options are equally applicable to non-multicast 1132 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1133 * standard option (IP_TTL). 1134 */ 1135 1136/* 1137 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1138 */ 1139static struct ifnet * 1140ip_multicast_if(a, ifindexp) 1141 struct in_addr *a; 1142 int *ifindexp; 1143{ 1144 int ifindex; 1145 struct ifnet *ifp; 1146 1147 if (ifindexp) 1148 *ifindexp = 0; 1149 if (ntohl(a->s_addr) >> 24 == 0) { 1150 ifindex = ntohl(a->s_addr) & 0xffffff; 1151 if (ifindex < 0 || if_index < ifindex) 1152 return NULL; 1153 ifp = ifnet_byindex(ifindex); 1154 if (ifindexp) 1155 *ifindexp = ifindex; 1156 } else { 1157 INADDR_TO_IFP(*a, ifp); 1158 } 1159 return ifp; 1160} 1161 1162/* 1163 * Given an inpcb, return its multicast options structure pointer. Accepts 1164 * an unlocked inpcb pointer, but will return it locked. May sleep. 1165 */ 1166static struct ip_moptions * 1167ip_findmoptions(struct inpcb *inp) 1168{ 1169 struct ip_moptions *imo; 1170 struct in_multi **immp; 1171 1172 INP_LOCK(inp); 1173 if (inp->inp_moptions != NULL) 1174 return (inp->inp_moptions); 1175 1176 INP_UNLOCK(inp); 1177 1178 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1179 immp = (struct in_multi **)malloc((sizeof(*immp) * IP_MIN_MEMBERSHIPS), 1180 M_IPMOPTS, M_WAITOK); 1181 1182 imo->imo_multicast_ifp = NULL; 1183 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1184 imo->imo_multicast_vif = -1; 1185 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1186 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1187 imo->imo_num_memberships = 0; 1188 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1189 imo->imo_membership = immp; 1190 1191 INP_LOCK(inp); 1192 if (inp->inp_moptions != NULL) { 1193 free(immp, M_IPMOPTS); 1194 free(imo, M_IPMOPTS); 1195 return (inp->inp_moptions); 1196 } 1197 inp->inp_moptions = imo; 1198 return (imo); 1199} 1200 1201/* 1202 * Set the IP multicast options in response to user setsockopt(). 1203 */ 1204static int 1205ip_setmoptions(struct inpcb *inp, struct sockopt *sopt) 1206{ 1207 int error = 0; 1208 int i; 1209 struct in_addr addr; 1210 struct ip_mreq mreq; 1211 struct ifnet *ifp; 1212 struct ip_moptions *imo; 1213 struct route ro; 1214 struct sockaddr_in *dst; 1215 int ifindex; 1216 int s; 1217 1218 switch (sopt->sopt_name) { 1219 /* store an index number for the vif you wanna use in the send */ 1220 case IP_MULTICAST_VIF: 1221 if (legal_vif_num == 0) { 1222 error = EOPNOTSUPP; 1223 break; 1224 } 1225 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1226 if (error) 1227 break; 1228 if (!legal_vif_num(i) && (i != -1)) { 1229 error = EINVAL; 1230 break; 1231 } 1232 imo = ip_findmoptions(inp); 1233 imo->imo_multicast_vif = i; 1234 INP_UNLOCK(inp); 1235 break; 1236 1237 case IP_MULTICAST_IF: 1238 /* 1239 * Select the interface for outgoing multicast packets. 1240 */ 1241 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1242 if (error) 1243 break; 1244 /* 1245 * INADDR_ANY is used to remove a previous selection. 1246 * When no interface is selected, a default one is 1247 * chosen every time a multicast packet is sent. 1248 */ 1249 imo = ip_findmoptions(inp); 1250 if (addr.s_addr == INADDR_ANY) { 1251 imo->imo_multicast_ifp = NULL; 1252 INP_UNLOCK(inp); 1253 break; 1254 } 1255 /* 1256 * The selected interface is identified by its local 1257 * IP address. Find the interface and confirm that 1258 * it supports multicasting. 1259 */ 1260 s = splimp(); 1261 ifp = ip_multicast_if(&addr, &ifindex); 1262 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1263 INP_UNLOCK(inp); 1264 splx(s); 1265 error = EADDRNOTAVAIL; 1266 break; 1267 } 1268 imo->imo_multicast_ifp = ifp; 1269 if (ifindex) 1270 imo->imo_multicast_addr = addr; 1271 else 1272 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1273 INP_UNLOCK(inp); 1274 splx(s); 1275 break; 1276 1277 case IP_MULTICAST_TTL: 1278 /* 1279 * Set the IP time-to-live for outgoing multicast packets. 1280 * The original multicast API required a char argument, 1281 * which is inconsistent with the rest of the socket API. 1282 * We allow either a char or an int. 1283 */ 1284 if (sopt->sopt_valsize == 1) { 1285 u_char ttl; 1286 error = sooptcopyin(sopt, &ttl, 1, 1); 1287 if (error) 1288 break; 1289 imo = ip_findmoptions(inp); 1290 imo->imo_multicast_ttl = ttl; 1291 INP_UNLOCK(inp); 1292 } else { 1293 u_int ttl; 1294 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1295 sizeof ttl); 1296 if (error) 1297 break; 1298 if (ttl > 255) 1299 error = EINVAL; 1300 else { 1301 imo = ip_findmoptions(inp); 1302 imo->imo_multicast_ttl = ttl; 1303 INP_UNLOCK(inp); 1304 } 1305 } 1306 break; 1307 1308 case IP_MULTICAST_LOOP: 1309 /* 1310 * Set the loopback flag for outgoing multicast packets. 1311 * Must be zero or one. The original multicast API required a 1312 * char argument, which is inconsistent with the rest 1313 * of the socket API. We allow either a char or an int. 1314 */ 1315 if (sopt->sopt_valsize == 1) { 1316 u_char loop; 1317 error = sooptcopyin(sopt, &loop, 1, 1); 1318 if (error) 1319 break; 1320 imo = ip_findmoptions(inp); 1321 imo->imo_multicast_loop = !!loop; 1322 INP_UNLOCK(inp); 1323 } else { 1324 u_int loop; 1325 error = sooptcopyin(sopt, &loop, sizeof loop, 1326 sizeof loop); 1327 if (error) 1328 break; 1329 imo = ip_findmoptions(inp); 1330 imo->imo_multicast_loop = !!loop; 1331 INP_UNLOCK(inp); 1332 } 1333 break; 1334 1335 case IP_ADD_MEMBERSHIP: 1336 /* 1337 * Add a multicast group membership. 1338 * Group must be a valid IP multicast address. 1339 */ 1340 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1341 if (error) 1342 break; 1343 1344 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1345 error = EINVAL; 1346 break; 1347 } 1348 s = splimp(); 1349 /* 1350 * If no interface address was provided, use the interface of 1351 * the route to the given multicast address. 1352 */ 1353 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1354 bzero((caddr_t)&ro, sizeof(ro)); 1355 dst = (struct sockaddr_in *)&ro.ro_dst; 1356 dst->sin_len = sizeof(*dst); 1357 dst->sin_family = AF_INET; 1358 dst->sin_addr = mreq.imr_multiaddr; 1359 rtalloc_ign(&ro, RTF_CLONING); 1360 if (ro.ro_rt == NULL) { 1361 error = EADDRNOTAVAIL; 1362 splx(s); 1363 break; 1364 } 1365 ifp = ro.ro_rt->rt_ifp; 1366 RTFREE(ro.ro_rt); 1367 } 1368 else { 1369 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1370 } 1371 1372 /* 1373 * See if we found an interface, and confirm that it 1374 * supports multicast. 1375 */ 1376 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1377 error = EADDRNOTAVAIL; 1378 splx(s); 1379 break; 1380 } 1381 /* 1382 * See if the membership already exists or if all the 1383 * membership slots are full. 1384 */ 1385 imo = ip_findmoptions(inp); 1386 for (i = 0; i < imo->imo_num_memberships; ++i) { 1387 if (imo->imo_membership[i]->inm_ifp == ifp && 1388 imo->imo_membership[i]->inm_addr.s_addr 1389 == mreq.imr_multiaddr.s_addr) 1390 break; 1391 } 1392 if (i < imo->imo_num_memberships) { 1393 INP_UNLOCK(inp); 1394 error = EADDRINUSE; 1395 splx(s); 1396 break; 1397 } 1398 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1399 struct in_multi **nmships, **omships; 1400 size_t newmax; 1401 /* 1402 * Resize the vector to next power-of-two minus 1. If the 1403 * size would exceed the maximum then we know we've really 1404 * run out of entries. Otherwise, we realloc() the vector 1405 * with the INP lock held to avoid introducing a race. 1406 */ 1407 nmships = NULL; 1408 omships = imo->imo_membership; 1409 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1410 if (newmax <= IP_MAX_MEMBERSHIPS) { 1411 nmships = (struct in_multi **)realloc(omships, 1412sizeof(*nmships) * newmax, M_IPMOPTS, M_NOWAIT); 1413 if (nmships != NULL) { 1414 imo->imo_membership = nmships; 1415 imo->imo_max_memberships = newmax; 1416 } 1417 } 1418 if (nmships == NULL) { 1419 INP_UNLOCK(inp); 1420 error = ETOOMANYREFS; 1421 splx(s); 1422 break; 1423 } 1424 } 1425 /* 1426 * Everything looks good; add a new record to the multicast 1427 * address list for the given interface. 1428 */ 1429 if ((imo->imo_membership[i] = 1430 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1431 INP_UNLOCK(inp); 1432 error = ENOBUFS; 1433 splx(s); 1434 break; 1435 } 1436 ++imo->imo_num_memberships; 1437 INP_UNLOCK(inp); 1438 splx(s); 1439 break; 1440 1441 case IP_DROP_MEMBERSHIP: 1442 /* 1443 * Drop a multicast group membership. 1444 * Group must be a valid IP multicast address. 1445 */ 1446 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1447 if (error) 1448 break; 1449 1450 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1451 error = EINVAL; 1452 break; 1453 } 1454 1455 s = splimp(); 1456 /* 1457 * If an interface address was specified, get a pointer 1458 * to its ifnet structure. 1459 */ 1460 if (mreq.imr_interface.s_addr == INADDR_ANY) 1461 ifp = NULL; 1462 else { 1463 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1464 if (ifp == NULL) { 1465 error = EADDRNOTAVAIL; 1466 splx(s); 1467 break; 1468 } 1469 } 1470 /* 1471 * Find the membership in the membership array. 1472 */ 1473 imo = ip_findmoptions(inp); 1474 for (i = 0; i < imo->imo_num_memberships; ++i) { 1475 if ((ifp == NULL || 1476 imo->imo_membership[i]->inm_ifp == ifp) && 1477 imo->imo_membership[i]->inm_addr.s_addr == 1478 mreq.imr_multiaddr.s_addr) 1479 break; 1480 } 1481 if (i == imo->imo_num_memberships) { 1482 INP_UNLOCK(inp); 1483 error = EADDRNOTAVAIL; 1484 splx(s); 1485 break; 1486 } 1487 /* 1488 * Give up the multicast address record to which the 1489 * membership points. 1490 */ 1491 in_delmulti(imo->imo_membership[i]); 1492 /* 1493 * Remove the gap in the membership array. 1494 */ 1495 for (++i; i < imo->imo_num_memberships; ++i) 1496 imo->imo_membership[i-1] = imo->imo_membership[i]; 1497 --imo->imo_num_memberships; 1498 INP_UNLOCK(inp); 1499 splx(s); 1500 break; 1501 1502 default: 1503 error = EOPNOTSUPP; 1504 break; 1505 } 1506 1507 return (error); 1508} 1509 1510/* 1511 * Return the IP multicast options in response to user getsockopt(). 1512 */ 1513static int 1514ip_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1515{ 1516 struct ip_moptions *imo; 1517 struct in_addr addr; 1518 struct in_ifaddr *ia; 1519 int error, optval; 1520 u_char coptval; 1521 1522 INP_LOCK(inp); 1523 imo = inp->inp_moptions; 1524 1525 error = 0; 1526 switch (sopt->sopt_name) { 1527 case IP_MULTICAST_VIF: 1528 if (imo != NULL) 1529 optval = imo->imo_multicast_vif; 1530 else 1531 optval = -1; 1532 INP_UNLOCK(inp); 1533 error = sooptcopyout(sopt, &optval, sizeof optval); 1534 break; 1535 1536 case IP_MULTICAST_IF: 1537 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1538 addr.s_addr = INADDR_ANY; 1539 else if (imo->imo_multicast_addr.s_addr) { 1540 /* return the value user has set */ 1541 addr = imo->imo_multicast_addr; 1542 } else { 1543 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1544 addr.s_addr = (ia == NULL) ? INADDR_ANY 1545 : IA_SIN(ia)->sin_addr.s_addr; 1546 } 1547 INP_UNLOCK(inp); 1548 error = sooptcopyout(sopt, &addr, sizeof addr); 1549 break; 1550 1551 case IP_MULTICAST_TTL: 1552 if (imo == 0) 1553 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1554 else 1555 optval = coptval = imo->imo_multicast_ttl; 1556 INP_UNLOCK(inp); 1557 if (sopt->sopt_valsize == 1) 1558 error = sooptcopyout(sopt, &coptval, 1); 1559 else 1560 error = sooptcopyout(sopt, &optval, sizeof optval); 1561 break; 1562 1563 case IP_MULTICAST_LOOP: 1564 if (imo == 0) 1565 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1566 else 1567 optval = coptval = imo->imo_multicast_loop; 1568 INP_UNLOCK(inp); 1569 if (sopt->sopt_valsize == 1) 1570 error = sooptcopyout(sopt, &coptval, 1); 1571 else 1572 error = sooptcopyout(sopt, &optval, sizeof optval); 1573 break; 1574 1575 default: 1576 INP_UNLOCK(inp); 1577 error = ENOPROTOOPT; 1578 break; 1579 } 1580 INP_UNLOCK_ASSERT(inp); 1581 1582 return (error); 1583} 1584 1585/* 1586 * Discard the IP multicast options. 1587 */ 1588void 1589ip_freemoptions(imo) 1590 register struct ip_moptions *imo; 1591{ 1592 register int i; 1593 1594 if (imo != NULL) { 1595 for (i = 0; i < imo->imo_num_memberships; ++i) 1596 in_delmulti(imo->imo_membership[i]); 1597 free(imo->imo_membership, M_IPMOPTS); 1598 free(imo, M_IPMOPTS); 1599 } 1600} 1601 1602/* 1603 * Routine called from ip_output() to loop back a copy of an IP multicast 1604 * packet to the input queue of a specified interface. Note that this 1605 * calls the output routine of the loopback "driver", but with an interface 1606 * pointer that might NOT be a loopback interface -- evil, but easier than 1607 * replicating that code here. 1608 */ 1609static void 1610ip_mloopback(ifp, m, dst, hlen) 1611 struct ifnet *ifp; 1612 register struct mbuf *m; 1613 register struct sockaddr_in *dst; 1614 int hlen; 1615{ 1616 register struct ip *ip; 1617 struct mbuf *copym; 1618 1619 copym = m_copy(m, 0, M_COPYALL); 1620 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1621 copym = m_pullup(copym, hlen); 1622 if (copym != NULL) { 1623 /* If needed, compute the checksum and mark it as valid. */ 1624 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1625 in_delayed_cksum(copym); 1626 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1627 copym->m_pkthdr.csum_flags |= 1628 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1629 copym->m_pkthdr.csum_data = 0xffff; 1630 } 1631 /* 1632 * We don't bother to fragment if the IP length is greater 1633 * than the interface's MTU. Can this possibly matter? 1634 */ 1635 ip = mtod(copym, struct ip *); 1636 ip->ip_len = htons(ip->ip_len); 1637 ip->ip_off = htons(ip->ip_off); 1638 ip->ip_sum = 0; 1639 ip->ip_sum = in_cksum(copym, hlen); 1640 /* 1641 * NB: 1642 * It's not clear whether there are any lingering 1643 * reentrancy problems in other areas which might 1644 * be exposed by using ip_input directly (in 1645 * particular, everything which modifies the packet 1646 * in-place). Yet another option is using the 1647 * protosw directly to deliver the looped back 1648 * packet. For the moment, we'll err on the side 1649 * of safety by using if_simloop(). 1650 */ 1651#if 1 /* XXX */ 1652 if (dst->sin_family != AF_INET) { 1653 printf("ip_mloopback: bad address family %d\n", 1654 dst->sin_family); 1655 dst->sin_family = AF_INET; 1656 } 1657#endif 1658 1659#ifdef notdef 1660 copym->m_pkthdr.rcvif = ifp; 1661 ip_input(copym); 1662#else 1663 if_simloop(ifp, copym, dst->sin_family, 0); 1664#endif 1665 } 1666} 1667