ip_output.c revision 161380
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30 * $FreeBSD: head/sys/netinet/ip_output.c 161380 2006-08-17 00:37:03Z julian $ 31 */ 32 33#include "opt_ipfw.h" 34#include "opt_ipsec.h" 35#include "opt_mac.h" 36#include "opt_mbuf_stress_test.h" 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/kernel.h> 41#include <sys/mac.h> 42#include <sys/malloc.h> 43#include <sys/mbuf.h> 44#include <sys/protosw.h> 45#include <sys/socket.h> 46#include <sys/socketvar.h> 47#include <sys/sysctl.h> 48 49#include <net/if.h> 50#include <net/netisr.h> 51#include <net/pfil.h> 52#include <net/route.h> 53 54#include <netinet/in.h> 55#include <netinet/in_systm.h> 56#include <netinet/ip.h> 57#include <netinet/in_pcb.h> 58#include <netinet/in_var.h> 59#include <netinet/ip_var.h> 60#include <netinet/ip_options.h> 61 62#if defined(IPSEC) || defined(FAST_IPSEC) 63#include <netinet/ip_ipsec.h> 64#ifdef IPSEC 65#include <netinet6/ipsec.h> 66#endif 67#ifdef FAST_IPSEC 68#include <netipsec/ipsec.h> 69#endif 70#endif /*IPSEC*/ 71 72#include <machine/in_cksum.h> 73 74static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 75 76#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 77 x, (ntohl(a.s_addr)>>24)&0xFF,\ 78 (ntohl(a.s_addr)>>16)&0xFF,\ 79 (ntohl(a.s_addr)>>8)&0xFF,\ 80 (ntohl(a.s_addr))&0xFF, y); 81 82u_short ip_id; 83 84#ifdef MBUF_STRESS_TEST 85int mbuf_frag_size = 0; 86SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 87 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 88#endif 89 90static struct ifnet *ip_multicast_if(struct in_addr *, int *); 91static void ip_mloopback 92 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 93static int ip_getmoptions(struct inpcb *, struct sockopt *); 94static int ip_setmoptions(struct inpcb *, struct sockopt *); 95 96 97extern struct protosw inetsw[]; 98 99/* 100 * IP output. The packet in mbuf chain m contains a skeletal IP 101 * header (with len, off, ttl, proto, tos, src, dst). 102 * The mbuf chain containing the packet will be freed. 103 * The mbuf opt, if present, will not be freed. 104 * In the IP forwarding case, the packet will arrive with options already 105 * inserted, so must have a NULL opt pointer. 106 */ 107int 108ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, 109 int flags, struct ip_moptions *imo, struct inpcb *inp) 110{ 111 struct ip *ip; 112 struct ifnet *ifp = NULL; /* keep compiler happy */ 113 struct mbuf *m0; 114 int hlen = sizeof (struct ip); 115 int len, error = 0; 116 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 117 struct in_ifaddr *ia = NULL; 118 int isbroadcast, sw_csum; 119 struct route iproute; 120 struct in_addr odst; 121#ifdef IPFIREWALL_FORWARD 122 struct m_tag *fwd_tag = NULL; 123#endif 124 M_ASSERTPKTHDR(m); 125 126 if (ro == NULL) { 127 ro = &iproute; 128 bzero(ro, sizeof (*ro)); 129 } 130 131 if (inp != NULL) 132 INP_LOCK_ASSERT(inp); 133 134 if (opt) { 135 len = 0; 136 m = ip_insertoptions(m, opt, &len); 137 if (len != 0) 138 hlen = len; 139 } 140 ip = mtod(m, struct ip *); 141 142 /* 143 * Fill in IP header. If we are not allowing fragmentation, 144 * then the ip_id field is meaningless, but we don't set it 145 * to zero. Doing so causes various problems when devices along 146 * the path (routers, load balancers, firewalls, etc.) illegally 147 * disable DF on our packet. Note that a 16-bit counter 148 * will wrap around in less than 10 seconds at 100 Mbit/s on a 149 * medium with MTU 1500. See Steven M. Bellovin, "A Technique 150 * for Counting NATted Hosts", Proc. IMW'02, available at 151 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>. 152 */ 153 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 154 ip->ip_v = IPVERSION; 155 ip->ip_hl = hlen >> 2; 156 ip->ip_id = ip_newid(); 157 ipstat.ips_localout++; 158 } else { 159 hlen = ip->ip_hl << 2; 160 } 161 162 dst = (struct sockaddr_in *)&ro->ro_dst; 163again: 164 /* 165 * If there is a cached route, 166 * check that it is to the same destination 167 * and is still up. If not, free it and try again. 168 * The address family should also be checked in case of sharing the 169 * cache with IPv6. 170 */ 171 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 172 dst->sin_family != AF_INET || 173 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 174 RTFREE(ro->ro_rt); 175 ro->ro_rt = (struct rtentry *)0; 176 } 177#ifdef IPFIREWALL_FORWARD 178 if (ro->ro_rt == NULL && fwd_tag == NULL) { 179#else 180 if (ro->ro_rt == NULL) { 181#endif 182 bzero(dst, sizeof(*dst)); 183 dst->sin_family = AF_INET; 184 dst->sin_len = sizeof(*dst); 185 dst->sin_addr = ip->ip_dst; 186 } 187 /* 188 * If routing to interface only, 189 * short circuit routing lookup. 190 */ 191 if (flags & IP_ROUTETOIF) { 192 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 193 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 194 ipstat.ips_noroute++; 195 error = ENETUNREACH; 196 goto bad; 197 } 198 ifp = ia->ia_ifp; 199 ip->ip_ttl = 1; 200 isbroadcast = in_broadcast(dst->sin_addr, ifp); 201 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 202 imo != NULL && imo->imo_multicast_ifp != NULL) { 203 /* 204 * Bypass the normal routing lookup for multicast 205 * packets if the interface is specified. 206 */ 207 ifp = imo->imo_multicast_ifp; 208 IFP_TO_IA(ifp, ia); 209 isbroadcast = 0; /* fool gcc */ 210 } else { 211 /* 212 * We want to do any cloning requested by the link layer, 213 * as this is probably required in all cases for correct 214 * operation (as it is for ARP). 215 */ 216 if (ro->ro_rt == NULL) 217 rtalloc_ign(ro, 0); 218 if (ro->ro_rt == NULL) { 219 ipstat.ips_noroute++; 220 error = EHOSTUNREACH; 221 goto bad; 222 } 223 ia = ifatoia(ro->ro_rt->rt_ifa); 224 ifp = ro->ro_rt->rt_ifp; 225 ro->ro_rt->rt_rmx.rmx_pksent++; 226 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 227 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 228 if (ro->ro_rt->rt_flags & RTF_HOST) 229 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 230 else 231 isbroadcast = in_broadcast(dst->sin_addr, ifp); 232 } 233 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 234 struct in_multi *inm; 235 236 m->m_flags |= M_MCAST; 237 /* 238 * IP destination address is multicast. Make sure "dst" 239 * still points to the address in "ro". (It may have been 240 * changed to point to a gateway address, above.) 241 */ 242 dst = (struct sockaddr_in *)&ro->ro_dst; 243 /* 244 * See if the caller provided any multicast options 245 */ 246 if (imo != NULL) { 247 ip->ip_ttl = imo->imo_multicast_ttl; 248 if (imo->imo_multicast_vif != -1) 249 ip->ip_src.s_addr = 250 ip_mcast_src ? 251 ip_mcast_src(imo->imo_multicast_vif) : 252 INADDR_ANY; 253 } else 254 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 255 /* 256 * Confirm that the outgoing interface supports multicast. 257 */ 258 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 259 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 260 ipstat.ips_noroute++; 261 error = ENETUNREACH; 262 goto bad; 263 } 264 } 265 /* 266 * If source address not specified yet, use address 267 * of outgoing interface. 268 */ 269 if (ip->ip_src.s_addr == INADDR_ANY) { 270 /* Interface may have no addresses. */ 271 if (ia != NULL) 272 ip->ip_src = IA_SIN(ia)->sin_addr; 273 } 274 275 IN_MULTI_LOCK(); 276 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 277 if (inm != NULL && 278 (imo == NULL || imo->imo_multicast_loop)) { 279 IN_MULTI_UNLOCK(); 280 /* 281 * If we belong to the destination multicast group 282 * on the outgoing interface, and the caller did not 283 * forbid loopback, loop back a copy. 284 */ 285 ip_mloopback(ifp, m, dst, hlen); 286 } 287 else { 288 IN_MULTI_UNLOCK(); 289 /* 290 * If we are acting as a multicast router, perform 291 * multicast forwarding as if the packet had just 292 * arrived on the interface to which we are about 293 * to send. The multicast forwarding function 294 * recursively calls this function, using the 295 * IP_FORWARDING flag to prevent infinite recursion. 296 * 297 * Multicasts that are looped back by ip_mloopback(), 298 * above, will be forwarded by the ip_input() routine, 299 * if necessary. 300 */ 301 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 302 /* 303 * If rsvp daemon is not running, do not 304 * set ip_moptions. This ensures that the packet 305 * is multicast and not just sent down one link 306 * as prescribed by rsvpd. 307 */ 308 if (!rsvp_on) 309 imo = NULL; 310 if (ip_mforward && 311 ip_mforward(ip, ifp, m, imo) != 0) { 312 m_freem(m); 313 goto done; 314 } 315 } 316 } 317 318 /* 319 * Multicasts with a time-to-live of zero may be looped- 320 * back, above, but must not be transmitted on a network. 321 * Also, multicasts addressed to the loopback interface 322 * are not sent -- the above call to ip_mloopback() will 323 * loop back a copy if this host actually belongs to the 324 * destination group on the loopback interface. 325 */ 326 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 327 m_freem(m); 328 goto done; 329 } 330 331 goto sendit; 332 } 333#ifndef notdef 334 /* 335 * If the source address is not specified yet, use the address 336 * of the outoing interface. 337 */ 338 if (ip->ip_src.s_addr == INADDR_ANY) { 339 /* Interface may have no addresses. */ 340 if (ia != NULL) { 341 ip->ip_src = IA_SIN(ia)->sin_addr; 342 } 343 } 344#endif /* notdef */ 345 /* 346 * Verify that we have any chance at all of being able to queue the 347 * packet or packet fragments, unless ALTQ is enabled on the given 348 * interface in which case packetdrop should be done by queueing. 349 */ 350#ifdef ALTQ 351 if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) && 352 ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 353 ifp->if_snd.ifq_maxlen)) 354#else 355 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 356 ifp->if_snd.ifq_maxlen) 357#endif /* ALTQ */ 358 { 359 error = ENOBUFS; 360 ipstat.ips_odropped++; 361 ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1); 362 goto bad; 363 } 364 365 /* 366 * Look for broadcast address and 367 * verify user is allowed to send 368 * such a packet. 369 */ 370 if (isbroadcast) { 371 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 372 error = EADDRNOTAVAIL; 373 goto bad; 374 } 375 if ((flags & IP_ALLOWBROADCAST) == 0) { 376 error = EACCES; 377 goto bad; 378 } 379 /* don't allow broadcast messages to be fragmented */ 380 if (ip->ip_len > ifp->if_mtu) { 381 error = EMSGSIZE; 382 goto bad; 383 } 384 if (flags & IP_SENDONES) 385 ip->ip_dst.s_addr = INADDR_BROADCAST; 386 m->m_flags |= M_BCAST; 387 } else { 388 m->m_flags &= ~M_BCAST; 389 } 390 391sendit: 392#if defined(IPSEC) || defined(FAST_IPSEC) 393 switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) { 394 case 1: 395 goto bad; 396 case -1: 397 goto done; 398 case 0: 399 default: 400 break; /* Continue with packet processing. */ 401 } 402 /* Update variables that are affected by ipsec4_output(). */ 403 ip = mtod(m, struct ip *); 404 hlen = ip->ip_hl << 2; 405#endif /* IPSEC */ 406 407 /* Jump over all PFIL processing if hooks are not active. */ 408 if (!PFIL_HOOKED(&inet_pfil_hook)) 409 goto passout; 410 411 /* Run through list of hooks for output packets. */ 412 odst.s_addr = ip->ip_dst.s_addr; 413 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp); 414 if (error != 0 || m == NULL) 415 goto done; 416 417 ip = mtod(m, struct ip *); 418 419 /* See if destination IP address was changed by packet filter. */ 420 if (odst.s_addr != ip->ip_dst.s_addr) { 421 m->m_flags |= M_SKIP_FIREWALL; 422 /* If destination is now ourself drop to ip_input(). */ 423 if (in_localip(ip->ip_dst)) { 424 m->m_flags |= M_FASTFWD_OURS; 425 if (m->m_pkthdr.rcvif == NULL) 426 m->m_pkthdr.rcvif = loif; 427 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 428 m->m_pkthdr.csum_flags |= 429 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 430 m->m_pkthdr.csum_data = 0xffff; 431 } 432 m->m_pkthdr.csum_flags |= 433 CSUM_IP_CHECKED | CSUM_IP_VALID; 434 435 error = netisr_queue(NETISR_IP, m); 436 goto done; 437 } else 438 goto again; /* Redo the routing table lookup. */ 439 } 440 441#ifdef IPFIREWALL_FORWARD 442 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 443 if (m->m_flags & M_FASTFWD_OURS) { 444 if (m->m_pkthdr.rcvif == NULL) 445 m->m_pkthdr.rcvif = loif; 446 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 447 m->m_pkthdr.csum_flags |= 448 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 449 m->m_pkthdr.csum_data = 0xffff; 450 } 451 m->m_pkthdr.csum_flags |= 452 CSUM_IP_CHECKED | CSUM_IP_VALID; 453 454 error = netisr_queue(NETISR_IP, m); 455 goto done; 456 } 457 /* Or forward to some other address? */ 458 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 459 if (fwd_tag) { 460 dst = (struct sockaddr_in *)&ro->ro_dst; 461 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 462 m->m_flags |= M_SKIP_FIREWALL; 463 m_tag_delete(m, fwd_tag); 464 goto again; 465 } 466#endif /* IPFIREWALL_FORWARD */ 467 468passout: 469 /* 127/8 must not appear on wire - RFC1122. */ 470 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 471 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 472 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 473 ipstat.ips_badaddr++; 474 error = EADDRNOTAVAIL; 475 goto bad; 476 } 477 } 478 479 m->m_pkthdr.csum_flags |= CSUM_IP; 480 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 481 if (sw_csum & CSUM_DELAY_DATA) { 482 in_delayed_cksum(m); 483 sw_csum &= ~CSUM_DELAY_DATA; 484 } 485 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 486 487 /* 488 * If small enough for interface, or the interface will take 489 * care of the fragmentation for us, can just send directly. 490 */ 491 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT && 492 ((ip->ip_off & IP_DF) == 0))) { 493 ip->ip_len = htons(ip->ip_len); 494 ip->ip_off = htons(ip->ip_off); 495 ip->ip_sum = 0; 496 if (sw_csum & CSUM_DELAY_IP) 497 ip->ip_sum = in_cksum(m, hlen); 498 499 /* Record statistics for this interface address. */ 500 if (!(flags & IP_FORWARDING) && ia) { 501 ia->ia_ifa.if_opackets++; 502 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 503 } 504#ifdef IPSEC 505 /* clean ipsec history once it goes out of the node */ 506 ipsec_delaux(m); 507#endif 508#ifdef MBUF_STRESS_TEST 509 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 510 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 511#endif 512 /* 513 * Reset layer specific mbuf flags 514 * to avoid confusing lower layers. 515 */ 516 m->m_flags &= ~(M_PROTOFLAGS); 517 518 error = (*ifp->if_output)(ifp, m, 519 (struct sockaddr *)dst, ro->ro_rt); 520 goto done; 521 } 522 523 if (ip->ip_off & IP_DF) { 524 error = EMSGSIZE; 525 /* 526 * This case can happen if the user changed the MTU 527 * of an interface after enabling IP on it. Because 528 * most netifs don't keep track of routes pointing to 529 * them, there is no way for one to update all its 530 * routes when the MTU is changed. 531 */ 532 if (ro != NULL && 533 (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 534 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 535 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 536 } 537 ipstat.ips_cantfrag++; 538 goto bad; 539 } 540 541 /* 542 * Too large for interface; fragment if possible. If successful, 543 * on return, m will point to a list of packets to be sent. 544 */ 545 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); 546 if (error) 547 goto bad; 548 for (; m; m = m0) { 549 m0 = m->m_nextpkt; 550 m->m_nextpkt = 0; 551#ifdef IPSEC 552 /* clean ipsec history once it goes out of the node */ 553 ipsec_delaux(m); 554#endif 555 if (error == 0) { 556 /* Record statistics for this interface address. */ 557 if (ia != NULL) { 558 ia->ia_ifa.if_opackets++; 559 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 560 } 561 /* 562 * Reset layer specific mbuf flags 563 * to avoid confusing upper layers. 564 */ 565 m->m_flags &= ~(M_PROTOFLAGS); 566 567 error = (*ifp->if_output)(ifp, m, 568 (struct sockaddr *)dst, ro->ro_rt); 569 } else 570 m_freem(m); 571 } 572 573 if (error == 0) 574 ipstat.ips_fragmented++; 575 576done: 577 if (ro == &iproute && ro->ro_rt) { 578 RTFREE(ro->ro_rt); 579 } 580 return (error); 581bad: 582 m_freem(m); 583 goto done; 584} 585 586/* 587 * Create a chain of fragments which fit the given mtu. m_frag points to the 588 * mbuf to be fragmented; on return it points to the chain with the fragments. 589 * Return 0 if no error. If error, m_frag may contain a partially built 590 * chain of fragments that should be freed by the caller. 591 * 592 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 593 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 594 */ 595int 596ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 597 u_long if_hwassist_flags, int sw_csum) 598{ 599 int error = 0; 600 int hlen = ip->ip_hl << 2; 601 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 602 int off; 603 struct mbuf *m0 = *m_frag; /* the original packet */ 604 int firstlen; 605 struct mbuf **mnext; 606 int nfrags; 607 608 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 609 ipstat.ips_cantfrag++; 610 return EMSGSIZE; 611 } 612 613 /* 614 * Must be able to put at least 8 bytes per fragment. 615 */ 616 if (len < 8) 617 return EMSGSIZE; 618 619 /* 620 * If the interface will not calculate checksums on 621 * fragmented packets, then do it here. 622 */ 623 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 624 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 625 in_delayed_cksum(m0); 626 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 627 } 628 629 if (len > PAGE_SIZE) { 630 /* 631 * Fragment large datagrams such that each segment 632 * contains a multiple of PAGE_SIZE amount of data, 633 * plus headers. This enables a receiver to perform 634 * page-flipping zero-copy optimizations. 635 * 636 * XXX When does this help given that sender and receiver 637 * could have different page sizes, and also mtu could 638 * be less than the receiver's page size ? 639 */ 640 int newlen; 641 struct mbuf *m; 642 643 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 644 off += m->m_len; 645 646 /* 647 * firstlen (off - hlen) must be aligned on an 648 * 8-byte boundary 649 */ 650 if (off < hlen) 651 goto smart_frag_failure; 652 off = ((off - hlen) & ~7) + hlen; 653 newlen = (~PAGE_MASK) & mtu; 654 if ((newlen + sizeof (struct ip)) > mtu) { 655 /* we failed, go back the default */ 656smart_frag_failure: 657 newlen = len; 658 off = hlen + len; 659 } 660 len = newlen; 661 662 } else { 663 off = hlen + len; 664 } 665 666 firstlen = off - hlen; 667 mnext = &m0->m_nextpkt; /* pointer to next packet */ 668 669 /* 670 * Loop through length of segment after first fragment, 671 * make new header and copy data of each part and link onto chain. 672 * Here, m0 is the original packet, m is the fragment being created. 673 * The fragments are linked off the m_nextpkt of the original 674 * packet, which after processing serves as the first fragment. 675 */ 676 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 677 struct ip *mhip; /* ip header on the fragment */ 678 struct mbuf *m; 679 int mhlen = sizeof (struct ip); 680 681 MGETHDR(m, M_DONTWAIT, MT_DATA); 682 if (m == NULL) { 683 error = ENOBUFS; 684 ipstat.ips_odropped++; 685 goto done; 686 } 687 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 688 /* 689 * In the first mbuf, leave room for the link header, then 690 * copy the original IP header including options. The payload 691 * goes into an additional mbuf chain returned by m_copy(). 692 */ 693 m->m_data += max_linkhdr; 694 mhip = mtod(m, struct ip *); 695 *mhip = *ip; 696 if (hlen > sizeof (struct ip)) { 697 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 698 mhip->ip_v = IPVERSION; 699 mhip->ip_hl = mhlen >> 2; 700 } 701 m->m_len = mhlen; 702 /* XXX do we need to add ip->ip_off below ? */ 703 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 704 if (off + len >= ip->ip_len) { /* last fragment */ 705 len = ip->ip_len - off; 706 m->m_flags |= M_LASTFRAG; 707 } else 708 mhip->ip_off |= IP_MF; 709 mhip->ip_len = htons((u_short)(len + mhlen)); 710 m->m_next = m_copy(m0, off, len); 711 if (m->m_next == NULL) { /* copy failed */ 712 m_free(m); 713 error = ENOBUFS; /* ??? */ 714 ipstat.ips_odropped++; 715 goto done; 716 } 717 m->m_pkthdr.len = mhlen + len; 718 m->m_pkthdr.rcvif = NULL; 719#ifdef MAC 720 mac_create_fragment(m0, m); 721#endif 722 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 723 mhip->ip_off = htons(mhip->ip_off); 724 mhip->ip_sum = 0; 725 if (sw_csum & CSUM_DELAY_IP) 726 mhip->ip_sum = in_cksum(m, mhlen); 727 *mnext = m; 728 mnext = &m->m_nextpkt; 729 } 730 ipstat.ips_ofragments += nfrags; 731 732 /* set first marker for fragment chain */ 733 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 734 m0->m_pkthdr.csum_data = nfrags; 735 736 /* 737 * Update first fragment by trimming what's been copied out 738 * and updating header. 739 */ 740 m_adj(m0, hlen + firstlen - ip->ip_len); 741 m0->m_pkthdr.len = hlen + firstlen; 742 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 743 ip->ip_off |= IP_MF; 744 ip->ip_off = htons(ip->ip_off); 745 ip->ip_sum = 0; 746 if (sw_csum & CSUM_DELAY_IP) 747 ip->ip_sum = in_cksum(m0, hlen); 748 749done: 750 *m_frag = m0; 751 return error; 752} 753 754void 755in_delayed_cksum(struct mbuf *m) 756{ 757 struct ip *ip; 758 u_short csum, offset; 759 760 ip = mtod(m, struct ip *); 761 offset = ip->ip_hl << 2 ; 762 csum = in_cksum_skip(m, ip->ip_len, offset); 763 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 764 csum = 0xffff; 765 offset += m->m_pkthdr.csum_data; /* checksum offset */ 766 767 if (offset + sizeof(u_short) > m->m_len) { 768 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 769 m->m_len, offset, ip->ip_p); 770 /* 771 * XXX 772 * this shouldn't happen, but if it does, the 773 * correct behavior may be to insert the checksum 774 * in the appropriate next mbuf in the chain. 775 */ 776 return; 777 } 778 *(u_short *)(m->m_data + offset) = csum; 779} 780 781/* 782 * IP socket option processing. 783 */ 784int 785ip_ctloutput(so, sopt) 786 struct socket *so; 787 struct sockopt *sopt; 788{ 789 struct inpcb *inp = sotoinpcb(so); 790 int error, optval; 791 792 error = optval = 0; 793 if (sopt->sopt_level != IPPROTO_IP) { 794 return (EINVAL); 795 } 796 797 switch (sopt->sopt_dir) { 798 case SOPT_SET: 799 switch (sopt->sopt_name) { 800 case IP_OPTIONS: 801#ifdef notyet 802 case IP_RETOPTS: 803#endif 804 { 805 struct mbuf *m; 806 if (sopt->sopt_valsize > MLEN) { 807 error = EMSGSIZE; 808 break; 809 } 810 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA); 811 if (m == NULL) { 812 error = ENOBUFS; 813 break; 814 } 815 m->m_len = sopt->sopt_valsize; 816 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 817 m->m_len); 818 if (error) { 819 m_free(m); 820 break; 821 } 822 INP_LOCK(inp); 823 error = ip_pcbopts(inp, sopt->sopt_name, m); 824 INP_UNLOCK(inp); 825 return (error); 826 } 827 828 case IP_TOS: 829 case IP_TTL: 830 case IP_MINTTL: 831 case IP_RECVOPTS: 832 case IP_RECVRETOPTS: 833 case IP_RECVDSTADDR: 834 case IP_RECVTTL: 835 case IP_RECVIF: 836 case IP_FAITH: 837 case IP_ONESBCAST: 838 case IP_DONTFRAG: 839 error = sooptcopyin(sopt, &optval, sizeof optval, 840 sizeof optval); 841 if (error) 842 break; 843 844 switch (sopt->sopt_name) { 845 case IP_TOS: 846 inp->inp_ip_tos = optval; 847 break; 848 849 case IP_TTL: 850 inp->inp_ip_ttl = optval; 851 break; 852 853 case IP_MINTTL: 854 if (optval > 0 && optval <= MAXTTL) 855 inp->inp_ip_minttl = optval; 856 else 857 error = EINVAL; 858 break; 859 860#define OPTSET(bit) do { \ 861 INP_LOCK(inp); \ 862 if (optval) \ 863 inp->inp_flags |= bit; \ 864 else \ 865 inp->inp_flags &= ~bit; \ 866 INP_UNLOCK(inp); \ 867} while (0) 868 869 case IP_RECVOPTS: 870 OPTSET(INP_RECVOPTS); 871 break; 872 873 case IP_RECVRETOPTS: 874 OPTSET(INP_RECVRETOPTS); 875 break; 876 877 case IP_RECVDSTADDR: 878 OPTSET(INP_RECVDSTADDR); 879 break; 880 881 case IP_RECVTTL: 882 OPTSET(INP_RECVTTL); 883 break; 884 885 case IP_RECVIF: 886 OPTSET(INP_RECVIF); 887 break; 888 889 case IP_FAITH: 890 OPTSET(INP_FAITH); 891 break; 892 893 case IP_ONESBCAST: 894 OPTSET(INP_ONESBCAST); 895 break; 896 case IP_DONTFRAG: 897 OPTSET(INP_DONTFRAG); 898 break; 899 } 900 break; 901#undef OPTSET 902 903 case IP_MULTICAST_IF: 904 case IP_MULTICAST_VIF: 905 case IP_MULTICAST_TTL: 906 case IP_MULTICAST_LOOP: 907 case IP_ADD_MEMBERSHIP: 908 case IP_DROP_MEMBERSHIP: 909 error = ip_setmoptions(inp, sopt); 910 break; 911 912 case IP_PORTRANGE: 913 error = sooptcopyin(sopt, &optval, sizeof optval, 914 sizeof optval); 915 if (error) 916 break; 917 918 INP_LOCK(inp); 919 switch (optval) { 920 case IP_PORTRANGE_DEFAULT: 921 inp->inp_flags &= ~(INP_LOWPORT); 922 inp->inp_flags &= ~(INP_HIGHPORT); 923 break; 924 925 case IP_PORTRANGE_HIGH: 926 inp->inp_flags &= ~(INP_LOWPORT); 927 inp->inp_flags |= INP_HIGHPORT; 928 break; 929 930 case IP_PORTRANGE_LOW: 931 inp->inp_flags &= ~(INP_HIGHPORT); 932 inp->inp_flags |= INP_LOWPORT; 933 break; 934 935 default: 936 error = EINVAL; 937 break; 938 } 939 INP_UNLOCK(inp); 940 break; 941 942#if defined(IPSEC) || defined(FAST_IPSEC) 943 case IP_IPSEC_POLICY: 944 { 945 caddr_t req; 946 size_t len = 0; 947 int priv; 948 struct mbuf *m; 949 int optname; 950 951 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 952 break; 953 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 954 break; 955 priv = (sopt->sopt_td != NULL && 956 suser(sopt->sopt_td) != 0) ? 0 : 1; 957 req = mtod(m, caddr_t); 958 len = m->m_len; 959 optname = sopt->sopt_name; 960 error = ipsec4_set_policy(inp, optname, req, len, priv); 961 m_freem(m); 962 break; 963 } 964#endif /*IPSEC*/ 965 966 default: 967 error = ENOPROTOOPT; 968 break; 969 } 970 break; 971 972 case SOPT_GET: 973 switch (sopt->sopt_name) { 974 case IP_OPTIONS: 975 case IP_RETOPTS: 976 if (inp->inp_options) 977 error = sooptcopyout(sopt, 978 mtod(inp->inp_options, 979 char *), 980 inp->inp_options->m_len); 981 else 982 sopt->sopt_valsize = 0; 983 break; 984 985 case IP_TOS: 986 case IP_TTL: 987 case IP_MINTTL: 988 case IP_RECVOPTS: 989 case IP_RECVRETOPTS: 990 case IP_RECVDSTADDR: 991 case IP_RECVTTL: 992 case IP_RECVIF: 993 case IP_PORTRANGE: 994 case IP_FAITH: 995 case IP_ONESBCAST: 996 case IP_DONTFRAG: 997 switch (sopt->sopt_name) { 998 999 case IP_TOS: 1000 optval = inp->inp_ip_tos; 1001 break; 1002 1003 case IP_TTL: 1004 optval = inp->inp_ip_ttl; 1005 break; 1006 1007 case IP_MINTTL: 1008 optval = inp->inp_ip_minttl; 1009 break; 1010 1011#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1012 1013 case IP_RECVOPTS: 1014 optval = OPTBIT(INP_RECVOPTS); 1015 break; 1016 1017 case IP_RECVRETOPTS: 1018 optval = OPTBIT(INP_RECVRETOPTS); 1019 break; 1020 1021 case IP_RECVDSTADDR: 1022 optval = OPTBIT(INP_RECVDSTADDR); 1023 break; 1024 1025 case IP_RECVTTL: 1026 optval = OPTBIT(INP_RECVTTL); 1027 break; 1028 1029 case IP_RECVIF: 1030 optval = OPTBIT(INP_RECVIF); 1031 break; 1032 1033 case IP_PORTRANGE: 1034 if (inp->inp_flags & INP_HIGHPORT) 1035 optval = IP_PORTRANGE_HIGH; 1036 else if (inp->inp_flags & INP_LOWPORT) 1037 optval = IP_PORTRANGE_LOW; 1038 else 1039 optval = 0; 1040 break; 1041 1042 case IP_FAITH: 1043 optval = OPTBIT(INP_FAITH); 1044 break; 1045 1046 case IP_ONESBCAST: 1047 optval = OPTBIT(INP_ONESBCAST); 1048 break; 1049 case IP_DONTFRAG: 1050 optval = OPTBIT(INP_DONTFRAG); 1051 break; 1052 } 1053 error = sooptcopyout(sopt, &optval, sizeof optval); 1054 break; 1055 1056 case IP_MULTICAST_IF: 1057 case IP_MULTICAST_VIF: 1058 case IP_MULTICAST_TTL: 1059 case IP_MULTICAST_LOOP: 1060 case IP_ADD_MEMBERSHIP: 1061 case IP_DROP_MEMBERSHIP: 1062 error = ip_getmoptions(inp, sopt); 1063 break; 1064 1065#if defined(IPSEC) || defined(FAST_IPSEC) 1066 case IP_IPSEC_POLICY: 1067 { 1068 struct mbuf *m = NULL; 1069 caddr_t req = NULL; 1070 size_t len = 0; 1071 1072 if (m != 0) { 1073 req = mtod(m, caddr_t); 1074 len = m->m_len; 1075 } 1076 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1077 if (error == 0) 1078 error = soopt_mcopyout(sopt, m); /* XXX */ 1079 if (error == 0) 1080 m_freem(m); 1081 break; 1082 } 1083#endif /*IPSEC*/ 1084 1085 default: 1086 error = ENOPROTOOPT; 1087 break; 1088 } 1089 break; 1090 } 1091 return (error); 1092} 1093 1094/* 1095 * XXX 1096 * The whole multicast option thing needs to be re-thought. 1097 * Several of these options are equally applicable to non-multicast 1098 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1099 * standard option (IP_TTL). 1100 */ 1101 1102/* 1103 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1104 */ 1105static struct ifnet * 1106ip_multicast_if(a, ifindexp) 1107 struct in_addr *a; 1108 int *ifindexp; 1109{ 1110 int ifindex; 1111 struct ifnet *ifp; 1112 1113 if (ifindexp) 1114 *ifindexp = 0; 1115 if (ntohl(a->s_addr) >> 24 == 0) { 1116 ifindex = ntohl(a->s_addr) & 0xffffff; 1117 if (ifindex < 0 || if_index < ifindex) 1118 return NULL; 1119 ifp = ifnet_byindex(ifindex); 1120 if (ifindexp) 1121 *ifindexp = ifindex; 1122 } else { 1123 INADDR_TO_IFP(*a, ifp); 1124 } 1125 return ifp; 1126} 1127 1128/* 1129 * Given an inpcb, return its multicast options structure pointer. Accepts 1130 * an unlocked inpcb pointer, but will return it locked. May sleep. 1131 */ 1132static struct ip_moptions * 1133ip_findmoptions(struct inpcb *inp) 1134{ 1135 struct ip_moptions *imo; 1136 struct in_multi **immp; 1137 1138 INP_LOCK(inp); 1139 if (inp->inp_moptions != NULL) 1140 return (inp->inp_moptions); 1141 1142 INP_UNLOCK(inp); 1143 1144 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1145 immp = (struct in_multi **)malloc((sizeof(*immp) * IP_MIN_MEMBERSHIPS), 1146 M_IPMOPTS, M_WAITOK); 1147 1148 imo->imo_multicast_ifp = NULL; 1149 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1150 imo->imo_multicast_vif = -1; 1151 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1152 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1153 imo->imo_num_memberships = 0; 1154 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1155 imo->imo_membership = immp; 1156 1157 INP_LOCK(inp); 1158 if (inp->inp_moptions != NULL) { 1159 free(immp, M_IPMOPTS); 1160 free(imo, M_IPMOPTS); 1161 return (inp->inp_moptions); 1162 } 1163 inp->inp_moptions = imo; 1164 return (imo); 1165} 1166 1167/* 1168 * Set the IP multicast options in response to user setsockopt(). 1169 */ 1170static int 1171ip_setmoptions(struct inpcb *inp, struct sockopt *sopt) 1172{ 1173 int error = 0; 1174 int i; 1175 struct in_addr addr; 1176 struct ip_mreq mreq; 1177 struct ifnet *ifp; 1178 struct ip_moptions *imo; 1179 struct route ro; 1180 struct sockaddr_in *dst; 1181 int ifindex; 1182 int s; 1183 1184 switch (sopt->sopt_name) { 1185 /* store an index number for the vif you wanna use in the send */ 1186 case IP_MULTICAST_VIF: 1187 if (legal_vif_num == 0) { 1188 error = EOPNOTSUPP; 1189 break; 1190 } 1191 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1192 if (error) 1193 break; 1194 if (!legal_vif_num(i) && (i != -1)) { 1195 error = EINVAL; 1196 break; 1197 } 1198 imo = ip_findmoptions(inp); 1199 imo->imo_multicast_vif = i; 1200 INP_UNLOCK(inp); 1201 break; 1202 1203 case IP_MULTICAST_IF: 1204 /* 1205 * Select the interface for outgoing multicast packets. 1206 */ 1207 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1208 if (error) 1209 break; 1210 /* 1211 * INADDR_ANY is used to remove a previous selection. 1212 * When no interface is selected, a default one is 1213 * chosen every time a multicast packet is sent. 1214 */ 1215 imo = ip_findmoptions(inp); 1216 if (addr.s_addr == INADDR_ANY) { 1217 imo->imo_multicast_ifp = NULL; 1218 INP_UNLOCK(inp); 1219 break; 1220 } 1221 /* 1222 * The selected interface is identified by its local 1223 * IP address. Find the interface and confirm that 1224 * it supports multicasting. 1225 */ 1226 s = splimp(); 1227 ifp = ip_multicast_if(&addr, &ifindex); 1228 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1229 INP_UNLOCK(inp); 1230 splx(s); 1231 error = EADDRNOTAVAIL; 1232 break; 1233 } 1234 imo->imo_multicast_ifp = ifp; 1235 if (ifindex) 1236 imo->imo_multicast_addr = addr; 1237 else 1238 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1239 INP_UNLOCK(inp); 1240 splx(s); 1241 break; 1242 1243 case IP_MULTICAST_TTL: 1244 /* 1245 * Set the IP time-to-live for outgoing multicast packets. 1246 * The original multicast API required a char argument, 1247 * which is inconsistent with the rest of the socket API. 1248 * We allow either a char or an int. 1249 */ 1250 if (sopt->sopt_valsize == 1) { 1251 u_char ttl; 1252 error = sooptcopyin(sopt, &ttl, 1, 1); 1253 if (error) 1254 break; 1255 imo = ip_findmoptions(inp); 1256 imo->imo_multicast_ttl = ttl; 1257 INP_UNLOCK(inp); 1258 } else { 1259 u_int ttl; 1260 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1261 sizeof ttl); 1262 if (error) 1263 break; 1264 if (ttl > 255) 1265 error = EINVAL; 1266 else { 1267 imo = ip_findmoptions(inp); 1268 imo->imo_multicast_ttl = ttl; 1269 INP_UNLOCK(inp); 1270 } 1271 } 1272 break; 1273 1274 case IP_MULTICAST_LOOP: 1275 /* 1276 * Set the loopback flag for outgoing multicast packets. 1277 * Must be zero or one. The original multicast API required a 1278 * char argument, which is inconsistent with the rest 1279 * of the socket API. We allow either a char or an int. 1280 */ 1281 if (sopt->sopt_valsize == 1) { 1282 u_char loop; 1283 error = sooptcopyin(sopt, &loop, 1, 1); 1284 if (error) 1285 break; 1286 imo = ip_findmoptions(inp); 1287 imo->imo_multicast_loop = !!loop; 1288 INP_UNLOCK(inp); 1289 } else { 1290 u_int loop; 1291 error = sooptcopyin(sopt, &loop, sizeof loop, 1292 sizeof loop); 1293 if (error) 1294 break; 1295 imo = ip_findmoptions(inp); 1296 imo->imo_multicast_loop = !!loop; 1297 INP_UNLOCK(inp); 1298 } 1299 break; 1300 1301 case IP_ADD_MEMBERSHIP: 1302 /* 1303 * Add a multicast group membership. 1304 * Group must be a valid IP multicast address. 1305 */ 1306 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1307 if (error) 1308 break; 1309 1310 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1311 error = EINVAL; 1312 break; 1313 } 1314 s = splimp(); 1315 /* 1316 * If no interface address was provided, use the interface of 1317 * the route to the given multicast address. 1318 */ 1319 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1320 bzero((caddr_t)&ro, sizeof(ro)); 1321 dst = (struct sockaddr_in *)&ro.ro_dst; 1322 dst->sin_len = sizeof(*dst); 1323 dst->sin_family = AF_INET; 1324 dst->sin_addr = mreq.imr_multiaddr; 1325 rtalloc_ign(&ro, RTF_CLONING); 1326 if (ro.ro_rt == NULL) { 1327 error = EADDRNOTAVAIL; 1328 splx(s); 1329 break; 1330 } 1331 ifp = ro.ro_rt->rt_ifp; 1332 RTFREE(ro.ro_rt); 1333 } 1334 else { 1335 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1336 } 1337 1338 /* 1339 * See if we found an interface, and confirm that it 1340 * supports multicast. 1341 */ 1342 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1343 error = EADDRNOTAVAIL; 1344 splx(s); 1345 break; 1346 } 1347 /* 1348 * See if the membership already exists or if all the 1349 * membership slots are full. 1350 */ 1351 imo = ip_findmoptions(inp); 1352 for (i = 0; i < imo->imo_num_memberships; ++i) { 1353 if (imo->imo_membership[i]->inm_ifp == ifp && 1354 imo->imo_membership[i]->inm_addr.s_addr 1355 == mreq.imr_multiaddr.s_addr) 1356 break; 1357 } 1358 if (i < imo->imo_num_memberships) { 1359 INP_UNLOCK(inp); 1360 error = EADDRINUSE; 1361 splx(s); 1362 break; 1363 } 1364 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1365 struct in_multi **nmships, **omships; 1366 size_t newmax; 1367 /* 1368 * Resize the vector to next power-of-two minus 1. If the 1369 * size would exceed the maximum then we know we've really 1370 * run out of entries. Otherwise, we realloc() the vector 1371 * with the INP lock held to avoid introducing a race. 1372 */ 1373 nmships = NULL; 1374 omships = imo->imo_membership; 1375 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1376 if (newmax <= IP_MAX_MEMBERSHIPS) { 1377 nmships = (struct in_multi **)realloc(omships, 1378sizeof(*nmships) * newmax, M_IPMOPTS, M_NOWAIT); 1379 if (nmships != NULL) { 1380 imo->imo_membership = nmships; 1381 imo->imo_max_memberships = newmax; 1382 } 1383 } 1384 if (nmships == NULL) { 1385 INP_UNLOCK(inp); 1386 error = ETOOMANYREFS; 1387 splx(s); 1388 break; 1389 } 1390 } 1391 /* 1392 * Everything looks good; add a new record to the multicast 1393 * address list for the given interface. 1394 */ 1395 if ((imo->imo_membership[i] = 1396 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1397 INP_UNLOCK(inp); 1398 error = ENOBUFS; 1399 splx(s); 1400 break; 1401 } 1402 ++imo->imo_num_memberships; 1403 INP_UNLOCK(inp); 1404 splx(s); 1405 break; 1406 1407 case IP_DROP_MEMBERSHIP: 1408 /* 1409 * Drop a multicast group membership. 1410 * Group must be a valid IP multicast address. 1411 */ 1412 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1413 if (error) 1414 break; 1415 1416 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1417 error = EINVAL; 1418 break; 1419 } 1420 1421 s = splimp(); 1422 /* 1423 * If an interface address was specified, get a pointer 1424 * to its ifnet structure. 1425 */ 1426 if (mreq.imr_interface.s_addr == INADDR_ANY) 1427 ifp = NULL; 1428 else { 1429 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1430 if (ifp == NULL) { 1431 error = EADDRNOTAVAIL; 1432 splx(s); 1433 break; 1434 } 1435 } 1436 /* 1437 * Find the membership in the membership array. 1438 */ 1439 imo = ip_findmoptions(inp); 1440 for (i = 0; i < imo->imo_num_memberships; ++i) { 1441 if ((ifp == NULL || 1442 imo->imo_membership[i]->inm_ifp == ifp) && 1443 imo->imo_membership[i]->inm_addr.s_addr == 1444 mreq.imr_multiaddr.s_addr) 1445 break; 1446 } 1447 if (i == imo->imo_num_memberships) { 1448 INP_UNLOCK(inp); 1449 error = EADDRNOTAVAIL; 1450 splx(s); 1451 break; 1452 } 1453 /* 1454 * Give up the multicast address record to which the 1455 * membership points. 1456 */ 1457 in_delmulti(imo->imo_membership[i]); 1458 /* 1459 * Remove the gap in the membership array. 1460 */ 1461 for (++i; i < imo->imo_num_memberships; ++i) 1462 imo->imo_membership[i-1] = imo->imo_membership[i]; 1463 --imo->imo_num_memberships; 1464 INP_UNLOCK(inp); 1465 splx(s); 1466 break; 1467 1468 default: 1469 error = EOPNOTSUPP; 1470 break; 1471 } 1472 1473 return (error); 1474} 1475 1476/* 1477 * Return the IP multicast options in response to user getsockopt(). 1478 */ 1479static int 1480ip_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1481{ 1482 struct ip_moptions *imo; 1483 struct in_addr addr; 1484 struct in_ifaddr *ia; 1485 int error, optval; 1486 u_char coptval; 1487 1488 INP_LOCK(inp); 1489 imo = inp->inp_moptions; 1490 1491 error = 0; 1492 switch (sopt->sopt_name) { 1493 case IP_MULTICAST_VIF: 1494 if (imo != NULL) 1495 optval = imo->imo_multicast_vif; 1496 else 1497 optval = -1; 1498 INP_UNLOCK(inp); 1499 error = sooptcopyout(sopt, &optval, sizeof optval); 1500 break; 1501 1502 case IP_MULTICAST_IF: 1503 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1504 addr.s_addr = INADDR_ANY; 1505 else if (imo->imo_multicast_addr.s_addr) { 1506 /* return the value user has set */ 1507 addr = imo->imo_multicast_addr; 1508 } else { 1509 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1510 addr.s_addr = (ia == NULL) ? INADDR_ANY 1511 : IA_SIN(ia)->sin_addr.s_addr; 1512 } 1513 INP_UNLOCK(inp); 1514 error = sooptcopyout(sopt, &addr, sizeof addr); 1515 break; 1516 1517 case IP_MULTICAST_TTL: 1518 if (imo == 0) 1519 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1520 else 1521 optval = coptval = imo->imo_multicast_ttl; 1522 INP_UNLOCK(inp); 1523 if (sopt->sopt_valsize == 1) 1524 error = sooptcopyout(sopt, &coptval, 1); 1525 else 1526 error = sooptcopyout(sopt, &optval, sizeof optval); 1527 break; 1528 1529 case IP_MULTICAST_LOOP: 1530 if (imo == 0) 1531 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1532 else 1533 optval = coptval = imo->imo_multicast_loop; 1534 INP_UNLOCK(inp); 1535 if (sopt->sopt_valsize == 1) 1536 error = sooptcopyout(sopt, &coptval, 1); 1537 else 1538 error = sooptcopyout(sopt, &optval, sizeof optval); 1539 break; 1540 1541 default: 1542 INP_UNLOCK(inp); 1543 error = ENOPROTOOPT; 1544 break; 1545 } 1546 INP_UNLOCK_ASSERT(inp); 1547 1548 return (error); 1549} 1550 1551/* 1552 * Discard the IP multicast options. 1553 */ 1554void 1555ip_freemoptions(imo) 1556 register struct ip_moptions *imo; 1557{ 1558 register int i; 1559 1560 if (imo != NULL) { 1561 for (i = 0; i < imo->imo_num_memberships; ++i) 1562 in_delmulti(imo->imo_membership[i]); 1563 free(imo->imo_membership, M_IPMOPTS); 1564 free(imo, M_IPMOPTS); 1565 } 1566} 1567 1568/* 1569 * Routine called from ip_output() to loop back a copy of an IP multicast 1570 * packet to the input queue of a specified interface. Note that this 1571 * calls the output routine of the loopback "driver", but with an interface 1572 * pointer that might NOT be a loopback interface -- evil, but easier than 1573 * replicating that code here. 1574 */ 1575static void 1576ip_mloopback(ifp, m, dst, hlen) 1577 struct ifnet *ifp; 1578 register struct mbuf *m; 1579 register struct sockaddr_in *dst; 1580 int hlen; 1581{ 1582 register struct ip *ip; 1583 struct mbuf *copym; 1584 1585 copym = m_copy(m, 0, M_COPYALL); 1586 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1587 copym = m_pullup(copym, hlen); 1588 if (copym != NULL) { 1589 /* If needed, compute the checksum and mark it as valid. */ 1590 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1591 in_delayed_cksum(copym); 1592 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1593 copym->m_pkthdr.csum_flags |= 1594 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1595 copym->m_pkthdr.csum_data = 0xffff; 1596 } 1597 /* 1598 * We don't bother to fragment if the IP length is greater 1599 * than the interface's MTU. Can this possibly matter? 1600 */ 1601 ip = mtod(copym, struct ip *); 1602 ip->ip_len = htons(ip->ip_len); 1603 ip->ip_off = htons(ip->ip_off); 1604 ip->ip_sum = 0; 1605 ip->ip_sum = in_cksum(copym, hlen); 1606 /* 1607 * NB: 1608 * It's not clear whether there are any lingering 1609 * reentrancy problems in other areas which might 1610 * be exposed by using ip_input directly (in 1611 * particular, everything which modifies the packet 1612 * in-place). Yet another option is using the 1613 * protosw directly to deliver the looped back 1614 * packet. For the moment, we'll err on the side 1615 * of safety by using if_simloop(). 1616 */ 1617#if 1 /* XXX */ 1618 if (dst->sin_family != AF_INET) { 1619 printf("ip_mloopback: bad address family %d\n", 1620 dst->sin_family); 1621 dst->sin_family = AF_INET; 1622 } 1623#endif 1624 1625#ifdef notdef 1626 copym->m_pkthdr.rcvif = ifp; 1627 ip_input(copym); 1628#else 1629 if_simloop(ifp, copym, dst->sin_family, 0); 1630#endif 1631 } 1632} 1633