1/* 2 * Copyright (c) 2004-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* 30 * if_bond.c 31 * - bond/failover interface 32 * - implements IEEE 802.3ad Link Aggregation 33 */ 34 35/* 36 * Modification History: 37 * 38 * April 29, 2004 Dieter Siegmund (dieter@apple.com) 39 * - created 40 */ 41 42#include <sys/param.h> 43#include <sys/kernel.h> 44#include <sys/malloc.h> 45#include <sys/mbuf.h> 46#include <sys/queue.h> 47#include <sys/socket.h> 48#include <sys/sockio.h> 49#include <sys/sysctl.h> 50#include <sys/systm.h> 51#include <sys/kern_event.h> 52 53#include <net/bpf.h> 54#include <net/ethernet.h> 55#include <net/if.h> 56#include <net/kpi_interface.h> 57#include <net/if_arp.h> 58#include <net/if_dl.h> 59#include <net/if_ether.h> 60#include <net/if_types.h> 61#include <net/if_bond_var.h> 62#include <net/ieee8023ad.h> 63#include <net/lacp.h> 64#include <net/dlil.h> 65#include <sys/time.h> 66#include <net/devtimer.h> 67#include <net/if_vlan_var.h> 68#include <net/kpi_protocol.h> 69 70#include <kern/locks.h> 71#include <libkern/OSAtomic.h> 72 73#include <netinet/in.h> 74#include <netinet/if_ether.h> 75#include <netinet/in_systm.h> 76#include <netinet/ip.h> 77#include <netinet/ip6.h> 78 79#include <net/if_media.h> 80#include <net/multicast_list.h> 81 82static struct ether_addr slow_proto_multicast = { 83 IEEE8023AD_SLOW_PROTO_MULTICAST 84}; 85 86#define BOND_MAXUNIT 128 87#define BONDNAME "bond" 88#define M_BOND M_DEVBUF 89 90#define EA_FORMAT "%x:%x:%x:%x:%x:%x" 91#define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)]) 92#define EA_LIST(ea) EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5) 93 94#define timestamp_printf printf 95 96/** 97 ** bond locks 98 **/ 99static __inline__ lck_grp_t * 100my_lck_grp_alloc_init(const char * grp_name) 101{ 102 lck_grp_t * grp; 103 lck_grp_attr_t * grp_attrs; 104 105 grp_attrs = lck_grp_attr_alloc_init(); 106 grp = lck_grp_alloc_init(grp_name, grp_attrs); 107 lck_grp_attr_free(grp_attrs); 108 return (grp); 109} 110 111static 
__inline__ lck_mtx_t * 112my_lck_mtx_alloc_init(lck_grp_t * lck_grp) 113{ 114 lck_attr_t * lck_attrs; 115 lck_mtx_t * lck_mtx; 116 117 lck_attrs = lck_attr_alloc_init(); 118 lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs); 119 lck_attr_free(lck_attrs); 120 return (lck_mtx); 121} 122 123static lck_mtx_t * bond_lck_mtx; 124 125static __inline__ void 126bond_lock_init(void) 127{ 128 lck_grp_t * bond_lck_grp; 129 130 bond_lck_grp = my_lck_grp_alloc_init("if_bond"); 131 bond_lck_mtx = my_lck_mtx_alloc_init(bond_lck_grp); 132} 133 134static __inline__ void 135bond_assert_lock_held(void) 136{ 137 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_OWNED); 138 return; 139} 140 141static __inline__ void 142bond_assert_lock_not_held(void) 143{ 144 lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED); 145 return; 146} 147 148static __inline__ void 149bond_lock(void) 150{ 151 lck_mtx_lock(bond_lck_mtx); 152 return; 153} 154 155static __inline__ void 156bond_unlock(void) 157{ 158 lck_mtx_unlock(bond_lck_mtx); 159 return; 160} 161 162/** 163 ** bond structures, types 164 **/ 165 166struct LAG_info_s { 167 lacp_system li_system; 168 lacp_system_priority li_system_priority; 169 lacp_key li_key; 170}; 171typedef struct LAG_info_s LAG_info, * LAG_info_ref; 172 173struct bondport_s; 174TAILQ_HEAD(port_list, bondport_s); 175struct ifbond_s; 176TAILQ_HEAD(ifbond_list, ifbond_s); 177struct LAG_s; 178TAILQ_HEAD(lag_list, LAG_s); 179 180typedef struct ifbond_s ifbond, * ifbond_ref; 181typedef struct bondport_s bondport, * bondport_ref; 182 183struct LAG_s { 184 TAILQ_ENTRY(LAG_s) lag_list; 185 struct port_list lag_port_list; 186 short lag_port_count; 187 short lag_selected_port_count; 188 int lag_active_media; 189 LAG_info lag_info; 190}; 191typedef struct LAG_s LAG, * LAG_ref; 192 193typedef struct partner_state_s { 194 LAG_info ps_lag_info; 195 lacp_port ps_port; 196 lacp_port_priority ps_port_priority; 197 lacp_actor_partner_state ps_state; 198} partner_state, * partner_state_ref; 199 
200struct ifbond_s { 201 TAILQ_ENTRY(ifbond_s) ifb_bond_list; 202 int ifb_flags; 203 SInt32 ifb_retain_count; 204 char ifb_name[IFNAMSIZ]; 205 struct ifnet * ifb_ifp; 206 bpf_packet_func ifb_bpf_input; 207 bpf_packet_func ifb_bpf_output; 208 int ifb_altmtu; 209 struct port_list ifb_port_list; 210 short ifb_port_count; 211 struct lag_list ifb_lag_list; 212 lacp_key ifb_key; 213 short ifb_max_active; /* 0 == unlimited */ 214 LAG_ref ifb_active_lag; 215 struct ifmultiaddr * ifb_ifma_slow_proto; 216 bondport_ref * ifb_distributing_array; 217 int ifb_distributing_count; 218 int ifb_last_link_event; 219 int ifb_mode; /* LACP, STATIC */ 220}; 221 222struct media_info { 223 int mi_active; 224 int mi_status; 225}; 226 227enum { 228 ReceiveState_none = 0, 229 ReceiveState_INITIALIZE = 1, 230 ReceiveState_PORT_DISABLED = 2, 231 ReceiveState_EXPIRED = 3, 232 ReceiveState_LACP_DISABLED = 4, 233 ReceiveState_DEFAULTED = 5, 234 ReceiveState_CURRENT = 6, 235}; 236 237typedef u_char ReceiveState; 238 239enum { 240 SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED, 241 SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED, 242 SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY 243}; 244typedef u_char SelectedState; 245 246static __inline__ const char * 247SelectedStateString(SelectedState s) 248{ 249 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" }; 250 251 if (s <= SelectedState_STANDBY) { 252 return (names[s]); 253 } 254 return ("<unknown>"); 255} 256 257enum { 258 MuxState_none = 0, 259 MuxState_DETACHED = 1, 260 MuxState_WAITING = 2, 261 MuxState_ATTACHED = 3, 262 MuxState_COLLECTING_DISTRIBUTING = 4, 263}; 264 265typedef u_char MuxState; 266 267struct bondport_s { 268 TAILQ_ENTRY(bondport_s) po_port_list; 269 ifbond_ref po_bond; 270 struct multicast_list po_multicast; 271 struct ifnet * po_ifp; 272 struct ether_addr po_saved_addr; 273 int po_enabled; 274 char po_name[IFNAMSIZ]; 275 struct ifdevmtu po_devmtu; 276 277 /* 
LACP */ 278 TAILQ_ENTRY(bondport_s) po_lag_port_list; 279 devtimer_ref po_current_while_timer; 280 devtimer_ref po_periodic_timer; 281 devtimer_ref po_wait_while_timer; 282 devtimer_ref po_transmit_timer; 283 partner_state po_partner_state; 284 lacp_port_priority po_priority; 285 lacp_actor_partner_state po_actor_state; 286 u_char po_flags; 287 u_char po_periodic_interval; 288 u_char po_n_transmit; 289 ReceiveState po_receive_state; 290 MuxState po_mux_state; 291 SelectedState po_selected; 292 int32_t po_last_transmit_secs; 293 struct media_info po_media_info; 294 LAG_ref po_lag; 295}; 296 297#define IFBF_PROMISC 0x1 /* promiscuous mode */ 298#define IFBF_IF_DETACHING 0x2 /* interface is detaching */ 299#define IFBF_LLADDR 0x4 /* specific link address requested */ 300#define IFBF_CHANGE_IN_PROGRESS 0x8 /* interface add/remove in progress */ 301 302static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, 303 user_addr_t datap); 304 305static __inline__ int 306ifbond_flags_promisc(ifbond_ref ifb) 307{ 308 return ((ifb->ifb_flags & IFBF_PROMISC) != 0); 309} 310 311static __inline__ void 312ifbond_flags_set_promisc(ifbond_ref ifb) 313{ 314 ifb->ifb_flags |= IFBF_PROMISC; 315 return; 316} 317 318static __inline__ void 319ifbond_flags_clear_promisc(ifbond_ref ifb) 320{ 321 ifb->ifb_flags &= ~IFBF_PROMISC; 322 return; 323} 324 325static __inline__ int 326ifbond_flags_if_detaching(ifbond_ref ifb) 327{ 328 return ((ifb->ifb_flags & IFBF_IF_DETACHING) != 0); 329} 330 331static __inline__ void 332ifbond_flags_set_if_detaching(ifbond_ref ifb) 333{ 334 ifb->ifb_flags |= IFBF_IF_DETACHING; 335 return; 336} 337 338static __inline__ int 339ifbond_flags_lladdr(ifbond_ref ifb) 340{ 341 return ((ifb->ifb_flags & IFBF_LLADDR) != 0); 342} 343 344static __inline__ void 345ifbond_flags_set_lladdr(ifbond_ref ifb) 346{ 347 ifb->ifb_flags |= IFBF_LLADDR; 348 return; 349} 350 351static __inline__ void 352ifbond_flags_clear_lladdr(ifbond_ref ifb) 353{ 354 ifb->ifb_flags &= 
~IFBF_LLADDR; 355 return; 356} 357 358static __inline__ int 359ifbond_flags_change_in_progress(ifbond_ref ifb) 360{ 361 return ((ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0); 362} 363 364static __inline__ void 365ifbond_flags_set_change_in_progress(ifbond_ref ifb) 366{ 367 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS; 368 return; 369} 370 371static __inline__ void 372ifbond_flags_clear_change_in_progress(ifbond_ref ifb) 373{ 374 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS; 375 return; 376} 377 378/* 379 * bondport_ref->po_flags bits 380 */ 381#define BONDPORT_FLAGS_NTT 0x01 382#define BONDPORT_FLAGS_READY 0x02 383#define BONDPORT_FLAGS_SELECTED_CHANGED 0x04 384#define BONDPORT_FLAGS_MUX_ATTACHED 0x08 385#define BONDPORT_FLAGS_DISTRIBUTING 0x10 386#define BONDPORT_FLAGS_UNUSED2 0x20 387#define BONDPORT_FLAGS_UNUSED3 0x40 388#define BONDPORT_FLAGS_UNUSED4 0x80 389 390static __inline__ void 391bondport_flags_set_ntt(bondport_ref p) 392{ 393 p->po_flags |= BONDPORT_FLAGS_NTT; 394 return; 395} 396 397static __inline__ void 398bondport_flags_clear_ntt(bondport_ref p) 399{ 400 p->po_flags &= ~BONDPORT_FLAGS_NTT; 401 return; 402} 403 404static __inline__ int 405bondport_flags_ntt(bondport_ref p) 406{ 407 return ((p->po_flags & BONDPORT_FLAGS_NTT) != 0); 408} 409 410static __inline__ void 411bondport_flags_set_ready(bondport_ref p) 412{ 413 p->po_flags |= BONDPORT_FLAGS_READY; 414 return; 415} 416 417static __inline__ void 418bondport_flags_clear_ready(bondport_ref p) 419{ 420 p->po_flags &= ~BONDPORT_FLAGS_READY; 421 return; 422} 423 424static __inline__ int 425bondport_flags_ready(bondport_ref p) 426{ 427 return ((p->po_flags & BONDPORT_FLAGS_READY) != 0); 428} 429 430static __inline__ void 431bondport_flags_set_selected_changed(bondport_ref p) 432{ 433 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED; 434 return; 435} 436 437static __inline__ void 438bondport_flags_clear_selected_changed(bondport_ref p) 439{ 440 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED; 441 return; 
442} 443 444static __inline__ int 445bondport_flags_selected_changed(bondport_ref p) 446{ 447 return ((p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0); 448} 449 450static __inline__ void 451bondport_flags_set_mux_attached(bondport_ref p) 452{ 453 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED; 454 return; 455} 456 457static __inline__ void 458bondport_flags_clear_mux_attached(bondport_ref p) 459{ 460 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED; 461 return; 462} 463 464static __inline__ int 465bondport_flags_mux_attached(bondport_ref p) 466{ 467 return ((p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0); 468} 469 470static __inline__ void 471bondport_flags_set_distributing(bondport_ref p) 472{ 473 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING; 474 return; 475} 476 477static __inline__ void 478bondport_flags_clear_distributing(bondport_ref p) 479{ 480 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING; 481 return; 482} 483 484static __inline__ int 485bondport_flags_distributing(bondport_ref p) 486{ 487 return ((p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0); 488} 489 490typedef struct bond_globals_s { 491 struct ifbond_list ifbond_list; 492 lacp_system system; 493 lacp_system_priority system_priority; 494 int verbose; 495} * bond_globals_ref; 496 497static bond_globals_ref g_bond; 498 499/** 500 ** packet_buffer routines 501 ** - thin wrapper for mbuf 502 **/ 503 504typedef struct mbuf * packet_buffer_ref; 505 506static packet_buffer_ref 507packet_buffer_allocate(int length) 508{ 509 packet_buffer_ref m; 510 int size; 511 512 /* leave room for ethernet header */ 513 size = length + sizeof(struct ether_header); 514 if (size > (int)MHLEN) { 515 if (size > (int)MCLBYTES) { 516 printf("bond: packet_buffer_allocate size %d > max %u\n", 517 size, MCLBYTES); 518 return (NULL); 519 } 520 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR); 521 } else { 522 m = m_gethdr(M_WAITOK, MT_DATA); 523 } 524 if (m == NULL) { 525 return (NULL); 526 } 527 m->m_len = size; 528 m->m_pkthdr.len = size; 529 
return (m); 530} 531 532static void * 533packet_buffer_byteptr(packet_buffer_ref buf) 534{ 535 return (buf->m_data + sizeof(struct ether_header)); 536} 537 538typedef enum { 539 LAEventStart, 540 LAEventTimeout, 541 LAEventPacket, 542 LAEventMediaChange, 543 LAEventSelectedChange, 544 LAEventPortMoved, 545 LAEventReady 546} LAEvent; 547 548/** 549 ** Receive machine 550 **/ 551static void 552bondport_receive_machine(bondport_ref p, LAEvent event, 553 void * event_data); 554/** 555 ** Periodic Transmission machine 556 **/ 557static void 558bondport_periodic_transmit_machine(bondport_ref p, LAEvent event, 559 void * event_data); 560 561/** 562 ** Transmit machine 563 **/ 564#define TRANSMIT_MACHINE_TX_IMMEDIATE ((void *)1) 565 566static void 567bondport_transmit_machine(bondport_ref p, LAEvent event, 568 void * event_data); 569 570/** 571 ** Mux machine 572 **/ 573static void 574bondport_mux_machine(bondport_ref p, LAEvent event, 575 void * event_data); 576 577/** 578 ** bond, LAG 579 **/ 580static void 581ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media); 582 583static void 584ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag); 585 586static int 587ifbond_all_ports_ready(ifbond_ref bond); 588 589static LAG_ref 590ifbond_find_best_LAG(ifbond_ref bond, int * active_media); 591 592static int 593LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media); 594 595static int 596ifbond_selection(ifbond_ref bond); 597 598 599/** 600 ** bondport 601 **/ 602 603static void 604bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p); 605 606static void 607bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf); 608 609static bondport_ref 610bondport_create(struct ifnet * port_ifp, lacp_port_priority priority, 611 int active, int short_timeout, int * error); 612static void 613bondport_start(bondport_ref p); 614 615static void 616bondport_free(bondport_ref p); 617 618static int 619bondport_aggregatable(bondport_ref p); 620 621static int 
622bondport_remove_from_LAG(bondport_ref p); 623 624static void 625bondport_set_selected(bondport_ref p, SelectedState s); 626 627static int 628bondport_matches_LAG(bondport_ref p, LAG_ref lag); 629 630static void 631bondport_link_status_changed(bondport_ref p); 632 633static void 634bondport_enable_distributing(bondport_ref p); 635 636static void 637bondport_disable_distributing(bondport_ref p); 638 639static __inline__ int 640bondport_collecting(bondport_ref p) 641{ 642 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) { 643 return (lacp_actor_partner_state_collecting(p->po_actor_state)); 644 } 645 return (TRUE); 646} 647 648/** 649 ** bond interface/dlil specific routines 650 **/ 651static int bond_clone_create(struct if_clone *, u_int32_t, void *); 652static int bond_clone_destroy(struct ifnet *); 653static int bond_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m, 654 char *frame_header); 655static int bond_output(struct ifnet *ifp, struct mbuf *m); 656static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr); 657static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, 658 bpf_packet_func func); 659static int bond_attach_protocol(struct ifnet *ifp); 660static int bond_detach_protocol(struct ifnet *ifp); 661static int bond_setmulti(struct ifnet *ifp); 662static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp); 663static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp); 664static void bond_if_free(struct ifnet * ifp); 665 666static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME, 667 bond_clone_create, 668 bond_clone_destroy, 669 0, 670 BOND_MAXUNIT); 671static void interface_link_event(struct ifnet * ifp, u_int32_t event_code); 672 673static int 674siocsifmtu(struct ifnet * ifp, int mtu) 675{ 676 struct ifreq ifr; 677 678 bzero(&ifr, sizeof(ifr)); 679 ifr.ifr_mtu = mtu; 680 return (ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr)); 681} 682 683static int 684siocgifdevmtu(struct ifnet * ifp, struct 
ifdevmtu * ifdm_p) 685{ 686 struct ifreq ifr; 687 int error; 688 689 bzero(&ifr, sizeof(ifr)); 690 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr); 691 if (error == 0) { 692 *ifdm_p = ifr.ifr_devmtu; 693 } 694 return (error); 695} 696 697static __inline__ void 698ether_addr_copy(void * dest, const void * source) 699{ 700 bcopy(source, dest, ETHER_ADDR_LEN); 701 return; 702} 703 704static __inline__ void 705ifbond_retain(ifbond_ref ifb) 706{ 707 OSIncrementAtomic(&ifb->ifb_retain_count); 708} 709 710static __inline__ void 711ifbond_release(ifbond_ref ifb) 712{ 713 UInt32 old_retain_count; 714 715 old_retain_count = OSDecrementAtomic(&ifb->ifb_retain_count); 716 switch (old_retain_count) { 717 case 0: 718 panic("ifbond_release: retain count is 0\n"); 719 break; 720 case 1: 721 if (g_bond->verbose) { 722 printf("ifbond_release(%s)\n", ifb->ifb_name); 723 } 724 if (ifb->ifb_ifma_slow_proto != NULL) { 725 if (g_bond->verbose) { 726 printf("ifbond_release(%s) removing multicast\n", 727 ifb->ifb_name); 728 } 729 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp, 730 ifb->ifb_ifma_slow_proto->ifma_addr); 731 IFMA_REMREF(ifb->ifb_ifma_slow_proto); 732 } 733 if (ifb->ifb_distributing_array != NULL) { 734 FREE(ifb->ifb_distributing_array, M_BOND); 735 } 736 FREE(ifb, M_BOND); 737 break; 738 default: 739 break; 740 } 741 return; 742} 743 744/* 745 * Function: ifbond_wait 746 * Purpose: 747 * Allows a single thread to gain exclusive access to the ifbond 748 * data structure. Some operations take a long time to complete, 749 * and some have side-effects that we can't predict. Holding the 750 * bond_lock() across such operations is not possible. 751 * 752 * For example: 753 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to 754 * complete. Simply holding the bond_lock() would freeze all other 755 * data structure accesses during that time. 
756 * 2) When we attach our protocol to the interface, a dlil event is 757 * generated and invokes our bond_event() function. bond_event() 758 * needs to take the bond_lock(), but we're already holding it, so 759 * we're deadlocked against ourselves. 760 * Notes: 761 * Before calling, you must be holding the bond_lock and have taken 762 * a reference on the ifbond_ref. 763 */ 764static void 765ifbond_wait(ifbond_ref ifb, const char * msg) 766{ 767 int waited = 0; 768 769 /* other add/remove in progress */ 770 while (ifbond_flags_change_in_progress(ifb)) { 771 if (g_bond->verbose) { 772 printf("%s: %s msleep\n", ifb->ifb_name, msg); 773 } 774 waited = 1; 775 (void)msleep(ifb, bond_lck_mtx, PZERO, msg, 0); 776 } 777 /* prevent other bond list remove/add from taking place */ 778 ifbond_flags_set_change_in_progress(ifb); 779 if (g_bond->verbose && waited) { 780 printf("%s: %s woke up\n", ifb->ifb_name, msg); 781 } 782 return; 783} 784 785/* 786 * Function: ifbond_signal 787 * Purpose: 788 * Allows the thread that previously invoked ifbond_wait() to 789 * give up exclusive access to the ifbond data structure, and wake up 790 * any other threads waiting to access 791 * Notes: 792 * Before calling, you must be holding the bond_lock and have taken 793 * a reference on the ifbond_ref. 
794 */ 795static void 796ifbond_signal(ifbond_ref ifb, const char * msg) 797{ 798 ifbond_flags_clear_change_in_progress(ifb); 799 wakeup((caddr_t)ifb); 800 if (g_bond->verbose) { 801 printf("%s: %s wakeup\n", ifb->ifb_name, msg); 802 } 803 return; 804} 805 806/** 807 ** Media information 808 **/ 809 810static int 811link_speed(int active) 812{ 813 switch (IFM_SUBTYPE(active)) { 814 case IFM_10_T: 815 case IFM_10_2: 816 case IFM_10_5: 817 case IFM_10_STP: 818 case IFM_10_FL: 819 return (10); 820 case IFM_100_TX: 821 case IFM_100_FX: 822 case IFM_100_T4: 823 case IFM_100_VG: 824 case IFM_100_T2: 825 return (100); 826 case IFM_1000_SX: 827 case IFM_1000_LX: 828 case IFM_1000_CX: 829 case IFM_1000_TX: 830 return (1000); 831 case IFM_HPNA_1: 832 return (0); 833 default: 834 /* assume that new defined types are going to be at least 10GigE */ 835 case IFM_10G_SR: 836 case IFM_10G_LR: 837 return (10000); 838 } 839} 840 841static __inline__ int 842media_active(const struct media_info * mi) 843{ 844 if ((mi->mi_status & IFM_AVALID) == 0) { 845 return (1); 846 } 847 return ((mi->mi_status & IFM_ACTIVE) != 0); 848} 849 850static __inline__ int 851media_full_duplex(const struct media_info * mi) 852{ 853 return ((mi->mi_active & IFM_FDX) != 0); 854} 855 856static __inline__ int 857media_speed(const struct media_info * mi) 858{ 859 return (link_speed(mi->mi_active)); 860} 861 862static struct media_info 863interface_media_info(struct ifnet * ifp) 864{ 865 struct ifmediareq ifmr; 866 struct media_info mi; 867 868 bzero(&mi, sizeof(mi)); 869 bzero(&ifmr, sizeof(ifmr)); 870 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) { 871 if (ifmr.ifm_count != 0) { 872 mi.mi_status = ifmr.ifm_status; 873 mi.mi_active = ifmr.ifm_active; 874 } 875 } 876 return (mi); 877} 878 879static int 880if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p) 881{ 882 struct ifreq ifr; 883 884 /* 885 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver 886 * currently expects it 
that way 887 */ 888 ifr.ifr_addr.sa_family = AF_UNSPEC; 889 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN; 890 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p); 891 return (ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr)); 892} 893 894/** 895 ** bond_globals 896 **/ 897static bond_globals_ref 898bond_globals_create(lacp_system_priority sys_pri, 899 lacp_system_ref sys) 900{ 901 bond_globals_ref b; 902 903 b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK); 904 if (b == NULL) { 905 return (NULL); 906 } 907 bzero(b, sizeof(*b)); 908 TAILQ_INIT(&b->ifbond_list); 909 b->system = *sys; 910 b->system_priority = sys_pri; 911 return (b); 912} 913 914static int 915bond_globals_init(void) 916{ 917 bond_globals_ref b; 918 int i; 919 struct ifnet * ifp; 920 921 bond_assert_lock_not_held(); 922 923 if (g_bond != NULL) { 924 return (0); 925 } 926 927 /* 928 * use en0's ethernet address as the system identifier, and if it's not 929 * there, use en1 .. en3 930 */ 931 ifp = NULL; 932 for (i = 0; i < 4; i++) { 933 char ifname[IFNAMSIZ+1]; 934 snprintf(ifname, sizeof(ifname), "en%d", i); 935 ifp = ifunit(ifname); 936 if (ifp != NULL) { 937 break; 938 } 939 } 940 b = NULL; 941 if (ifp != NULL) { 942 b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp)); 943 } 944 bond_lock(); 945 if (g_bond != NULL) { 946 bond_unlock(); 947 _FREE(b, M_BOND); 948 return (0); 949 } 950 g_bond = b; 951 bond_unlock(); 952 if (ifp == NULL) { 953 return (ENXIO); 954 } 955 if (b == NULL) { 956 return (ENOMEM); 957 } 958 return (0); 959} 960 961static void 962bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m, 963 const struct ether_header * eh_p, 964 u_int16_t vlan_tag, bpf_packet_func func) 965{ 966 struct ether_vlan_header * vlh_p; 967 struct mbuf * vl_m; 968 969 vl_m = m_get(M_DONTWAIT, MT_DATA); 970 if (vl_m == NULL) { 971 return; 972 } 973 /* populate a new mbuf containing the vlan ethernet header */ 974 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 975 vlh_p = mtod(vl_m, struct ether_vlan_header *); 976 bcopy(eh_p, 
vlh_p, offsetof(struct ether_header, ether_type)); 977 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN); 978 vlh_p->evl_tag = htons(vlan_tag); 979 vlh_p->evl_proto = eh_p->ether_type; 980 vl_m->m_next = m; 981 (*func)(ifp, vl_m); 982 vl_m->m_next = NULL; 983 m_free(vl_m); 984 return; 985} 986 987static __inline__ void 988bond_bpf_output(struct ifnet * ifp, struct mbuf * m, 989 bpf_packet_func func) 990{ 991 if (func != NULL) { 992 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { 993 const struct ether_header * eh_p; 994 eh_p = mtod(m, const struct ether_header *); 995 m->m_data += ETHER_HDR_LEN; 996 m->m_len -= ETHER_HDR_LEN; 997 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func); 998 m->m_data -= ETHER_HDR_LEN; 999 m->m_len += ETHER_HDR_LEN; 1000 } else { 1001 (*func)(ifp, m); 1002 } 1003 } 1004 return; 1005} 1006 1007static __inline__ void 1008bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p, 1009 bpf_packet_func func) 1010{ 1011 if (func != NULL) { 1012 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { 1013 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func); 1014 } else { 1015 /* restore the header */ 1016 m->m_data -= ETHER_HDR_LEN; 1017 m->m_len += ETHER_HDR_LEN; 1018 (*func)(ifp, m); 1019 m->m_data += ETHER_HDR_LEN; 1020 m->m_len -= ETHER_HDR_LEN; 1021 } 1022 } 1023 return; 1024} 1025 1026/* 1027 * Function: bond_setmulti 1028 * Purpose: 1029 * Enable multicast reception on "our" interface by enabling multicasts on 1030 * each of the member ports. 
1031 */ 1032static int 1033bond_setmulti(struct ifnet * ifp) 1034{ 1035 ifbond_ref ifb; 1036 int error; 1037 int result = 0; 1038 bondport_ref p; 1039 1040 bond_lock(); 1041 ifb = ifnet_softc(ifp); 1042 if (ifb == NULL || ifbond_flags_if_detaching(ifb) 1043 || TAILQ_EMPTY(&ifb->ifb_port_list)) { 1044 bond_unlock(); 1045 return (0); 1046 } 1047 ifbond_retain(ifb); 1048 ifbond_wait(ifb, "bond_setmulti"); 1049 1050 if (ifbond_flags_if_detaching(ifb)) { 1051 /* someone destroyed the bond while we were waiting */ 1052 result = EBUSY; 1053 goto signal_done; 1054 } 1055 bond_unlock(); 1056 1057 /* ifbond_wait() let's us safely walk the list without holding the lock */ 1058 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { 1059 struct ifnet * port_ifp = p->po_ifp; 1060 1061 error = multicast_list_program(&p->po_multicast, 1062 ifp, port_ifp); 1063 if (error != 0) { 1064 printf("bond_setmulti(%s): " 1065 "multicast_list_program(%s%d) failed, %d\n", 1066 ifb->ifb_name, ifnet_name(port_ifp), 1067 ifnet_unit(port_ifp), error); 1068 result = error; 1069 } 1070 } 1071 bond_lock(); 1072 signal_done: 1073 ifbond_signal(ifb, "bond_setmulti"); 1074 bond_unlock(); 1075 ifbond_release(ifb); 1076 return (result); 1077} 1078 1079static int 1080bond_clone_attach(void) 1081{ 1082 int error; 1083 1084 if ((error = if_clone_attach(&bond_cloner)) != 0) 1085 return error; 1086 bond_lock_init(); 1087 return 0; 1088} 1089 1090static int 1091ifbond_add_slow_proto_multicast(ifbond_ref ifb) 1092{ 1093 int error; 1094 struct ifmultiaddr * ifma = NULL; 1095 struct sockaddr_dl sdl; 1096 1097 bond_assert_lock_not_held(); 1098 1099 bzero(&sdl, sizeof(sdl)); 1100 sdl.sdl_len = sizeof(sdl); 1101 sdl.sdl_family = AF_LINK; 1102 sdl.sdl_type = IFT_ETHER; 1103 sdl.sdl_nlen = 0; 1104 sdl.sdl_alen = sizeof(slow_proto_multicast); 1105 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast)); 1106 error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma); 1107 if (error == 0) { 
1108 ifb->ifb_ifma_slow_proto = ifma; 1109 } 1110 return (error); 1111} 1112 1113static int 1114bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params) 1115{ 1116 int error; 1117 ifbond_ref ifb; 1118 ifnet_t ifp; 1119 struct ifnet_init_eparams bond_init; 1120 1121 error = bond_globals_init(); 1122 if (error != 0) { 1123 return (error); 1124 } 1125 1126 ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK); 1127 if (ifb == NULL) { 1128 return (ENOMEM); 1129 } 1130 bzero(ifb, sizeof(*ifb)); 1131 1132 ifbond_retain(ifb); 1133 TAILQ_INIT(&ifb->ifb_port_list); 1134 TAILQ_INIT(&ifb->ifb_lag_list); 1135 ifb->ifb_key = unit + 1; 1136 1137 /* use the interface name as the unique id for ifp recycle */ 1138 if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d", 1139 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) { 1140 ifbond_release(ifb); 1141 return (EINVAL); 1142 } 1143 1144 bzero(&bond_init, sizeof(bond_init)); 1145 bond_init.ver = IFNET_INIT_CURRENT_VERSION; 1146 bond_init.len = sizeof (bond_init); 1147 bond_init.flags = IFNET_INIT_LEGACY; 1148 bond_init.uniqueid = ifb->ifb_name; 1149 bond_init.uniqueid_len = strlen(ifb->ifb_name); 1150 bond_init.name = ifc->ifc_name; 1151 bond_init.unit = unit; 1152 bond_init.family = IFNET_FAMILY_BOND; 1153 bond_init.type = IFT_IEEE8023ADLAG; 1154 bond_init.output = bond_output; 1155 bond_init.demux = ether_demux; 1156 bond_init.add_proto = ether_add_proto; 1157 bond_init.del_proto = ether_del_proto; 1158 bond_init.check_multi = ether_check_multi; 1159 bond_init.framer_extended = ether_frameout_extended; 1160 bond_init.ioctl = bond_ioctl; 1161 bond_init.set_bpf_tap = bond_set_bpf_tap; 1162 bond_init.detach = bond_if_free; 1163 bond_init.broadcast_addr = etherbroadcastaddr; 1164 bond_init.broadcast_len = ETHER_ADDR_LEN; 1165 bond_init.softc = ifb; 1166 error = ifnet_allocate_extended(&bond_init, &ifp); 1167 1168 if (error) { 1169 ifbond_release(ifb); 1170 return (error); 1171 } 1172 1173 ifb->ifb_ifp = 
ifp; 1174 ifnet_set_offload(ifp, 0); 1175 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */ 1176 ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff); 1177 ifnet_set_baudrate(ifp, 0); 1178 ifnet_set_mtu(ifp, 0); 1179 1180 error = ifnet_attach(ifp, NULL); 1181 if (error != 0) { 1182 ifnet_release(ifp); 1183 ifbond_release(ifb); 1184 return (error); 1185 } 1186 error = ifbond_add_slow_proto_multicast(ifb); 1187 if (error != 0) { 1188 printf("bond_clone_create(%s): " 1189 "failed to add slow_proto multicast, %d\n", 1190 ifb->ifb_name, error); 1191 } 1192 1193 /* attach as ethernet */ 1194 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header)); 1195 1196 bond_lock(); 1197 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list); 1198 bond_unlock(); 1199 1200 return (0); 1201} 1202 1203static void 1204bond_remove_all_interfaces(ifbond_ref ifb) 1205{ 1206 bondport_ref p; 1207 1208 bond_assert_lock_held(); 1209 1210 /* 1211 * do this in reverse order to avoid re-programming the mac address 1212 * as each head interface is removed 1213 */ 1214 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) { 1215 bond_remove_interface(ifb, p->po_ifp); 1216 } 1217 return; 1218} 1219 1220static void 1221bond_remove(ifbond_ref ifb) 1222{ 1223 bond_assert_lock_held(); 1224 ifbond_flags_set_if_detaching(ifb); 1225 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list); 1226 bond_remove_all_interfaces(ifb); 1227 return; 1228} 1229 1230static void 1231bond_if_detach(struct ifnet * ifp) 1232{ 1233 int error; 1234 1235 error = ifnet_detach(ifp); 1236 if (error) { 1237 printf("bond_if_detach %s%d: ifnet_detach failed, %d\n", 1238 ifnet_name(ifp), ifnet_unit(ifp), error); 1239 } 1240 1241 return; 1242} 1243 1244static int 1245bond_clone_destroy(struct ifnet * ifp) 1246{ 1247 ifbond_ref ifb; 1248 1249 bond_lock(); 1250 ifb = ifnet_softc(ifp); 1251 if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) { 1252 bond_unlock(); 1253 return 0; 
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the destination ethernet address and the
 *   ethernet type of the given header.
 * Returns:
 *   ((bytes 4..5 of ether_dhost) << 16 | ether_type) ^ (bytes 0..3 of
 *   ether_dhost), with the multi-byte quantities taken in host memory
 *   order, exactly as stored in the header.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
    uint32_t	dhost_head;	/* ether_dhost[0..3] */
    uint16_t	dhost_tail;	/* ether_dhost[4..5] */
    uint32_t	h;

    /*
     * Copy the address bytes out rather than casting the byte array to
     * wider integer pointers: the header has no 32-bit alignment guarantee.
     */
    bcopy(&eh_p->ether_dhost[0], &dhost_head, sizeof(dhost_head));
    bcopy(&eh_p->ether_dhost[4], &dhost_tail, sizeof(dhost_tail));

    /*
     * get 32-bits from destination ether and ether type; widen before
     * shifting so a set top bit is not shifted into the sign bit of an int
     */
    h = ((uint32_t)dhost_tail << 16) | (uint32_t)eh_p->ether_type;
    h ^= dhost_head;
    return (h);
}
(uint32_t)c0); 1341} 1342#endif /* BYTE_ORDER == LITTLE_ENDIAN */ 1343 1344static int 1345S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val) 1346{ 1347 struct mbuf * current; 1348 u_char * current_data; 1349 struct mbuf * next; 1350 u_char * next_data; 1351 int space_current; 1352 1353 current = S_mbuf_skip_to_offset(m, &offset); 1354 if (current == NULL) { 1355 return (1); 1356 } 1357 current_data = mtod(current, u_char *) + offset; 1358 space_current = current->m_len - offset; 1359 if (space_current >= (int)sizeof(uint32_t)) { 1360 *val = *((uint32_t *)current_data); 1361 return (0); 1362 } 1363 next = current->m_next; 1364 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) { 1365 return (1); 1366 } 1367 next_data = mtod(next, u_char *); 1368 switch (space_current) { 1369 case 1: 1370 *val = make_uint32(current_data[0], next_data[0], 1371 next_data[1], next_data[2]); 1372 break; 1373 case 2: 1374 *val = make_uint32(current_data[0], current_data[1], 1375 next_data[0], next_data[1]); 1376 break; 1377 default: 1378 *val = make_uint32(current_data[0], current_data[1], 1379 current_data[2], next_data[0]); 1380 break; 1381 } 1382 return (0); 1383} 1384 1385#define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p)) 1386#define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p)) 1387 1388static uint32_t 1389ip_header_hash(struct mbuf * m) 1390{ 1391 u_char * data; 1392 struct in_addr ip_dst; 1393 struct in_addr ip_src; 1394 u_char ip_p; 1395 int32_t offset; 1396 struct mbuf * orig_m = m; 1397 1398 /* find the IP protocol field relative to the start of the packet */ 1399 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header); 1400 m = S_mbuf_skip_to_offset(m, &offset); 1401 if (m == NULL || m->m_len < 1) { 1402 goto bad_ip_packet; 1403 } 1404 data = mtod(m, u_char *) + offset; 1405 ip_p = *data; 1406 1407 /* find the IP src relative to the IP protocol */ 1408 if ((m->m_len - 
offset) 1409 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) { 1410 /* this should be the normal case */ 1411 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET); 1412 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET); 1413 } 1414 else { 1415 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET, 1416 (uint32_t *)&ip_src.s_addr)) { 1417 goto bad_ip_packet; 1418 } 1419 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET, 1420 (uint32_t *)&ip_dst.s_addr)) { 1421 goto bad_ip_packet; 1422 } 1423 } 1424 return (ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p)); 1425 1426 bad_ip_packet: 1427 return (ether_header_hash(mtod(orig_m, struct ether_header *))); 1428} 1429 1430#define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2) 1431static uint32_t 1432ipv6_header_hash(struct mbuf * m) 1433{ 1434 u_char * data; 1435 int i; 1436 int32_t offset; 1437 struct mbuf * orig_m = m; 1438 uint32_t * scan; 1439 uint32_t val; 1440 1441 /* find the IP protocol field relative to the start of the packet */ 1442 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header); 1443 m = S_mbuf_skip_to_offset(m, &offset); 1444 if (m == NULL) { 1445 goto bad_ipv6_packet; 1446 } 1447 data = mtod(m, u_char *) + offset; 1448 val = 0; 1449 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) { 1450 /* this should be the normal case */ 1451 for (i = 0, scan = (uint32_t *)data; 1452 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); 1453 i++, scan++) { 1454 val ^= *scan; 1455 } 1456 } 1457 else { 1458 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) { 1459 uint32_t tmp; 1460 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t), 1461 (uint32_t *)&tmp)) { 1462 goto bad_ipv6_packet; 1463 } 1464 val ^= tmp; 1465 } 1466 } 1467 return (ntohl(val)); 1468 1469 bad_ipv6_packet: 1470 return (ether_header_hash(mtod(orig_m, struct ether_header *))); 1471} 1472 1473static int 1474bond_output(struct ifnet * ifp, struct mbuf * m) 1475{ 1476 bpf_packet_func bpf_func; 1477 uint32_t h; 1478 
ifbond_ref ifb; 1479 struct ifnet * port_ifp = NULL; 1480 int err; 1481 struct flowadv adv = { FADV_SUCCESS }; 1482 1483 if (m == 0) { 1484 return (0); 1485 } 1486 if ((m->m_flags & M_PKTHDR) == 0) { 1487 m_freem(m); 1488 return (0); 1489 } 1490 if (m->m_pkthdr.pkt_flowid != 0) { 1491 h = m->m_pkthdr.pkt_flowid; 1492 } 1493 else { 1494 struct ether_header * eh_p; 1495 1496 eh_p = mtod(m, struct ether_header *); 1497 switch (ntohs(eh_p->ether_type)) { 1498 case ETHERTYPE_IP: 1499 h = ip_header_hash(m); 1500 break; 1501 case ETHERTYPE_IPV6: 1502 h = ipv6_header_hash(m); 1503 break; 1504 default: 1505 h = ether_header_hash(eh_p); 1506 break; 1507 } 1508 } 1509 bond_lock(); 1510 ifb = ifnet_softc(ifp); 1511 if (ifb == NULL || ifbond_flags_if_detaching(ifb) 1512 || ifb->ifb_distributing_count == 0) { 1513 goto done; 1514 } 1515 h %= ifb->ifb_distributing_count; 1516 port_ifp = ifb->ifb_distributing_array[h]->po_ifp; 1517 bpf_func = ifb->ifb_bpf_output; 1518 bond_unlock(); 1519 1520 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { 1521 (void)ifnet_stat_increment_out(ifp, 1, 1522 m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN, 1523 0); 1524 } else { 1525 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0); 1526 } 1527 bond_bpf_output(ifp, m, bpf_func); 1528 1529 err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv); 1530 1531 if (err == 0) { 1532 if (adv.code == FADV_FLOW_CONTROLLED) { 1533 err = EQFULL; 1534 } else if (adv.code == FADV_SUSPENDED) { 1535 err = EQSUSPENDED; 1536 } 1537 } 1538 1539 return (err); 1540 1541 done: 1542 bond_unlock(); 1543 m_freem(m); 1544 return (0); 1545} 1546 1547static bondport_ref 1548ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp) 1549{ 1550 bondport_ref p; 1551 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { 1552 if (p->po_ifp == port_ifp) { 1553 return (p); 1554 } 1555 } 1556 return (NULL); 1557} 1558 1559static bondport_ref 1560bond_lookup_port(struct ifnet * port_ifp) 1561{ 1562 ifbond_ref ifb; 1563 
bondport_ref port; 1564 1565 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) { 1566 port = ifbond_lookup_port(ifb, port_ifp); 1567 if (port != NULL) { 1568 return (port); 1569 } 1570 } 1571 return (NULL); 1572} 1573 1574static void 1575bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp) 1576{ 1577 struct ifnet * bond_ifp = NULL; 1578 ifbond_ref ifb; 1579 int event_code = 0; 1580 bondport_ref p; 1581 1582 bond_lock(); 1583 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) { 1584 goto done; 1585 } 1586 p = bond_lookup_port(port_ifp); 1587 if (p == NULL) { 1588 goto done; 1589 } 1590 if (p->po_enabled == 0) { 1591 goto done; 1592 } 1593 ifb = p->po_bond; 1594 if (ifb->ifb_mode != IF_BOND_MODE_LACP) { 1595 goto done; 1596 } 1597 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data); 1598 if (ifbond_selection(ifb)) { 1599 event_code = (ifb->ifb_active_lag == NULL) 1600 ? KEV_DL_LINK_OFF 1601 : KEV_DL_LINK_ON; 1602 /* XXX need to take a reference on bond_ifp */ 1603 bond_ifp = ifb->ifb_ifp; 1604 ifb->ifb_last_link_event = event_code; 1605 } 1606 else { 1607 event_code = (ifb->ifb_active_lag == NULL) 1608 ? 
KEV_DL_LINK_OFF 1609 : KEV_DL_LINK_ON; 1610 if (event_code != ifb->ifb_last_link_event) { 1611 if (g_bond->verbose) { 1612 timestamp_printf("%s: (receive) generating LINK event\n", 1613 ifb->ifb_name); 1614 } 1615 bond_ifp = ifb->ifb_ifp; 1616 ifb->ifb_last_link_event = event_code; 1617 } 1618 } 1619 1620 done: 1621 bond_unlock(); 1622 if (bond_ifp != NULL) { 1623 interface_link_event(bond_ifp, event_code); 1624 } 1625 m_freem(m); 1626 return; 1627} 1628 1629static void 1630bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp) 1631{ 1632 la_marker_pdu_ref marker_p; 1633 bondport_ref p; 1634 1635 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN); 1636 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) { 1637 goto failed; 1638 } 1639 bond_lock(); 1640 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) { 1641 bond_unlock(); 1642 goto failed; 1643 } 1644 p = bond_lookup_port(port_ifp); 1645 if (p == NULL || p->po_enabled == 0 1646 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) { 1647 bond_unlock(); 1648 goto failed; 1649 } 1650 /* echo back the same packet as a marker response */ 1651 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE; 1652 bondport_slow_proto_transmit(p, (packet_buffer_ref)m); 1653 bond_unlock(); 1654 return; 1655 1656 failed: 1657 m_freem(m); 1658 return; 1659} 1660 1661static int 1662bond_input(ifnet_t port_ifp, __unused protocol_family_t protocol, mbuf_t m, 1663 char * frame_header) 1664{ 1665 bpf_packet_func bpf_func; 1666 const struct ether_header * eh_p; 1667 ifbond_ref ifb; 1668 struct ifnet * ifp; 1669 bondport_ref p; 1670 1671 eh_p = (const struct ether_header *)frame_header; 1672 if ((m->m_flags & M_MCAST) != 0 1673 && bcmp(eh_p->ether_dhost, &slow_proto_multicast, 1674 sizeof(eh_p->ether_dhost)) == 0 1675 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) { 1676 u_char subtype = *mtod(m, u_char *); 1677 1678 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) { 1679 if (m->m_pkthdr.len 
< (int)offsetof(lacpdu, la_reserved)) { 1680 m_freem(m); 1681 return (0); 1682 } 1683 /* send to lacp */ 1684 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) { 1685 m = m_pullup(m, offsetof(lacpdu, la_reserved)); 1686 if (m == NULL) { 1687 return (0); 1688 } 1689 } 1690 bond_receive_lacpdu(m, port_ifp); 1691 return (0); 1692 } 1693 else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) { 1694 int min_size; 1695 1696 /* restore the ethernet header pointer in the mbuf */ 1697 m->m_pkthdr.len += ETHER_HDR_LEN; 1698 m->m_data -= ETHER_HDR_LEN; 1699 m->m_len += ETHER_HDR_LEN; 1700 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved); 1701 if (m->m_pkthdr.len < min_size) { 1702 m_freem(m); 1703 return (0); 1704 } 1705 /* send to lacp */ 1706 if (m->m_len < min_size) { 1707 m = m_pullup(m, min_size); 1708 if (m == NULL) { 1709 return (0); 1710 } 1711 } 1712 /* send to marker responder */ 1713 bond_receive_la_marker_pdu(m, port_ifp); 1714 return (0); 1715 } 1716 else if (subtype == 0 1717 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) { 1718 /* invalid subtype, discard the frame */ 1719 m_freem(m); 1720 return (0); 1721 } 1722 } 1723 bond_lock(); 1724 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) { 1725 goto done; 1726 } 1727 p = bond_lookup_port(port_ifp); 1728 if (p == NULL || bondport_collecting(p) == 0) { 1729 goto done; 1730 } 1731 1732 /* make the packet appear as if it arrived on the bonded interface */ 1733 ifb = p->po_bond; 1734 ifp = ifb->ifb_ifp; 1735 bpf_func = ifb->ifb_bpf_input; 1736 bond_unlock(); 1737 1738 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { 1739 (void)ifnet_stat_increment_in(ifp, 1, 1740 (m->m_pkthdr.len + ETHER_HDR_LEN 1741 + ETHER_VLAN_ENCAP_LEN), 0); 1742 } 1743 else { 1744 (void)ifnet_stat_increment_in(ifp, 1, 1745 (m->m_pkthdr.len + ETHER_HDR_LEN), 0); 1746 } 1747 m->m_pkthdr.rcvif = ifp; 1748 bond_bpf_input(ifp, m, eh_p, bpf_func); 1749 m->m_pkthdr.pkt_hdr = frame_header; 1750 
dlil_input_packet_list(ifp, m); 1751 return 0; 1752 1753 done: 1754 bond_unlock(); 1755 m_freem(m); 1756 return (0); 1757} 1758 1759static __inline__ const char * 1760bondport_get_name(bondport_ref p) 1761{ 1762 return (p->po_name); 1763} 1764 1765static __inline__ int 1766bondport_get_index(bondport_ref p) 1767{ 1768 return (ifnet_index(p->po_ifp)); 1769} 1770 1771static void 1772bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf) 1773{ 1774 struct ether_header * eh_p; 1775 int error; 1776 1777 /* packet_buffer_allocate leaves room for ethernet header */ 1778 eh_p = mtod(buf, struct ether_header *); 1779 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost)); 1780 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost)); 1781 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE); 1782 error = ifnet_output_raw(p->po_ifp, PF_BOND, buf); 1783 if (error != 0) { 1784 printf("bondport_slow_proto_transmit(%s) failed %d\n", 1785 bondport_get_name(p), error); 1786 } 1787 return; 1788} 1789 1790static void 1791bondport_timer_process_func(devtimer_ref timer, 1792 devtimer_process_func_event event) 1793{ 1794 bondport_ref p; 1795 1796 switch (event) { 1797 case devtimer_process_func_event_lock: 1798 bond_lock(); 1799 devtimer_retain(timer); 1800 break; 1801 case devtimer_process_func_event_unlock: 1802 if (devtimer_valid(timer)) { 1803 /* as long as the devtimer is valid, we can look at arg0 */ 1804 int event_code = 0; 1805 struct ifnet * bond_ifp = NULL; 1806 1807 p = (bondport_ref)devtimer_arg0(timer); 1808 if (ifbond_selection(p->po_bond)) { 1809 event_code = (p->po_bond->ifb_active_lag == NULL) 1810 ? KEV_DL_LINK_OFF 1811 : KEV_DL_LINK_ON; 1812 /* XXX need to take a reference on bond_ifp */ 1813 bond_ifp = p->po_bond->ifb_ifp; 1814 p->po_bond->ifb_last_link_event = event_code; 1815 } 1816 else { 1817 event_code = (p->po_bond->ifb_active_lag == NULL) 1818 ? 
KEV_DL_LINK_OFF 1819 : KEV_DL_LINK_ON; 1820 if (event_code != p->po_bond->ifb_last_link_event) { 1821 if (g_bond->verbose) { 1822 timestamp_printf("%s: (timer) generating LINK event\n", 1823 p->po_bond->ifb_name); 1824 } 1825 bond_ifp = p->po_bond->ifb_ifp; 1826 p->po_bond->ifb_last_link_event = event_code; 1827 } 1828 } 1829 devtimer_release(timer); 1830 bond_unlock(); 1831 if (bond_ifp != NULL) { 1832 interface_link_event(bond_ifp, event_code); 1833 } 1834 } 1835 else { 1836 /* timer is going away */ 1837 devtimer_release(timer); 1838 bond_unlock(); 1839 } 1840 break; 1841 default: 1842 break; 1843 } 1844} 1845 1846static bondport_ref 1847bondport_create(struct ifnet * port_ifp, lacp_port_priority priority, 1848 int active, int short_timeout, int * ret_error) 1849{ 1850 int error = 0; 1851 bondport_ref p = NULL; 1852 lacp_actor_partner_state s; 1853 1854 *ret_error = 0; 1855 p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK); 1856 if (p == NULL) { 1857 *ret_error = ENOMEM; 1858 return (NULL); 1859 } 1860 bzero(p, sizeof(*p)); 1861 multicast_list_init(&p->po_multicast); 1862 if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d", 1863 ifnet_name(port_ifp), ifnet_unit(port_ifp)) 1864 >= sizeof(p->po_name)) { 1865 printf("if_bond: name too large\n"); 1866 *ret_error = EINVAL; 1867 goto failed; 1868 } 1869 error = siocgifdevmtu(port_ifp, &p->po_devmtu); 1870 if (error != 0) { 1871 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n", 1872 bondport_get_name(p), error); 1873 goto failed; 1874 } 1875 /* remember the current interface MTU so it can be restored */ 1876 p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp); 1877 p->po_ifp = port_ifp; 1878 p->po_media_info = interface_media_info(port_ifp); 1879 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p); 1880 if (p->po_current_while_timer == NULL) { 1881 *ret_error = ENOMEM; 1882 goto failed; 1883 } 1884 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p); 1885 if 
(p->po_periodic_timer == NULL) { 1886 *ret_error = ENOMEM; 1887 goto failed; 1888 } 1889 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p); 1890 if (p->po_wait_while_timer == NULL) { 1891 *ret_error = ENOMEM; 1892 goto failed; 1893 } 1894 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p); 1895 if (p->po_transmit_timer == NULL) { 1896 *ret_error = ENOMEM; 1897 goto failed; 1898 } 1899 p->po_receive_state = ReceiveState_none; 1900 p->po_mux_state = MuxState_none; 1901 p->po_priority = priority; 1902 s = 0; 1903 s = lacp_actor_partner_state_set_aggregatable(s); 1904 if (short_timeout) { 1905 s = lacp_actor_partner_state_set_short_timeout(s); 1906 } 1907 if (active) { 1908 s = lacp_actor_partner_state_set_active_lacp(s); 1909 } 1910 p->po_actor_state = s; 1911 return (p); 1912 1913 failed: 1914 bondport_free(p); 1915 return (NULL); 1916} 1917 1918static void 1919bondport_start(bondport_ref p) 1920{ 1921 bondport_receive_machine(p, LAEventStart, NULL); 1922 bondport_mux_machine(p, LAEventStart, NULL); 1923 bondport_periodic_transmit_machine(p, LAEventStart, NULL); 1924 bondport_transmit_machine(p, LAEventStart, NULL); 1925 return; 1926} 1927 1928/* 1929 * Function: bondport_invalidate_timers 1930 * Purpose: 1931 * Invalidate all of the timers for the bondport. 1932 */ 1933static void 1934bondport_invalidate_timers(bondport_ref p) 1935{ 1936 devtimer_invalidate(p->po_current_while_timer); 1937 devtimer_invalidate(p->po_periodic_timer); 1938 devtimer_invalidate(p->po_wait_while_timer); 1939 devtimer_invalidate(p->po_transmit_timer); 1940} 1941 1942/* 1943 * Function: bondport_cancel_timers 1944 * Purpose: 1945 * Cancel all of the timers for the bondport. 
1946 */ 1947static void 1948bondport_cancel_timers(bondport_ref p) 1949{ 1950 devtimer_cancel(p->po_current_while_timer); 1951 devtimer_cancel(p->po_periodic_timer); 1952 devtimer_cancel(p->po_wait_while_timer); 1953 devtimer_cancel(p->po_transmit_timer); 1954} 1955 1956static void 1957bondport_free(bondport_ref p) 1958{ 1959 multicast_list_remove(&p->po_multicast); 1960 devtimer_release(p->po_current_while_timer); 1961 devtimer_release(p->po_periodic_timer); 1962 devtimer_release(p->po_wait_while_timer); 1963 devtimer_release(p->po_transmit_timer); 1964 FREE(p, M_BOND); 1965 return; 1966} 1967 1968#define BOND_ADD_PROGRESS_IN_LIST 0x1 1969#define BOND_ADD_PROGRESS_PROTO_ATTACHED 0x2 1970#define BOND_ADD_PROGRESS_LLADDR_SET 0x4 1971#define BOND_ADD_PROGRESS_MTU_SET 0x8 1972 1973static __inline__ int 1974bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb) 1975{ 1976 return (((int)ifnet_mtu(ifp) > ifb->ifb_altmtu) 1977 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu); 1978} 1979 1980static int 1981bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp) 1982{ 1983 int devmtu; 1984 int error = 0; 1985 int event_code = 0; 1986 int first = FALSE; 1987 ifbond_ref ifb; 1988 bondport_ref * new_array = NULL; 1989 bondport_ref * old_array = NULL; 1990 bondport_ref p; 1991 int progress = 0; 1992 1993 /* pre-allocate space for new port */ 1994 p = bondport_create(port_ifp, 0x8000, 1, 0, &error); 1995 if (p == NULL) { 1996 return (error); 1997 } 1998 bond_lock(); 1999 ifb = (ifbond_ref)ifnet_softc(ifp); 2000 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2001 bond_unlock(); 2002 bondport_free(p); 2003 return ((ifb == NULL ? 
EOPNOTSUPP : EBUSY)); 2004 } 2005 2006 /* make sure this interface can handle our current MTU */ 2007 devmtu = bond_device_mtu(ifp, ifb); 2008 if (devmtu != 0 2009 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) { 2010 bond_unlock(); 2011 printf("if_bond: interface %s doesn't support mtu %d", 2012 bondport_get_name(p), devmtu); 2013 bondport_free(p); 2014 return (EINVAL); 2015 } 2016 2017 /* make sure ifb doesn't get de-allocated while we wait */ 2018 ifbond_retain(ifb); 2019 2020 /* wait for other add or remove to complete */ 2021 ifbond_wait(ifb, "bond_add_interface"); 2022 2023 if (ifbond_flags_if_detaching(ifb)) { 2024 /* someone destroyed the bond while we were waiting */ 2025 error = EBUSY; 2026 goto signal_done; 2027 } 2028 if (bond_lookup_port(port_ifp) != NULL) { 2029 /* port is already part of a bond */ 2030 error = EBUSY; 2031 goto signal_done; 2032 } 2033 ifnet_lock_exclusive(port_ifp); 2034 if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) { 2035 /* interface already has VLAN's, or is part of bond */ 2036 ifnet_lock_done(port_ifp); 2037 error = EBUSY; 2038 goto signal_done; 2039 } 2040 2041 /* mark the interface busy */ 2042 /* can't use ifnet_set_eflags because that takes the lock */ 2043 port_ifp->if_eflags |= IFEF_BOND; 2044 ifnet_lock_done(port_ifp); 2045 2046 if (TAILQ_EMPTY(&ifb->ifb_port_list)) { 2047 ifnet_set_offload(ifp, ifnet_offload(port_ifp)); 2048 ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING); 2049 if (ifbond_flags_lladdr(ifb) == FALSE) { 2050 first = TRUE; 2051 } 2052 } else { 2053 ifnet_offload_t ifp_offload; 2054 ifnet_offload_t port_ifp_offload; 2055 2056 ifp_offload = ifnet_offload(ifp); 2057 port_ifp_offload = ifnet_offload(port_ifp); 2058 if (ifp_offload != port_ifp_offload) { 2059 ifnet_offload_t offload; 2060 2061 offload = ifp_offload & port_ifp_offload; 2062 printf("bond_add_interface(%s, %s) " 2063 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n", 2064 ifb->ifb_name, 
bondport_get_name(p), 2065 ifp_offload, port_ifp_offload, offload); 2066 /* 2067 * XXX 2068 * if the bond has VLAN's, we can't simply change the hwassist 2069 * field behind its back: this needs work 2070 */ 2071 ifnet_set_offload(ifp, offload); 2072 } 2073 } 2074 p->po_bond = ifb; 2075 2076 /* remember the port's ethernet address so it can be restored */ 2077 ether_addr_copy(&p->po_saved_addr, IF_LLADDR(port_ifp)); 2078 2079 /* add it to the list of ports */ 2080 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list); 2081 ifb->ifb_port_count++; 2082 2083 /* set the default MTU */ 2084 if (ifnet_mtu(ifp) == 0) { 2085 ifnet_set_mtu(ifp, ETHERMTU); 2086 } 2087 bond_unlock(); 2088 2089 2090 /* first port added to bond determines bond's ethernet address */ 2091 if (first) { 2092 ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN, 2093 IFT_ETHER); 2094 } 2095 2096 progress |= BOND_ADD_PROGRESS_IN_LIST; 2097 2098 /* allocate a larger distributing array */ 2099 new_array = (bondport_ref *) 2100 _MALLOC(sizeof(*new_array) * ifb->ifb_port_count, M_BOND, M_WAITOK); 2101 if (new_array == NULL) { 2102 error = ENOMEM; 2103 goto failed; 2104 } 2105 2106 /* attach our BOND "protocol" to the interface */ 2107 error = bond_attach_protocol(port_ifp); 2108 if (error) { 2109 goto failed; 2110 } 2111 progress |= BOND_ADD_PROGRESS_PROTO_ATTACHED; 2112 2113 /* set the interface MTU */ 2114 devmtu = bond_device_mtu(ifp, ifb); 2115 error = siocsifmtu(port_ifp, devmtu); 2116 if (error != 0) { 2117 printf("bond_add_interface(%s, %s):" 2118 " SIOCSIFMTU %d failed %d\n", 2119 ifb->ifb_name, bondport_get_name(p), devmtu, error); 2120 goto failed; 2121 } 2122 progress |= BOND_ADD_PROGRESS_MTU_SET; 2123 2124 /* program the port with our multicast addresses */ 2125 error = multicast_list_program(&p->po_multicast, ifp, port_ifp); 2126 if (error) { 2127 printf("bond_add_interface(%s, %s):" 2128 " multicast_list_program failed %d\n", 2129 ifb->ifb_name, bondport_get_name(p), error); 
2130 goto failed; 2131 } 2132 2133 /* mark the interface up */ 2134 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP); 2135 2136 error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL); 2137 if (error != 0) { 2138 printf("bond_add_interface(%s, %s): SIOCSIFFLAGS failed %d\n", 2139 ifb->ifb_name, bondport_get_name(p), error); 2140 goto failed; 2141 } 2142 2143 /* re-program the port's ethernet address */ 2144 error = if_siflladdr(port_ifp, 2145 (const struct ether_addr *)IF_LLADDR(ifp)); 2146 if (error != 0) { 2147 /* port doesn't support setting the link address */ 2148 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n", 2149 ifb->ifb_name, bondport_get_name(p), error); 2150 goto failed; 2151 } 2152 progress |= BOND_ADD_PROGRESS_LLADDR_SET; 2153 2154 bond_lock(); 2155 2156 /* no failures past this point */ 2157 p->po_enabled = 1; 2158 2159 /* copy the contents of the existing distributing array */ 2160 if (ifb->ifb_distributing_count) { 2161 bcopy(ifb->ifb_distributing_array, new_array, 2162 sizeof(*new_array) * ifb->ifb_distributing_count); 2163 } 2164 old_array = ifb->ifb_distributing_array; 2165 ifb->ifb_distributing_array = new_array; 2166 2167 if (ifb->ifb_mode == IF_BOND_MODE_LACP) { 2168 bondport_start(p); 2169 2170 /* check if we need to generate a link status event */ 2171 if (ifbond_selection(ifb)) { 2172 event_code = (ifb->ifb_active_lag == NULL) 2173 ? KEV_DL_LINK_OFF 2174 : KEV_DL_LINK_ON; 2175 ifb->ifb_last_link_event = event_code; 2176 } 2177 } 2178 else { 2179 /* are we adding the first distributing interface? 
*/ 2180 if (media_active(&p->po_media_info)) { 2181 if (ifb->ifb_distributing_count == 0) { 2182 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON; 2183 } 2184 bondport_enable_distributing(p); 2185 } 2186 else { 2187 bondport_disable_distributing(p); 2188 } 2189 } 2190 /* clear the busy state, and wakeup anyone waiting */ 2191 ifbond_signal(ifb, "bond_add_interface"); 2192 bond_unlock(); 2193 if (event_code != 0) { 2194 interface_link_event(ifp, event_code); 2195 } 2196 if (old_array != NULL) { 2197 FREE(old_array, M_BOND); 2198 } 2199 return 0; 2200 2201 failed: 2202 bond_assert_lock_not_held(); 2203 2204 /* if this was the first port to be added, clear our address */ 2205 if (first) { 2206 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG); 2207 } 2208 2209 if (new_array != NULL) { 2210 FREE(new_array, M_BOND); 2211 } 2212 if ((progress & BOND_ADD_PROGRESS_LLADDR_SET) != 0) { 2213 int error1; 2214 2215 error1 = if_siflladdr(port_ifp, &p->po_saved_addr); 2216 if (error1 != 0) { 2217 printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n", 2218 ifb->ifb_name, bondport_get_name(p), error1); 2219 } 2220 } 2221 if ((progress & BOND_ADD_PROGRESS_PROTO_ATTACHED) != 0) { 2222 (void)bond_detach_protocol(port_ifp); 2223 } 2224 if ((progress & BOND_ADD_PROGRESS_MTU_SET) != 0) { 2225 int error1; 2226 2227 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current); 2228 if (error1 != 0) { 2229 printf("bond_add_interface(%s, %s): SIOCSIFMTU %d failed %d\n", 2230 ifb->ifb_name, bondport_get_name(p), 2231 p->po_devmtu.ifdm_current, error1); 2232 } 2233 } 2234 bond_lock(); 2235 if ((progress & BOND_ADD_PROGRESS_IN_LIST) != 0) { 2236 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list); 2237 ifb->ifb_port_count--; 2238 } 2239 ifnet_set_eflags(ifp, 0, IFEF_BOND); 2240 if (TAILQ_EMPTY(&ifb->ifb_port_list)) { 2241 ifb->ifb_altmtu = 0; 2242 ifnet_set_mtu(ifp, 0); 2243 ifnet_set_offload(ifp, 0); 2244 } 2245 2246 signal_done: 2247 ifbond_signal(ifb, "bond_add_interface"); 
2248 bond_unlock(); 2249 ifbond_release(ifb); 2250 bondport_free(p); 2251 return (error); 2252} 2253 2254static int 2255bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp) 2256{ 2257 int active_lag = 0; 2258 int error = 0; 2259 int event_code = 0; 2260 bondport_ref head_port; 2261 struct ifnet * ifp; 2262 int last = FALSE; 2263 int new_link_address = FALSE; 2264 bondport_ref p; 2265 lacp_actor_partner_state s; 2266 int was_distributing; 2267 2268 bond_assert_lock_held(); 2269 2270 ifbond_retain(ifb); 2271 ifbond_wait(ifb, "bond_remove_interface"); 2272 2273 p = ifbond_lookup_port(ifb, port_ifp); 2274 if (p == NULL) { 2275 error = ENXIO; 2276 /* it got removed by another thread */ 2277 goto signal_done; 2278 } 2279 2280 /* de-select it and remove it from the lists */ 2281 was_distributing = bondport_flags_distributing(p); 2282 bondport_disable_distributing(p); 2283 if (ifb->ifb_mode == IF_BOND_MODE_LACP) { 2284 bondport_set_selected(p, SelectedState_UNSELECTED); 2285 active_lag = bondport_remove_from_LAG(p); 2286 /* invalidate timers here while holding the bond_lock */ 2287 bondport_invalidate_timers(p); 2288 2289 /* announce that we're Individual now */ 2290 s = p->po_actor_state; 2291 s = lacp_actor_partner_state_set_individual(s); 2292 s = lacp_actor_partner_state_set_not_collecting(s); 2293 s = lacp_actor_partner_state_set_not_distributing(s); 2294 s = lacp_actor_partner_state_set_out_of_sync(s); 2295 p->po_actor_state = s; 2296 bondport_flags_set_ntt(p); 2297 } 2298 2299 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list); 2300 ifb->ifb_port_count--; 2301 2302 ifp = ifb->ifb_ifp; 2303 head_port = TAILQ_FIRST(&ifb->ifb_port_list); 2304 if (head_port == NULL) { 2305 ifnet_set_flags(ifp, 0, IFF_RUNNING); 2306 if (ifbond_flags_lladdr(ifb) == FALSE) { 2307 last = TRUE; 2308 } 2309 ifnet_set_offload(ifp, 0); 2310 ifnet_set_mtu(ifp, 0); 2311 ifb->ifb_altmtu = 0; 2312 } else if (ifbond_flags_lladdr(ifb) == FALSE 2313 && bcmp(&p->po_saved_addr, IF_LLADDR(ifp), 
2314 ETHER_ADDR_LEN) == 0) { 2315 new_link_address = TRUE; 2316 } 2317 /* check if we need to generate a link status event */ 2318 if (ifb->ifb_mode == IF_BOND_MODE_LACP ) { 2319 if (ifbond_selection(ifb) || active_lag) { 2320 event_code = (ifb->ifb_active_lag == NULL) 2321 ? KEV_DL_LINK_OFF 2322 : KEV_DL_LINK_ON; 2323 ifb->ifb_last_link_event = event_code; 2324 } 2325 bondport_transmit_machine(p, LAEventStart, 2326 TRANSMIT_MACHINE_TX_IMMEDIATE); 2327 } 2328 else { 2329 /* are we removing the last distributing interface? */ 2330 if (was_distributing && ifb->ifb_distributing_count == 0) { 2331 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF; 2332 } 2333 } 2334 2335 bond_unlock(); 2336 2337 if (last) { 2338 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG); 2339 } 2340 else if (new_link_address) { 2341 struct ifnet * scan_ifp; 2342 bondport_ref scan_port; 2343 2344 /* ifbond_wait() allows port list traversal without holding the lock */ 2345 2346 /* this port gave the bond its ethernet address, switch to new one */ 2347 ifnet_set_lladdr_and_type(ifp, 2348 &head_port->po_saved_addr, ETHER_ADDR_LEN, 2349 IFT_ETHER); 2350 2351 /* re-program each port with the new link address */ 2352 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) { 2353 scan_ifp = scan_port->po_ifp; 2354 2355 error = if_siflladdr(scan_ifp, 2356 (const struct ether_addr *) IF_LLADDR(ifp)); 2357 if (error != 0) { 2358 printf("bond_remove_interface(%s, %s): " 2359 "if_siflladdr (%s) failed %d\n", 2360 ifb->ifb_name, bondport_get_name(p), 2361 bondport_get_name(scan_port), error); 2362 } 2363 } 2364 } 2365 2366 /* restore the port's ethernet address */ 2367 error = if_siflladdr(port_ifp, &p->po_saved_addr); 2368 if (error != 0) { 2369 printf("bond_remove_interface(%s, %s): if_siflladdr failed %d\n", 2370 ifb->ifb_name, bondport_get_name(p), error); 2371 } 2372 2373 /* restore the port's MTU */ 2374 error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current); 2375 if (error != 0) { 
2376 printf("bond_remove_interface(%s, %s): SIOCSIFMTU %d failed %d\n", 2377 ifb->ifb_name, bondport_get_name(p), 2378 p->po_devmtu.ifdm_current, error); 2379 } 2380 2381 /* remove the bond "protocol" */ 2382 bond_detach_protocol(port_ifp); 2383 2384 /* generate link event */ 2385 if (event_code != 0) { 2386 interface_link_event(ifp, event_code); 2387 } 2388 2389 bond_lock(); 2390 bondport_free(p); 2391 ifnet_set_eflags(port_ifp, 0, IFEF_BOND); 2392 /* release this bondport's reference to the ifbond */ 2393 ifbond_release(ifb); 2394 2395 signal_done: 2396 ifbond_signal(ifb, "bond_remove_interface"); 2397 ifbond_release(ifb); 2398 return (error); 2399} 2400 2401static void 2402bond_set_lacp_mode(ifbond_ref ifb) 2403{ 2404 bondport_ref p; 2405 2406 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { 2407 bondport_disable_distributing(p); 2408 bondport_start(p); 2409 } 2410 return; 2411} 2412 2413static void 2414bond_set_static_mode(ifbond_ref ifb) 2415{ 2416 bondport_ref p; 2417 lacp_actor_partner_state s; 2418 2419 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { 2420 bondport_disable_distributing(p); 2421 bondport_set_selected(p, SelectedState_UNSELECTED); 2422 (void)bondport_remove_from_LAG(p); 2423 bondport_cancel_timers(p); 2424 2425 /* announce that we're Individual now */ 2426 s = p->po_actor_state; 2427 s = lacp_actor_partner_state_set_individual(s); 2428 s = lacp_actor_partner_state_set_not_collecting(s); 2429 s = lacp_actor_partner_state_set_not_distributing(s); 2430 s = lacp_actor_partner_state_set_out_of_sync(s); 2431 p->po_actor_state = s; 2432 bondport_flags_set_ntt(p); 2433 bondport_transmit_machine(p, LAEventStart, 2434 TRANSMIT_MACHINE_TX_IMMEDIATE); 2435 /* clear state */ 2436 p->po_actor_state = 0; 2437 bzero(&p->po_partner_state, sizeof(p->po_partner_state)); 2438 2439 if (media_active(&p->po_media_info)) { 2440 bondport_enable_distributing(p); 2441 } 2442 else { 2443 bondport_disable_distributing(p); 2444 } 2445 } 2446 return; 2447} 2448 
2449static int 2450bond_set_mode(struct ifnet * ifp, int mode) 2451{ 2452 int error = 0; 2453 int event_code = 0; 2454 ifbond_ref ifb; 2455 2456 bond_lock(); 2457 ifb = (ifbond_ref)ifnet_softc(ifp); 2458 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2459 bond_unlock(); 2460 return ((ifb == NULL) ? EOPNOTSUPP : EBUSY); 2461 } 2462 if (ifb->ifb_mode == mode) { 2463 bond_unlock(); 2464 return (0); 2465 } 2466 2467 ifbond_retain(ifb); 2468 ifbond_wait(ifb, "bond_set_mode"); 2469 2470 /* verify (again) that the mode is actually different */ 2471 if (ifb->ifb_mode == mode) { 2472 /* nothing to do */ 2473 goto signal_done; 2474 } 2475 2476 ifb->ifb_mode = mode; 2477 if (mode == IF_BOND_MODE_LACP) { 2478 bond_set_lacp_mode(ifb); 2479 2480 /* check if we need to generate a link status event */ 2481 if (ifbond_selection(ifb)) { 2482 event_code = (ifb->ifb_active_lag == NULL) 2483 ? KEV_DL_LINK_OFF 2484 : KEV_DL_LINK_ON; 2485 } 2486 } else { 2487 bond_set_static_mode(ifb); 2488 event_code = (ifb->ifb_distributing_count == 0) 2489 ? KEV_DL_LINK_OFF 2490 : KEV_DL_LINK_ON; 2491 } 2492 ifb->ifb_last_link_event = event_code; 2493 2494 signal_done: 2495 ifbond_signal(ifb, "bond_set_mode"); 2496 bond_unlock(); 2497 ifbond_release(ifb); 2498 2499 if (event_code != 0) { 2500 interface_link_event(ifp, event_code); 2501 } 2502 return (error); 2503} 2504 2505static int 2506bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap) 2507{ 2508 int count; 2509 user_addr_t dst; 2510 int error = 0; 2511 struct if_bond_status_req * ibsr; 2512 struct if_bond_status ibs; 2513 bondport_ref port; 2514 2515 ibsr = &(ibr_p->ibr_ibru.ibru_status); 2516 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) { 2517 return (EINVAL); 2518 } 2519 ibsr->ibsr_key = ifb->ifb_key; 2520 ibsr->ibsr_mode = ifb->ifb_mode; 2521 ibsr->ibsr_total = ifb->ifb_port_count; 2522 dst = proc_is64bit(current_proc()) 2523 ? 
ibsr->ibsr_ibsru.ibsru_buffer64 2524 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer); 2525 if (dst == USER_ADDR_NULL) { 2526 /* just want to know how many there are */ 2527 goto done; 2528 } 2529 if (ibsr->ibsr_count < 0) { 2530 return (EINVAL); 2531 } 2532 count = (ifb->ifb_port_count < ibsr->ibsr_count) 2533 ? ifb->ifb_port_count : ibsr->ibsr_count; 2534 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) { 2535 struct if_bond_partner_state * ibps_p; 2536 partner_state_ref ps; 2537 2538 if (count == 0) { 2539 break; 2540 } 2541 bzero(&ibs, sizeof(ibs)); 2542 strncpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name)); 2543 ibs.ibs_port_priority = port->po_priority; 2544 if (ifb->ifb_mode == IF_BOND_MODE_LACP) { 2545 ibs.ibs_state = port->po_actor_state; 2546 ibs.ibs_selected_state = port->po_selected; 2547 ps = &port->po_partner_state; 2548 ibps_p = &ibs.ibs_partner_state; 2549 ibps_p->ibps_system = ps->ps_lag_info.li_system; 2550 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority; 2551 ibps_p->ibps_key = ps->ps_lag_info.li_key; 2552 ibps_p->ibps_port = ps->ps_port; 2553 ibps_p->ibps_port_priority = ps->ps_port_priority; 2554 ibps_p->ibps_state = ps->ps_state; 2555 } 2556 else { 2557 /* fake the selected information */ 2558 ibs.ibs_selected_state = bondport_flags_distributing(port) 2559 ? 
SelectedState_SELECTED : SelectedState_UNSELECTED; 2560 } 2561 error = copyout(&ibs, dst, sizeof(ibs)); 2562 if (error != 0) { 2563 break; 2564 } 2565 dst += sizeof(ibs); 2566 count--; 2567 } 2568 2569 done: 2570 if (error == 0) { 2571 error = copyout(ibr_p, datap, sizeof(*ibr_p)); 2572 } 2573 else { 2574 (void)copyout(ibr_p, datap, sizeof(*ibr_p)); 2575 } 2576 return (error); 2577} 2578 2579static int 2580bond_set_promisc(__unused struct ifnet *ifp) 2581{ 2582 int error = 0; 2583 return (error); 2584} 2585 2586static void 2587bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max) 2588{ 2589 int mtu_min = 0; 2590 int mtu_max = 0; 2591 bondport_ref p; 2592 2593 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) { 2594 mtu_min = IF_MINMTU; 2595 } 2596 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { 2597 struct ifdevmtu * devmtu_p = &p->po_devmtu; 2598 2599 if (devmtu_p->ifdm_min > mtu_min) { 2600 mtu_min = devmtu_p->ifdm_min; 2601 } 2602 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) { 2603 mtu_max = devmtu_p->ifdm_max; 2604 } 2605 } 2606 *ret_min = mtu_min; 2607 *ret_max = mtu_max; 2608 return; 2609} 2610 2611static int 2612bond_set_mtu_on_ports(ifbond_ref ifb, int mtu) 2613{ 2614 int error = 0; 2615 bondport_ref p; 2616 2617 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { 2618 error = siocsifmtu(p->po_ifp, mtu); 2619 if (error != 0) { 2620 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n", 2621 ifb->ifb_name, bondport_get_name(p), error); 2622 break; 2623 } 2624 } 2625 return (error); 2626} 2627 2628static int 2629bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu) 2630{ 2631 int error = 0; 2632 ifbond_ref ifb; 2633 int mtu_min; 2634 int mtu_max; 2635 int new_max; 2636 int old_max; 2637 2638 bond_lock(); 2639 ifb = (ifbond_ref)ifnet_softc(ifp); 2640 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2641 error = (ifb == NULL) ? 
EOPNOTSUPP : EBUSY; 2642 goto done; 2643 } 2644 ifbond_retain(ifb); 2645 ifbond_wait(ifb, "bond_set_mtu"); 2646 2647 /* check again */ 2648 if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) { 2649 error = EBUSY; 2650 goto signal_done; 2651 } 2652 bond_get_mtu_values(ifb, &mtu_min, &mtu_max); 2653 if (mtu > mtu_max) { 2654 error = EINVAL; 2655 goto signal_done; 2656 } 2657 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) { 2658 /* allow SIOCSIFALTMTU to set the mtu to 0 */ 2659 error = EINVAL; 2660 goto signal_done; 2661 } 2662 if (isdevmtu) { 2663 new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp); 2664 } 2665 else { 2666 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu; 2667 } 2668 old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu) 2669 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu; 2670 if (new_max != old_max) { 2671 /* we can safely walk the list of port without the lock held */ 2672 bond_unlock(); 2673 error = bond_set_mtu_on_ports(ifb, new_max); 2674 if (error != 0) { 2675 /* try our best to back out of it */ 2676 (void)bond_set_mtu_on_ports(ifb, old_max); 2677 } 2678 bond_lock(); 2679 } 2680 if (error == 0) { 2681 if (isdevmtu) { 2682 ifb->ifb_altmtu = mtu; 2683 } 2684 else { 2685 ifnet_set_mtu(ifp, mtu); 2686 } 2687 } 2688 2689 signal_done: 2690 ifbond_signal(ifb, "bond_set_mtu"); 2691 ifbond_release(ifb); 2692 2693 done: 2694 bond_unlock(); 2695 return (error); 2696} 2697 2698static int 2699bond_ioctl(struct ifnet *ifp, u_long cmd, void * data) 2700{ 2701 int error = 0; 2702 struct if_bond_req ibr; 2703 struct ifaddr * ifa; 2704 ifbond_ref ifb; 2705 struct ifreq * ifr; 2706 struct ifmediareq *ifmr; 2707 struct ifnet * port_ifp = NULL; 2708 user_addr_t user_addr; 2709 2710 if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) { 2711 return (EOPNOTSUPP); 2712 } 2713 ifr = (struct ifreq *)data; 2714 ifa = (struct ifaddr *)data; 2715 2716 switch (cmd) { 2717 case SIOCSIFADDR: 2718 ifnet_set_flags(ifp, IFF_UP, IFF_UP); 2719 break; 2720 2721 
case SIOCGIFMEDIA32: 2722 case SIOCGIFMEDIA64: 2723 bond_lock(); 2724 ifb = (ifbond_ref)ifnet_softc(ifp); 2725 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2726 bond_unlock(); 2727 return (ifb == NULL ? EOPNOTSUPP : EBUSY); 2728 } 2729 ifmr = (struct ifmediareq *)data; 2730 ifmr->ifm_current = IFM_ETHER; 2731 ifmr->ifm_mask = 0; 2732 ifmr->ifm_status = IFM_AVALID; 2733 ifmr->ifm_active = IFM_ETHER; 2734 ifmr->ifm_count = 1; 2735 if (ifb->ifb_mode == IF_BOND_MODE_LACP) { 2736 if (ifb->ifb_active_lag != NULL) { 2737 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media; 2738 ifmr->ifm_status |= IFM_ACTIVE; 2739 } 2740 } 2741 else if (ifb->ifb_distributing_count > 0) { 2742 ifmr->ifm_active 2743 = ifb->ifb_distributing_array[0]->po_media_info.mi_active; 2744 ifmr->ifm_status |= IFM_ACTIVE; 2745 } 2746 bond_unlock(); 2747 user_addr = (cmd == SIOCGIFMEDIA64) ? 2748 ((struct ifmediareq64 *)ifmr)->ifmu_ulist : 2749 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist); 2750 if (user_addr != USER_ADDR_NULL) { 2751 error = copyout(&ifmr->ifm_current, 2752 user_addr, 2753 sizeof(int)); 2754 } 2755 break; 2756 2757 case SIOCSIFMEDIA: 2758 /* XXX send the SIFMEDIA to all children? Or force autoselect? */ 2759 error = EINVAL; 2760 break; 2761 2762 case SIOCGIFDEVMTU: 2763 bond_lock(); 2764 ifb = (ifbond_ref)ifnet_softc(ifp); 2765 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2766 bond_unlock(); 2767 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY; 2768 break; 2769 } 2770 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb); 2771 bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min, 2772 &ifr->ifr_devmtu.ifdm_max); 2773 bond_unlock(); 2774 break; 2775 2776 case SIOCGIFALTMTU: 2777 bond_lock(); 2778 ifb = (ifbond_ref)ifnet_softc(ifp); 2779 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2780 bond_unlock(); 2781 error = (ifb == NULL) ? 
EOPNOTSUPP : EBUSY; 2782 break; 2783 } 2784 ifr->ifr_mtu = ifb->ifb_altmtu; 2785 bond_unlock(); 2786 break; 2787 2788 case SIOCSIFALTMTU: 2789 error = bond_set_mtu(ifp, ifr->ifr_mtu, 1); 2790 break; 2791 2792 case SIOCSIFMTU: 2793 error = bond_set_mtu(ifp, ifr->ifr_mtu, 0); 2794 break; 2795 2796 case SIOCSIFBOND: 2797 user_addr = proc_is64bit(current_proc()) 2798 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data); 2799 error = copyin(user_addr, &ibr, sizeof(ibr)); 2800 if (error) { 2801 break; 2802 } 2803 switch (ibr.ibr_op) { 2804 case IF_BOND_OP_ADD_INTERFACE: 2805 case IF_BOND_OP_REMOVE_INTERFACE: 2806 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name); 2807 if (port_ifp == NULL) { 2808 error = ENXIO; 2809 break; 2810 } 2811 if (ifnet_type(port_ifp) != IFT_ETHER) { 2812 error = EPROTONOSUPPORT; 2813 break; 2814 } 2815 break; 2816 case IF_BOND_OP_SET_VERBOSE: 2817 case IF_BOND_OP_SET_MODE: 2818 break; 2819 default: 2820 error = EOPNOTSUPP; 2821 break; 2822 } 2823 if (error != 0) { 2824 break; 2825 } 2826 switch (ibr.ibr_op) { 2827 case IF_BOND_OP_ADD_INTERFACE: 2828 error = bond_add_interface(ifp, port_ifp); 2829 break; 2830 case IF_BOND_OP_REMOVE_INTERFACE: 2831 bond_lock(); 2832 ifb = (ifbond_ref)ifnet_softc(ifp); 2833 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2834 bond_unlock(); 2835 return (ifb == NULL ? 
EOPNOTSUPP : EBUSY); 2836 } 2837 error = bond_remove_interface(ifb, port_ifp); 2838 bond_unlock(); 2839 break; 2840 case IF_BOND_OP_SET_VERBOSE: 2841 bond_lock(); 2842 if (g_bond == NULL) { 2843 bond_unlock(); 2844 error = ENXIO; 2845 break; 2846 } 2847 g_bond->verbose = ibr.ibr_ibru.ibru_int_val; 2848 bond_unlock(); 2849 break; 2850 case IF_BOND_OP_SET_MODE: 2851 switch (ibr.ibr_ibru.ibru_int_val) { 2852 case IF_BOND_MODE_LACP: 2853 case IF_BOND_MODE_STATIC: 2854 break; 2855 default: 2856 error = EINVAL; 2857 break; 2858 } 2859 if (error != 0) { 2860 break; 2861 } 2862 error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val); 2863 break; 2864 } 2865 break; /* SIOCSIFBOND */ 2866 2867 case SIOCGIFBOND: 2868 user_addr = proc_is64bit(current_proc()) 2869 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data); 2870 error = copyin(user_addr, &ibr, sizeof(ibr)); 2871 if (error) { 2872 break; 2873 } 2874 switch (ibr.ibr_op) { 2875 case IF_BOND_OP_GET_STATUS: 2876 break; 2877 default: 2878 error = EOPNOTSUPP; 2879 break; 2880 } 2881 if (error != 0) { 2882 break; 2883 } 2884 bond_lock(); 2885 ifb = (ifbond_ref)ifnet_softc(ifp); 2886 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { 2887 bond_unlock(); 2888 return (ifb == NULL ? 
EOPNOTSUPP : EBUSY); 2889 } 2890 switch (ibr.ibr_op) { 2891 case IF_BOND_OP_GET_STATUS: 2892 error = bond_get_status(ifb, &ibr, user_addr); 2893 break; 2894 } 2895 bond_unlock(); 2896 break; /* SIOCGIFBOND */ 2897 2898 case SIOCSIFLLADDR: 2899 error = EOPNOTSUPP; 2900 break; 2901 2902 case SIOCSIFFLAGS: 2903 /* enable/disable promiscuous mode */ 2904 bond_lock(); 2905 error = bond_set_promisc(ifp); 2906 bond_unlock(); 2907 break; 2908 2909 case SIOCADDMULTI: 2910 case SIOCDELMULTI: 2911 error = bond_setmulti(ifp); 2912 break; 2913 default: 2914 error = EOPNOTSUPP; 2915 } 2916 return error; 2917} 2918 2919static void 2920bond_if_free(struct ifnet * ifp) 2921{ 2922 ifbond_ref ifb; 2923 2924 if (ifp == NULL) { 2925 return; 2926 } 2927 bond_lock(); 2928 ifb = (ifbond_ref)ifnet_softc(ifp); 2929 if (ifb == NULL) { 2930 bond_unlock(); 2931 return; 2932 } 2933 ifbond_release(ifb); 2934 bond_unlock(); 2935 ifnet_release(ifp); 2936 return; 2937} 2938 2939static void 2940bond_handle_event(struct ifnet * port_ifp, int event_code) 2941{ 2942 struct ifnet * bond_ifp = NULL; 2943 ifbond_ref ifb; 2944 int old_distributing_count; 2945 bondport_ref p; 2946 struct media_info media_info = { 0, 0}; 2947 2948 switch (event_code) { 2949 case KEV_DL_IF_DETACHED: 2950 break; 2951 case KEV_DL_LINK_OFF: 2952 case KEV_DL_LINK_ON: 2953 media_info = interface_media_info(port_ifp); 2954 break; 2955 default: 2956 return; 2957 } 2958 bond_lock(); 2959 p = bond_lookup_port(port_ifp); 2960 if (p == NULL) { 2961 bond_unlock(); 2962 return; 2963 } 2964 ifb = p->po_bond; 2965 old_distributing_count = ifb->ifb_distributing_count; 2966 switch (event_code) { 2967 case KEV_DL_IF_DETACHED: 2968 bond_remove_interface(ifb, p->po_ifp); 2969 break; 2970 case KEV_DL_LINK_OFF: 2971 case KEV_DL_LINK_ON: 2972 p->po_media_info = media_info; 2973 if (p->po_enabled) { 2974 bondport_link_status_changed(p); 2975 } 2976 break; 2977 } 2978 /* generate a link-event */ 2979 if (ifb->ifb_mode == IF_BOND_MODE_LACP) { 2980 if 
(ifbond_selection(ifb)) { 2981 event_code = (ifb->ifb_active_lag == NULL) 2982 ? KEV_DL_LINK_OFF 2983 : KEV_DL_LINK_ON; 2984 /* XXX need to take a reference on bond_ifp */ 2985 bond_ifp = ifb->ifb_ifp; 2986 ifb->ifb_last_link_event = event_code; 2987 } 2988 else { 2989 event_code = (ifb->ifb_active_lag == NULL) 2990 ? KEV_DL_LINK_OFF 2991 : KEV_DL_LINK_ON; 2992 if (event_code != ifb->ifb_last_link_event) { 2993 if (g_bond->verbose) { 2994 timestamp_printf("%s: (event) generating LINK event\n", 2995 ifb->ifb_name); 2996 } 2997 bond_ifp = ifb->ifb_ifp; 2998 ifb->ifb_last_link_event = event_code; 2999 } 3000 } 3001 } 3002 else { 3003 /* 3004 * if the distributing array membership changed from 0 <-> !0 3005 * generate a link event 3006 */ 3007 if (old_distributing_count == 0 3008 && ifb->ifb_distributing_count != 0) { 3009 event_code = KEV_DL_LINK_ON; 3010 } 3011 else if (old_distributing_count != 0 3012 && ifb->ifb_distributing_count == 0) { 3013 event_code = KEV_DL_LINK_OFF; 3014 } 3015 if (event_code != 0 && event_code != ifb->ifb_last_link_event) { 3016 bond_ifp = ifb->ifb_ifp; 3017 ifb->ifb_last_link_event = event_code; 3018 } 3019 } 3020 3021 bond_unlock(); 3022 if (bond_ifp != NULL) { 3023 interface_link_event(bond_ifp, event_code); 3024 } 3025 return; 3026} 3027 3028static void 3029bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol, 3030 const struct kev_msg * event) 3031{ 3032 int event_code; 3033 3034 if (event->vendor_code != KEV_VENDOR_APPLE 3035 || event->kev_class != KEV_NETWORK_CLASS 3036 || event->kev_subclass != KEV_DL_SUBCLASS) { 3037 return; 3038 } 3039 event_code = event->event_code; 3040 switch (event_code) { 3041 case KEV_DL_LINK_OFF: 3042 case KEV_DL_LINK_ON: 3043 /* we only care about link status changes */ 3044 bond_handle_event(port_ifp, event_code); 3045 break; 3046 default: 3047 break; 3048 } 3049 return; 3050} 3051 3052static errno_t 3053bond_detached(ifnet_t port_ifp, __unused protocol_family_t protocol) 3054{ 3055 
bond_handle_event(port_ifp, KEV_DL_IF_DETACHED); 3056 return (0); 3057} 3058 3059static void 3060interface_link_event(struct ifnet * ifp, u_int32_t event_code) 3061{ 3062 struct { 3063 struct kern_event_msg header; 3064 u_int32_t unit; 3065 char if_name[IFNAMSIZ]; 3066 } event; 3067 3068 bzero(&event, sizeof(event)); 3069 event.header.total_size = sizeof(event); 3070 event.header.vendor_code = KEV_VENDOR_APPLE; 3071 event.header.kev_class = KEV_NETWORK_CLASS; 3072 event.header.kev_subclass = KEV_DL_SUBCLASS; 3073 event.header.event_code = event_code; 3074 event.header.event_data[0] = ifnet_family(ifp); 3075 event.unit = (u_int32_t) ifnet_unit(ifp); 3076 strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); 3077 ifnet_event(ifp, &event.header); 3078 return; 3079} 3080 3081/* 3082 * Function: bond_attach_protocol 3083 * Purpose: 3084 * Attach a DLIL protocol to the interface. 3085 * 3086 * The ethernet demux special cases to always return PF_BOND if the 3087 * interface is bonded. That means we receive all traffic from that 3088 * interface without passing any of the traffic to any other attached 3089 * protocol. 
3090 */ 3091static int 3092bond_attach_protocol(struct ifnet *ifp) 3093{ 3094 int error; 3095 struct ifnet_attach_proto_param reg; 3096 3097 bzero(®, sizeof(reg)); 3098 reg.input = bond_input; 3099 reg.event = bond_event; 3100 reg.detached = bond_detached; 3101 3102 error = ifnet_attach_protocol(ifp, PF_BOND, ®); 3103 if (error) { 3104 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n", 3105 ifnet_name(ifp), ifnet_unit(ifp), error); 3106 } 3107 return (error); 3108} 3109 3110/* 3111 * Function: bond_detach_protocol 3112 * Purpose: 3113 * Detach our DLIL protocol from an interface 3114 */ 3115static int 3116bond_detach_protocol(struct ifnet *ifp) 3117{ 3118 int error; 3119 3120 error = ifnet_detach_protocol(ifp, PF_BOND); 3121 if (error) { 3122 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n", 3123 ifnet_name(ifp), ifnet_unit(ifp), error); 3124 } 3125 return (error); 3126} 3127 3128/* 3129 * DLIL interface family functions 3130 */ 3131extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family); 3132extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family); 3133extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family); 3134extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family); 3135extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family); 3136extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family); 3137 3138__private_extern__ int 3139bond_family_init(void) 3140{ 3141 int error=0; 3142 3143 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND, 3144 ether_attach_inet, 3145 ether_detach_inet); 3146 if (error != 0) { 3147 printf("bond: proto_register_plumber failed for AF_INET error=%d\n", 3148 error); 3149 goto done; 3150 } 3151#if INET6 3152 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND, 3153 ether_attach_inet6, 3154 ether_detach_inet6); 3155 if (error != 0) { 3156 printf("bond: proto_register_plumber failed for 
AF_INET6 error=%d\n", 3157 error); 3158 goto done; 3159 } 3160#endif 3161 error = bond_clone_attach(); 3162 if (error != 0) { 3163 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n", 3164 error); 3165 goto done; 3166 } 3167 3168 done: 3169 return (error); 3170} 3171/** 3172 ** 3173 ** LACP routines: 3174 ** 3175 **/ 3176 3177/** 3178 ** LACP ifbond_list routines 3179 **/ 3180static bondport_ref 3181ifbond_list_find_moved_port(bondport_ref rx_port, 3182 const lacp_actor_partner_tlv_ref atlv) 3183{ 3184 ifbond_ref bond; 3185 bondport_ref p; 3186 partner_state_ref ps; 3187 LAG_info_ref ps_li; 3188 3189 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) { 3190 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) { 3191 3192 if (rx_port == p) { 3193 /* no point in comparing against ourselves */ 3194 continue; 3195 } 3196 if (p->po_receive_state != ReceiveState_PORT_DISABLED) { 3197 /* it's not clear that we should be checking this */ 3198 continue; 3199 } 3200 ps = &p->po_partner_state; 3201 if (lacp_actor_partner_state_defaulted(ps->ps_state)) { 3202 continue; 3203 } 3204 ps_li = &ps->ps_lag_info; 3205 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv) 3206 && bcmp(&ps_li->li_system, atlv->lap_system, 3207 sizeof(ps_li->li_system)) == 0) { 3208 if (g_bond->verbose) { 3209 timestamp_printf("System " EA_FORMAT 3210 " Port 0x%x moved from %s to %s\n", 3211 EA_LIST(&ps_li->li_system), ps->ps_port, 3212 bondport_get_name(p), 3213 bondport_get_name(rx_port)); 3214 } 3215 return (p); 3216 } 3217 } 3218 } 3219 return (NULL); 3220} 3221 3222/** 3223 ** LACP ifbond, LAG routines 3224 **/ 3225 3226static int 3227ifbond_selection(ifbond_ref bond) 3228{ 3229 int all_ports_ready = 0; 3230 int active_media = 0; 3231 LAG_ref lag = NULL; 3232 int lag_changed = 0; 3233 bondport_ref p; 3234 int port_speed = 0; 3235 3236 lag = ifbond_find_best_LAG(bond, &active_media); 3237 if (lag != bond->ifb_active_lag) { 3238 if (bond->ifb_active_lag != NULL) { 3239 
ifbond_deactivate_LAG(bond, bond->ifb_active_lag); 3240 bond->ifb_active_lag = NULL; 3241 } 3242 bond->ifb_active_lag = lag; 3243 if (lag != NULL) { 3244 ifbond_activate_LAG(bond, lag, active_media); 3245 } 3246 lag_changed = 1; 3247 } 3248 else if (lag != NULL) { 3249 if (lag->lag_active_media != active_media) { 3250 if (g_bond->verbose) { 3251 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n", 3252 link_speed(lag->lag_active_media), 3253 link_speed(active_media)); 3254 } 3255 ifbond_deactivate_LAG(bond, lag); 3256 ifbond_activate_LAG(bond, lag, active_media); 3257 lag_changed = 1; 3258 } 3259 } 3260 if (lag != NULL) { 3261 port_speed = link_speed(active_media); 3262 all_ports_ready = ifbond_all_ports_ready(bond); 3263 } 3264 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) { 3265 if (lag != NULL && p->po_lag == lag 3266 && media_speed(&p->po_media_info) == port_speed 3267 && (p->po_mux_state == MuxState_DETACHED 3268 || p->po_selected == SelectedState_SELECTED 3269 || p->po_selected == SelectedState_STANDBY) 3270 && bondport_aggregatable(p)) { 3271 if (bond->ifb_max_active > 0) { 3272 if (lag->lag_selected_port_count < bond->ifb_max_active) { 3273 if (p->po_selected == SelectedState_STANDBY 3274 || p->po_selected == SelectedState_UNSELECTED) { 3275 bondport_set_selected(p, SelectedState_SELECTED); 3276 } 3277 } 3278 else if (p->po_selected == SelectedState_UNSELECTED) { 3279 bondport_set_selected(p, SelectedState_STANDBY); 3280 } 3281 } 3282 else { 3283 bondport_set_selected(p, SelectedState_SELECTED); 3284 } 3285 } 3286 if (bondport_flags_selected_changed(p)) { 3287 bondport_flags_clear_selected_changed(p); 3288 bondport_mux_machine(p, LAEventSelectedChange, NULL); 3289 } 3290 if (all_ports_ready 3291 && bondport_flags_ready(p) 3292 && p->po_mux_state == MuxState_WAITING) { 3293 bondport_mux_machine(p, LAEventReady, NULL); 3294 } 3295 bondport_transmit_machine(p, LAEventStart, NULL); 3296 } 3297 return (lag_changed); 3298} 3299 3300static LAG_ref 
3301ifbond_find_best_LAG(ifbond_ref bond, int * active_media) 3302{ 3303 int best_active = 0; 3304 LAG_ref best_lag = NULL; 3305 int best_count = 0; 3306 int best_speed = 0; 3307 LAG_ref lag; 3308 3309 if (bond->ifb_active_lag != NULL) { 3310 best_lag = bond->ifb_active_lag; 3311 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active); 3312 if (bond->ifb_max_active > 0 3313 && best_count > bond->ifb_max_active) { 3314 best_count = bond->ifb_max_active; 3315 } 3316 best_speed = link_speed(best_active); 3317 } 3318 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) { 3319 int active; 3320 int count; 3321 int speed; 3322 3323 if (lag == bond->ifb_active_lag) { 3324 /* we've already computed it */ 3325 continue; 3326 } 3327 count = LAG_get_aggregatable_port_count(lag, &active); 3328 if (count == 0) { 3329 continue; 3330 } 3331 if (bond->ifb_max_active > 0 3332 && count > bond->ifb_max_active) { 3333 /* if there's a limit, don't count extra links */ 3334 count = bond->ifb_max_active; 3335 } 3336 speed = link_speed(active); 3337 if ((count * speed) > (best_count * best_speed)) { 3338 best_count = count; 3339 best_speed = speed; 3340 best_active = active; 3341 best_lag = lag; 3342 } 3343 } 3344 if (best_count == 0) { 3345 return (NULL); 3346 } 3347 *active_media = best_active; 3348 return (best_lag); 3349} 3350 3351static void 3352ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag) 3353{ 3354 bondport_ref p; 3355 3356 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) { 3357 bondport_set_selected(p, SelectedState_UNSELECTED); 3358 } 3359 return; 3360} 3361 3362static void 3363ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media) 3364{ 3365 int need = 0; 3366 bondport_ref p; 3367 3368 if (bond->ifb_max_active > 0) { 3369 need = bond->ifb_max_active; 3370 } 3371 lag->lag_active_media = active_media; 3372 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) { 3373 if (bondport_aggregatable(p) == 0) { 3374 bondport_set_selected(p, 
SelectedState_UNSELECTED); 3375 } 3376 else if (media_speed(&p->po_media_info) != link_speed(active_media)) { 3377 bondport_set_selected(p, SelectedState_UNSELECTED); 3378 } 3379 else if (p->po_mux_state == MuxState_DETACHED) { 3380 if (bond->ifb_max_active > 0) { 3381 if (need > 0) { 3382 bondport_set_selected(p, SelectedState_SELECTED); 3383 need--; 3384 } 3385 else { 3386 bondport_set_selected(p, SelectedState_STANDBY); 3387 } 3388 } 3389 else { 3390 bondport_set_selected(p, SelectedState_SELECTED); 3391 } 3392 } 3393 else { 3394 bondport_set_selected(p, SelectedState_UNSELECTED); 3395 } 3396 } 3397 return; 3398} 3399 3400#if 0 3401static void 3402ifbond_set_max_active(ifbond_ref bond, int max_active) 3403{ 3404 LAG_ref lag = bond->ifb_active_lag; 3405 3406 bond->ifb_max_active = max_active; 3407 if (bond->ifb_max_active <= 0 || lag == NULL) { 3408 return; 3409 } 3410 if (lag->lag_selected_port_count > bond->ifb_max_active) { 3411 bondport_ref p; 3412 int remove_count; 3413 3414 remove_count = lag->lag_selected_port_count - bond->ifb_max_active; 3415 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) { 3416 if (p->po_selected == SelectedState_SELECTED) { 3417 bondport_set_selected(p, SelectedState_UNSELECTED); 3418 remove_count--; 3419 if (remove_count == 0) { 3420 break; 3421 } 3422 } 3423 } 3424 } 3425 return; 3426} 3427#endif 3428 3429static int 3430ifbond_all_ports_ready(ifbond_ref bond) 3431{ 3432 int ready = 0; 3433 bondport_ref p; 3434 3435 if (bond->ifb_active_lag == NULL) { 3436 return (0); 3437 } 3438 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) { 3439 if (p->po_mux_state == MuxState_WAITING 3440 && p->po_selected == SelectedState_SELECTED) { 3441 if (bondport_flags_ready(p) == 0) { 3442 return (0); 3443 } 3444 } 3445 /* note that there was at least one ready port */ 3446 ready = 1; 3447 } 3448 return (ready); 3449} 3450 3451static int 3452ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port) 3453{ 3454 
bondport_ref p; 3455 3456 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) { 3457 if (this_port == p) { 3458 continue; 3459 } 3460 if (bondport_flags_mux_attached(p) == 0) { 3461 return (0); 3462 } 3463 } 3464 return (1); 3465} 3466 3467static LAG_ref 3468ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p) 3469{ 3470 LAG_ref lag; 3471 3472 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) { 3473 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info, 3474 sizeof(lag->lag_info)) == 0) { 3475 return (lag); 3476 } 3477 } 3478 return (NULL); 3479} 3480 3481static int 3482LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media) 3483{ 3484 int active; 3485 int count; 3486 bondport_ref p; 3487 int speed; 3488 3489 active = 0; 3490 count = 0; 3491 speed = 0; 3492 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) { 3493 if (bondport_aggregatable(p)) { 3494 int this_speed; 3495 3496 this_speed = media_speed(&p->po_media_info); 3497 if (this_speed == 0) { 3498 continue; 3499 } 3500 if (this_speed > speed) { 3501 active = p->po_media_info.mi_active; 3502 speed = this_speed; 3503 count = 1; 3504 } 3505 else if (this_speed == speed) { 3506 count++; 3507 } 3508 } 3509 } 3510 *active_media = active; 3511 return (count); 3512} 3513 3514 3515/** 3516 ** LACP bondport routines 3517 **/ 3518static void 3519bondport_link_status_changed(bondport_ref p) 3520{ 3521 ifbond_ref bond = p->po_bond; 3522 3523 if (g_bond->verbose) { 3524 if (media_active(&p->po_media_info)) { 3525 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n", 3526 bondport_get_name(p), 3527 media_speed(&p->po_media_info), 3528 media_full_duplex(&p->po_media_info) 3529 ? 
"full" : "half"); 3530 } 3531 else { 3532 timestamp_printf("[%s] Link DOWN\n", bondport_get_name(p)); 3533 } 3534 } 3535 if (bond->ifb_mode == IF_BOND_MODE_LACP) { 3536 if (media_active(&p->po_media_info) 3537 && bond->ifb_active_lag != NULL 3538 && p->po_lag == bond->ifb_active_lag 3539 && p->po_selected != SelectedState_UNSELECTED) { 3540 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) { 3541 if (g_bond->verbose) { 3542 timestamp_printf("[%s] Port speed %d differs from LAG %d\n", 3543 bondport_get_name(p), 3544 media_speed(&p->po_media_info), 3545 link_speed(p->po_lag->lag_active_media)); 3546 } 3547 bondport_set_selected(p, SelectedState_UNSELECTED); 3548 } 3549 } 3550 bondport_receive_machine(p, LAEventMediaChange, NULL); 3551 bondport_mux_machine(p, LAEventMediaChange, NULL); 3552 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL); 3553 } 3554 else { 3555 if (media_active(&p->po_media_info)) { 3556 bondport_enable_distributing(p); 3557 } 3558 else { 3559 bondport_disable_distributing(p); 3560 } 3561 } 3562 return; 3563} 3564 3565static int 3566bondport_aggregatable(bondport_ref p) 3567{ 3568 partner_state_ref ps = &p->po_partner_state; 3569 3570 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0 3571 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) { 3572 /* we and/or our partner are individual */ 3573 return (0); 3574 } 3575 if (p->po_lag == NULL) { 3576 return (0); 3577 } 3578 switch (p->po_receive_state) { 3579 default: 3580 if (g_bond->verbose) { 3581 timestamp_printf("[%s] Port is not selectable\n", 3582 bondport_get_name(p)); 3583 } 3584 return (0); 3585 case ReceiveState_CURRENT: 3586 case ReceiveState_EXPIRED: 3587 break; 3588 } 3589 return (1); 3590} 3591 3592static int 3593bondport_matches_LAG(bondport_ref p, LAG_ref lag) 3594{ 3595 LAG_info_ref lag_li; 3596 partner_state_ref ps; 3597 LAG_info_ref ps_li; 3598 3599 ps = &p->po_partner_state; 3600 ps_li = &ps->ps_lag_info; 3601 lag_li = 
&lag->lag_info; 3602 if (ps_li->li_system_priority == lag_li->li_system_priority 3603 && ps_li->li_key == lag_li->li_key 3604 && (bcmp(&ps_li->li_system, &lag_li->li_system, 3605 sizeof(lag_li->li_system)) 3606 == 0)) { 3607 return (1); 3608 } 3609 return (0); 3610} 3611 3612static int 3613bondport_remove_from_LAG(bondport_ref p) 3614{ 3615 int active_lag = 0; 3616 ifbond_ref bond = p->po_bond; 3617 LAG_ref lag = p->po_lag; 3618 3619 if (lag == NULL) { 3620 return (0); 3621 } 3622 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list); 3623 if (g_bond->verbose) { 3624 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT 3625 ",0x%04x)\n", 3626 bondport_get_name(p), 3627 lag->lag_info.li_system_priority, 3628 EA_LIST(&lag->lag_info.li_system), 3629 lag->lag_info.li_key); 3630 } 3631 p->po_lag = NULL; 3632 lag->lag_port_count--; 3633 if (lag->lag_port_count > 0) { 3634 return (bond->ifb_active_lag == lag); 3635 } 3636 if (g_bond->verbose) { 3637 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT 3638 ",0x%04x)\n", 3639 bond->ifb_key, 3640 lag->lag_info.li_system_priority, 3641 EA_LIST(&lag->lag_info.li_system), 3642 lag->lag_info.li_key); 3643 } 3644 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list); 3645 if (bond->ifb_active_lag == lag) { 3646 bond->ifb_active_lag = NULL; 3647 active_lag = 1; 3648 } 3649 FREE(lag, M_BOND); 3650 return (active_lag); 3651} 3652 3653static void 3654bondport_add_to_LAG(bondport_ref p, LAG_ref lag) 3655{ 3656 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list); 3657 p->po_lag = lag; 3658 lag->lag_port_count++; 3659 if (g_bond->verbose) { 3660 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n", 3661 bondport_get_name(p), 3662 lag->lag_info.li_system_priority, 3663 EA_LIST(&lag->lag_info.li_system), 3664 lag->lag_info.li_key); 3665 } 3666 return; 3667} 3668 3669static void 3670bondport_assign_to_LAG(bondport_ref p) 3671{ 3672 ifbond_ref bond = p->po_bond; 3673 LAG_ref lag; 3674 3675 if 
(lacp_actor_partner_state_defaulted(p->po_actor_state)) { 3676 bondport_remove_from_LAG(p); 3677 return; 3678 } 3679 lag = p->po_lag; 3680 if (lag != NULL) { 3681 if (bondport_matches_LAG(p, lag)) { 3682 /* still OK */ 3683 return; 3684 } 3685 bondport_remove_from_LAG(p); 3686 } 3687 lag = ifbond_get_LAG_matching_port(bond, p); 3688 if (lag != NULL) { 3689 bondport_add_to_LAG(p, lag); 3690 return; 3691 } 3692 lag = (LAG_ref)_MALLOC(sizeof(*lag), M_BOND, M_WAITOK); 3693 TAILQ_INIT(&lag->lag_port_list); 3694 lag->lag_port_count = 0; 3695 lag->lag_selected_port_count = 0; 3696 lag->lag_info = p->po_partner_state.ps_lag_info; 3697 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list); 3698 if (g_bond->verbose) { 3699 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT 3700 ",0x%04x)\n", 3701 bond->ifb_key, 3702 lag->lag_info.li_system_priority, 3703 EA_LIST(&lag->lag_info.li_system), 3704 lag->lag_info.li_key); 3705 } 3706 bondport_add_to_LAG(p, lag); 3707 return; 3708} 3709 3710static void 3711bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p) 3712{ 3713 bondport_ref moved_port; 3714 3715 moved_port 3716 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref) 3717 &in_lacpdu_p->la_actor_tlv); 3718 if (moved_port != NULL) { 3719 bondport_receive_machine(moved_port, LAEventPortMoved, NULL); 3720 } 3721 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p); 3722 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p); 3723 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p); 3724 return; 3725} 3726 3727static void 3728bondport_set_selected(bondport_ref p, SelectedState s) 3729{ 3730 if (s != p->po_selected) { 3731 ifbond_ref bond = p->po_bond; 3732 LAG_ref lag = p->po_lag; 3733 3734 bondport_flags_set_selected_changed(p); 3735 if (lag != NULL && bond->ifb_active_lag == lag) { 3736 if (p->po_selected == SelectedState_SELECTED) { 3737 lag->lag_selected_port_count--; 3738 } 3739 else if (s == SelectedState_SELECTED) { 3740 
lag->lag_selected_port_count++; 3741 } 3742 if (g_bond->verbose) { 3743 timestamp_printf("[%s] SetSelected: %s (was %s)\n", 3744 bondport_get_name(p), 3745 SelectedStateString(s), 3746 SelectedStateString(p->po_selected)); 3747 } 3748 } 3749 } 3750 p->po_selected = s; 3751 return; 3752} 3753 3754/** 3755 ** Receive machine 3756 **/ 3757 3758static void 3759bondport_UpdateDefaultSelected(bondport_ref p) 3760{ 3761 bondport_set_selected(p, SelectedState_UNSELECTED); 3762 return; 3763} 3764 3765static void 3766bondport_RecordDefault(bondport_ref p) 3767{ 3768 bzero(&p->po_partner_state, sizeof(p->po_partner_state)); 3769 p->po_actor_state 3770 = lacp_actor_partner_state_set_defaulted(p->po_actor_state); 3771 bondport_assign_to_LAG(p); 3772 return; 3773} 3774 3775static void 3776bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p) 3777{ 3778 lacp_actor_partner_tlv_ref actor; 3779 partner_state_ref ps; 3780 LAG_info_ref ps_li; 3781 3782 /* compare the PDU's Actor information to our Partner state */ 3783 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv; 3784 ps = &p->po_partner_state; 3785 ps_li = &ps->ps_lag_info; 3786 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port 3787 || (lacp_actor_partner_tlv_get_port_priority(actor) 3788 != ps->ps_port_priority) 3789 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system)) 3790 || (lacp_actor_partner_tlv_get_system_priority(actor) 3791 != ps_li->li_system_priority) 3792 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key) 3793 || (lacp_actor_partner_state_aggregatable(actor->lap_state) 3794 != lacp_actor_partner_state_aggregatable(ps->ps_state))) { 3795 bondport_set_selected(p, SelectedState_UNSELECTED); 3796 if (g_bond->verbose) { 3797 timestamp_printf("[%s] updateSelected UNSELECTED\n", 3798 bondport_get_name(p)); 3799 } 3800 } 3801 return; 3802} 3803 3804static void 3805bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p) 3806{ 3807 lacp_actor_partner_tlv_ref actor; 3808 
ifbond_ref bond = p->po_bond; 3809 int lacp_maintain = 0; 3810 partner_state_ref ps; 3811 lacp_actor_partner_tlv_ref partner; 3812 LAG_info_ref ps_li; 3813 3814 /* copy the PDU's Actor information into our Partner state */ 3815 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv; 3816 ps = &p->po_partner_state; 3817 ps_li = &ps->ps_lag_info; 3818 ps->ps_port = lacp_actor_partner_tlv_get_port(actor); 3819 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor); 3820 ps_li->li_system = *((lacp_system_ref)actor->lap_system); 3821 ps_li->li_system_priority 3822 = lacp_actor_partner_tlv_get_system_priority(actor); 3823 ps_li->li_key = lacp_actor_partner_tlv_get_key(actor); 3824 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state); 3825 p->po_actor_state 3826 = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state); 3827 3828 /* compare the PDU's Partner information to our own information */ 3829 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv; 3830 3831 if (lacp_actor_partner_state_active_lacp(ps->ps_state) 3832 || (lacp_actor_partner_state_active_lacp(p->po_actor_state) 3833 && lacp_actor_partner_state_active_lacp(partner->lap_state))) { 3834 if (g_bond->verbose) { 3835 timestamp_printf("[%s] recordPDU: LACP will maintain\n", 3836 bondport_get_name(p)); 3837 } 3838 lacp_maintain = 1; 3839 } 3840 if ((lacp_actor_partner_tlv_get_port(partner) 3841 == bondport_get_index(p)) 3842 && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority 3843 && bcmp(partner->lap_system, &g_bond->system, 3844 sizeof(g_bond->system)) == 0 3845 && (lacp_actor_partner_tlv_get_system_priority(partner) 3846 == g_bond->system_priority) 3847 && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key 3848 && (lacp_actor_partner_state_aggregatable(partner->lap_state) 3849 == lacp_actor_partner_state_aggregatable(p->po_actor_state)) 3850 && lacp_actor_partner_state_in_sync(actor->lap_state) 3851 && lacp_maintain) { 3852 
ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state); 3853 if (g_bond->verbose) { 3854 timestamp_printf("[%s] recordPDU: LACP partner in sync\n", 3855 bondport_get_name(p)); 3856 } 3857 } 3858 else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0 3859 && lacp_actor_partner_state_in_sync(actor->lap_state) 3860 && lacp_maintain) { 3861 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state); 3862 if (g_bond->verbose) { 3863 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n", 3864 bondport_get_name(p)); 3865 } 3866 } 3867 bondport_assign_to_LAG(p); 3868 return; 3869} 3870 3871static __inline__ lacp_actor_partner_state 3872updateNTTBits(lacp_actor_partner_state s) 3873{ 3874 return (s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY 3875 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT 3876 | LACP_ACTOR_PARTNER_STATE_AGGREGATION 3877 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION)); 3878} 3879 3880static void 3881bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p) 3882{ 3883 ifbond_ref bond = p->po_bond; 3884 lacp_actor_partner_tlv_ref partner; 3885 3886 /* compare the PDU's Actor information to our Partner state */ 3887 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv; 3888 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p)) 3889 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority 3890 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system)) 3891 || (lacp_actor_partner_tlv_get_system_priority(partner) 3892 != g_bond->system_priority) 3893 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key 3894 || (updateNTTBits(partner->lap_state) 3895 != updateNTTBits(p->po_actor_state))) { 3896 bondport_flags_set_ntt(p); 3897 if (g_bond->verbose) { 3898 timestamp_printf("[%s] updateNTT: Need To Transmit\n", 3899 bondport_get_name(p)); 3900 } 3901 } 3902 return; 3903} 3904 3905static void 3906bondport_AttachMuxToAggregator(bondport_ref p) 3907{ 3908 if 
(bondport_flags_mux_attached(p) == 0) { 3909 if (g_bond->verbose) { 3910 timestamp_printf("[%s] Attached Mux To Aggregator\n", 3911 bondport_get_name(p)); 3912 } 3913 bondport_flags_set_mux_attached(p); 3914 } 3915 return; 3916} 3917 3918static void 3919bondport_DetachMuxFromAggregator(bondport_ref p) 3920{ 3921 if (bondport_flags_mux_attached(p)) { 3922 if (g_bond->verbose) { 3923 timestamp_printf("[%s] Detached Mux From Aggregator\n", 3924 bondport_get_name(p)); 3925 } 3926 bondport_flags_clear_mux_attached(p); 3927 } 3928 return; 3929} 3930 3931static void 3932bondport_enable_distributing(bondport_ref p) 3933{ 3934 if (bondport_flags_distributing(p) == 0) { 3935 ifbond_ref bond = p->po_bond; 3936 3937 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p; 3938 if (g_bond->verbose) { 3939 timestamp_printf("[%s] Distribution Enabled\n", 3940 bondport_get_name(p)); 3941 } 3942 bondport_flags_set_distributing(p); 3943 } 3944 return; 3945} 3946 3947static void 3948bondport_disable_distributing(bondport_ref p) 3949{ 3950 if (bondport_flags_distributing(p)) { 3951 bondport_ref * array; 3952 ifbond_ref bond; 3953 int count; 3954 int i; 3955 3956 bond = p->po_bond; 3957 array = bond->ifb_distributing_array; 3958 count = bond->ifb_distributing_count; 3959 for (i = 0; i < count; i++) { 3960 if (array[i] == p) { 3961 int j; 3962 3963 for (j = i; j < (count - 1); j++) { 3964 array[j] = array[j + 1]; 3965 } 3966 break; 3967 } 3968 } 3969 bond->ifb_distributing_count--; 3970 if (g_bond->verbose) { 3971 timestamp_printf("[%s] Distribution Disabled\n", 3972 bondport_get_name(p)); 3973 } 3974 bondport_flags_clear_distributing(p); 3975 } 3976 return; 3977} 3978 3979/** 3980 ** Receive machine functions 3981 **/ 3982static void 3983bondport_receive_machine_initialize(bondport_ref p, LAEvent event, 3984 void * event_data); 3985static void 3986bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event, 3987 void * event_data); 3988static void 
3989bondport_receive_machine_expired(bondport_ref p, LAEvent event, 3990 void * event_data); 3991static void 3992bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event, 3993 void * event_data); 3994static void 3995bondport_receive_machine_defaulted(bondport_ref p, LAEvent event, 3996 void * event_data); 3997static void 3998bondport_receive_machine_current(bondport_ref p, LAEvent event, 3999 void * event_data); 4000 4001static void 4002bondport_receive_machine_event(bondport_ref p, LAEvent event, 4003 void * event_data) 4004{ 4005 switch (p->po_receive_state) { 4006 case ReceiveState_none: 4007 bondport_receive_machine_initialize(p, LAEventStart, NULL); 4008 break; 4009 case ReceiveState_INITIALIZE: 4010 bondport_receive_machine_initialize(p, event, event_data); 4011 break; 4012 case ReceiveState_PORT_DISABLED: 4013 bondport_receive_machine_port_disabled(p, event, event_data); 4014 break; 4015 case ReceiveState_EXPIRED: 4016 bondport_receive_machine_expired(p, event, event_data); 4017 break; 4018 case ReceiveState_LACP_DISABLED: 4019 bondport_receive_machine_lacp_disabled(p, event, event_data); 4020 break; 4021 case ReceiveState_DEFAULTED: 4022 bondport_receive_machine_defaulted(p, event, event_data); 4023 break; 4024 case ReceiveState_CURRENT: 4025 bondport_receive_machine_current(p, event, event_data); 4026 break; 4027 default: 4028 break; 4029 } 4030 return; 4031} 4032 4033static void 4034bondport_receive_machine(bondport_ref p, LAEvent event, 4035 void * event_data) 4036{ 4037 switch (event) { 4038 case LAEventPacket: 4039 if (p->po_receive_state != ReceiveState_LACP_DISABLED) { 4040 bondport_receive_machine_current(p, event, event_data); 4041 } 4042 break; 4043 case LAEventMediaChange: 4044 if (media_active(&p->po_media_info)) { 4045 switch (p->po_receive_state) { 4046 case ReceiveState_PORT_DISABLED: 4047 case ReceiveState_LACP_DISABLED: 4048 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL); 4049 break; 4050 default: 4051 
break; 4052 } 4053 } 4054 else { 4055 bondport_receive_machine_port_disabled(p, LAEventStart, NULL); 4056 } 4057 break; 4058 default: 4059 bondport_receive_machine_event(p, event, event_data); 4060 break; 4061 } 4062 return; 4063} 4064 4065static void 4066bondport_receive_machine_initialize(bondport_ref p, LAEvent event, 4067 __unused void * event_data) 4068{ 4069 switch (event) { 4070 case LAEventStart: 4071 devtimer_cancel(p->po_current_while_timer); 4072 if (g_bond->verbose) { 4073 timestamp_printf("[%s] Receive INITIALIZE\n", 4074 bondport_get_name(p)); 4075 } 4076 p->po_receive_state = ReceiveState_INITIALIZE; 4077 bondport_set_selected(p, SelectedState_UNSELECTED); 4078 bondport_RecordDefault(p); 4079 p->po_actor_state 4080 = lacp_actor_partner_state_set_not_expired(p->po_actor_state); 4081 bondport_receive_machine_port_disabled(p, LAEventStart, NULL); 4082 break; 4083 default: 4084 break; 4085 } 4086 return; 4087} 4088 4089static void 4090bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event, 4091 __unused void * event_data) 4092{ 4093 partner_state_ref ps; 4094 4095 switch (event) { 4096 case LAEventStart: 4097 devtimer_cancel(p->po_current_while_timer); 4098 if (g_bond->verbose) { 4099 timestamp_printf("[%s] Receive PORT_DISABLED\n", 4100 bondport_get_name(p)); 4101 } 4102 p->po_receive_state = ReceiveState_PORT_DISABLED; 4103 ps = &p->po_partner_state; 4104 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state); 4105 /* FALL THROUGH */ 4106 case LAEventMediaChange: 4107 if (media_active(&p->po_media_info)) { 4108 if (media_full_duplex(&p->po_media_info)) { 4109 bondport_receive_machine_expired(p, LAEventStart, NULL); 4110 } 4111 else { 4112 bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL); 4113 } 4114 } 4115 else if (p->po_selected == SelectedState_SELECTED) { 4116 struct timeval tv; 4117 4118 if (g_bond->verbose) { 4119 timestamp_printf("[%s] Receive PORT_DISABLED: " 4120 "link timer started\n", 4121 
bondport_get_name(p)); 4122 } 4123 tv.tv_sec = 1; 4124 tv.tv_usec = 0; 4125 devtimer_set_relative(p->po_current_while_timer, tv, 4126 (devtimer_timeout_func) 4127 bondport_receive_machine_port_disabled, 4128 (void *)LAEventTimeout, NULL); 4129 } 4130 else if (p->po_selected == SelectedState_STANDBY) { 4131 bondport_set_selected(p, SelectedState_UNSELECTED); 4132 } 4133 break; 4134 case LAEventTimeout: 4135 if (p->po_selected == SelectedState_SELECTED) { 4136 if (g_bond->verbose) { 4137 timestamp_printf("[%s] Receive PORT_DISABLED: " 4138 "link timer completed, marking UNSELECTED\n", 4139 bondport_get_name(p)); 4140 } 4141 bondport_set_selected(p, SelectedState_UNSELECTED); 4142 } 4143 break; 4144 case LAEventPortMoved: 4145 bondport_receive_machine_initialize(p, LAEventStart, NULL); 4146 break; 4147 default: 4148 break; 4149 } 4150 return; 4151} 4152 4153static void 4154bondport_receive_machine_expired(bondport_ref p, LAEvent event, 4155 __unused void * event_data) 4156{ 4157 lacp_actor_partner_state s; 4158 struct timeval tv; 4159 4160 switch (event) { 4161 case LAEventStart: 4162 devtimer_cancel(p->po_current_while_timer); 4163 if (g_bond->verbose) { 4164 timestamp_printf("[%s] Receive EXPIRED\n", 4165 bondport_get_name(p)); 4166 } 4167 p->po_receive_state = ReceiveState_EXPIRED; 4168 s = p->po_partner_state.ps_state; 4169 s = lacp_actor_partner_state_set_out_of_sync(s); 4170 s = lacp_actor_partner_state_set_short_timeout(s); 4171 p->po_partner_state.ps_state = s; 4172 p->po_actor_state 4173 = lacp_actor_partner_state_set_expired(p->po_actor_state); 4174 /* start current_while timer */ 4175 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME; 4176 tv.tv_usec = 0; 4177 devtimer_set_relative(p->po_current_while_timer, tv, 4178 (devtimer_timeout_func) 4179 bondport_receive_machine_expired, 4180 (void *)LAEventTimeout, NULL); 4181 4182 break; 4183 case LAEventTimeout: 4184 bondport_receive_machine_defaulted(p, LAEventStart, NULL); 4185 break; 4186 default: 4187 break; 4188 } 4189 
return; 4190} 4191 4192static void 4193bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event, 4194 __unused void * event_data) 4195{ 4196 partner_state_ref ps; 4197 switch (event) { 4198 case LAEventStart: 4199 devtimer_cancel(p->po_current_while_timer); 4200 if (g_bond->verbose) { 4201 timestamp_printf("[%s] Receive LACP_DISABLED\n", 4202 bondport_get_name(p)); 4203 } 4204 p->po_receive_state = ReceiveState_LACP_DISABLED; 4205 bondport_set_selected(p, SelectedState_UNSELECTED); 4206 bondport_RecordDefault(p); 4207 ps = &p->po_partner_state; 4208 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state); 4209 p->po_actor_state 4210 = lacp_actor_partner_state_set_not_expired(p->po_actor_state); 4211 break; 4212 default: 4213 break; 4214 } 4215 return; 4216} 4217 4218static void 4219bondport_receive_machine_defaulted(bondport_ref p, LAEvent event, 4220 __unused void * event_data) 4221{ 4222 switch (event) { 4223 case LAEventStart: 4224 devtimer_cancel(p->po_current_while_timer); 4225 if (g_bond->verbose) { 4226 timestamp_printf("[%s] Receive DEFAULTED\n", 4227 bondport_get_name(p)); 4228 } 4229 p->po_receive_state = ReceiveState_DEFAULTED; 4230 bondport_UpdateDefaultSelected(p); 4231 bondport_RecordDefault(p); 4232 p->po_actor_state 4233 = lacp_actor_partner_state_set_not_expired(p->po_actor_state); 4234 break; 4235 default: 4236 break; 4237 } 4238 return; 4239} 4240 4241static void 4242bondport_receive_machine_current(bondport_ref p, LAEvent event, 4243 void * event_data) 4244{ 4245 partner_state_ref ps; 4246 struct timeval tv; 4247 4248 switch (event) { 4249 case LAEventPacket: 4250 devtimer_cancel(p->po_current_while_timer); 4251 if (g_bond->verbose) { 4252 timestamp_printf("[%s] Receive CURRENT\n", 4253 bondport_get_name(p)); 4254 } 4255 p->po_receive_state = ReceiveState_CURRENT; 4256 bondport_UpdateSelected(p, event_data); 4257 bondport_UpdateNTT(p, event_data); 4258 bondport_RecordPDU(p, event_data); 4259 p->po_actor_state 4260 = 
lacp_actor_partner_state_set_not_expired(p->po_actor_state); 4261 bondport_assign_to_LAG(p); 4262 /* start current_while timer */ 4263 ps = &p->po_partner_state; 4264 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) { 4265 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME; 4266 } 4267 else { 4268 tv.tv_sec = LACP_LONG_TIMEOUT_TIME; 4269 } 4270 tv.tv_usec = 0; 4271 devtimer_set_relative(p->po_current_while_timer, tv, 4272 (devtimer_timeout_func) 4273 bondport_receive_machine_current, 4274 (void *)LAEventTimeout, NULL); 4275 break; 4276 case LAEventTimeout: 4277 bondport_receive_machine_expired(p, LAEventStart, NULL); 4278 break; 4279 default: 4280 break; 4281 } 4282 return; 4283} 4284 4285/** 4286 ** Periodic Transmission machine 4287 **/ 4288 4289static void 4290bondport_periodic_transmit_machine(bondport_ref p, LAEvent event, 4291 __unused void * event_data) 4292{ 4293 int interval; 4294 partner_state_ref ps; 4295 struct timeval tv; 4296 4297 switch (event) { 4298 case LAEventStart: 4299 if (g_bond->verbose) { 4300 timestamp_printf("[%s] periodic_transmit Start\n", 4301 bondport_get_name(p)); 4302 } 4303 /* FALL THROUGH */ 4304 case LAEventMediaChange: 4305 devtimer_cancel(p->po_periodic_timer); 4306 p->po_periodic_interval = 0; 4307 if (media_active(&p->po_media_info) == 0 4308 || media_full_duplex(&p->po_media_info) == 0) { 4309 break; 4310 } 4311 case LAEventPacket: 4312 /* Neither Partner nor Actor are LACP Active, no periodic tx */ 4313 ps = &p->po_partner_state; 4314 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0 4315 && (lacp_actor_partner_state_active_lacp(ps->ps_state) 4316 == 0)) { 4317 devtimer_cancel(p->po_periodic_timer); 4318 p->po_periodic_interval = 0; 4319 break; 4320 } 4321 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) { 4322 interval = LACP_FAST_PERIODIC_TIME; 4323 } 4324 else { 4325 interval = LACP_SLOW_PERIODIC_TIME; 4326 } 4327 if (p->po_periodic_interval != interval) { 4328 if (interval == LACP_FAST_PERIODIC_TIME 
4329 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) { 4330 if (g_bond->verbose) { 4331 timestamp_printf("[%s] periodic_transmit:" 4332 " Need To Transmit\n", 4333 bondport_get_name(p)); 4334 } 4335 bondport_flags_set_ntt(p); 4336 } 4337 p->po_periodic_interval = interval; 4338 tv.tv_usec = 0; 4339 tv.tv_sec = interval; 4340 devtimer_set_relative(p->po_periodic_timer, tv, 4341 (devtimer_timeout_func) 4342 bondport_periodic_transmit_machine, 4343 (void *)LAEventTimeout, NULL); 4344 if (g_bond->verbose) { 4345 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n", 4346 bondport_get_name(p), 4347 p->po_periodic_interval); 4348 } 4349 } 4350 break; 4351 case LAEventTimeout: 4352 bondport_flags_set_ntt(p); 4353 tv.tv_sec = p->po_periodic_interval; 4354 tv.tv_usec = 0; 4355 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func) 4356 bondport_periodic_transmit_machine, 4357 (void *)LAEventTimeout, NULL); 4358 if (g_bond->verbose > 1) { 4359 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n", 4360 bondport_get_name(p), p->po_periodic_interval); 4361 } 4362 break; 4363 default: 4364 break; 4365 } 4366 return; 4367} 4368 4369/** 4370 ** Transmit machine 4371 **/ 4372static int 4373bondport_can_transmit(bondport_ref p, int32_t current_secs, 4374 __darwin_time_t * next_secs) 4375{ 4376 if (p->po_last_transmit_secs != current_secs) { 4377 p->po_last_transmit_secs = current_secs; 4378 p->po_n_transmit = 0; 4379 } 4380 if (p->po_n_transmit < LACP_PACKET_RATE) { 4381 p->po_n_transmit++; 4382 return (1); 4383 } 4384 if (next_secs != NULL) { 4385 *next_secs = current_secs + 1; 4386 } 4387 return (0); 4388} 4389 4390static void 4391bondport_transmit_machine(bondport_ref p, LAEvent event, 4392 void * event_data) 4393{ 4394 lacp_actor_partner_tlv_ref aptlv; 4395 lacp_collector_tlv_ref ctlv; 4396 struct timeval next_tick_time = {0, 0}; 4397 lacpdu_ref out_lacpdu_p; 4398 packet_buffer_ref pkt; 4399 partner_state_ref ps; 4400 LAG_info_ref 
ps_li; 4401 4402 switch (event) { 4403 case LAEventTimeout: 4404 case LAEventStart: 4405 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) { 4406 break; 4407 } 4408 if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) { 4409 /* we're going away, transmit the packet no matter what */ 4410 } 4411 else if (bondport_can_transmit(p, devtimer_current_secs(), 4412 &next_tick_time.tv_sec) == 0) { 4413 if (devtimer_enabled(p->po_transmit_timer)) { 4414 if (g_bond->verbose > 0) { 4415 timestamp_printf("[%s] Transmit Timer Already Set\n", 4416 bondport_get_name(p)); 4417 } 4418 } 4419 else { 4420 devtimer_set_absolute(p->po_transmit_timer, next_tick_time, 4421 (devtimer_timeout_func) 4422 bondport_transmit_machine, 4423 (void *)LAEventTimeout, NULL); 4424 if (g_bond->verbose > 0) { 4425 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n", 4426 bondport_get_name(p), 4427 (int)next_tick_time.tv_sec); 4428 } 4429 } 4430 break; 4431 } 4432 if (g_bond->verbose > 0) { 4433 if (event == LAEventTimeout) { 4434 timestamp_printf("[%s] Transmit Timer Complete\n", 4435 bondport_get_name(p)); 4436 } 4437 } 4438 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p)); 4439 if (pkt == NULL) { 4440 printf("[%s] Transmit: failed to allocate packet buffer\n", 4441 bondport_get_name(p)); 4442 break; 4443 } 4444 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt); 4445 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p)); 4446 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP; 4447 out_lacpdu_p->la_version = LACPDU_VERSION_1; 4448 4449 /* Actor */ 4450 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv; 4451 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR; 4452 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH; 4453 *((lacp_system_ref)aptlv->lap_system) = g_bond->system; 4454 lacp_actor_partner_tlv_set_system_priority(aptlv, 4455 g_bond->system_priority); 4456 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority); 4457 lacp_actor_partner_tlv_set_port(aptlv, 
bondport_get_index(p)); 4458 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key); 4459 aptlv->lap_state = p->po_actor_state; 4460 4461 /* Partner */ 4462 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv; 4463 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER; 4464 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH; 4465 ps = &p->po_partner_state; 4466 ps_li = &ps->ps_lag_info; 4467 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port); 4468 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority); 4469 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system; 4470 lacp_actor_partner_tlv_set_system_priority(aptlv, 4471 ps_li->li_system_priority); 4472 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key); 4473 aptlv->lap_state = ps->ps_state; 4474 4475 /* Collector */ 4476 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv; 4477 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR; 4478 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH; 4479 4480 bondport_slow_proto_transmit(p, pkt); 4481 bondport_flags_clear_ntt(p); 4482 if (g_bond->verbose > 0) { 4483 timestamp_printf("[%s] Transmit Packet %d\n", 4484 bondport_get_name(p), p->po_n_transmit); 4485 } 4486 break; 4487 default: 4488 break; 4489 } 4490 return; 4491} 4492 4493/** 4494 ** Mux machine functions 4495 **/ 4496 4497static void 4498bondport_mux_machine_detached(bondport_ref p, LAEvent event, 4499 void * event_data); 4500static void 4501bondport_mux_machine_waiting(bondport_ref p, LAEvent event, 4502 void * event_data); 4503static void 4504bondport_mux_machine_attached(bondport_ref p, LAEvent event, 4505 void * event_data); 4506 4507static void 4508bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event, 4509 void * event_data); 4510 4511static void 4512bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data) 4513{ 4514 switch (p->po_mux_state) { 4515 case MuxState_none: 4516 bondport_mux_machine_detached(p, LAEventStart, NULL); 4517 break; 4518 case 
MuxState_DETACHED: 4519 bondport_mux_machine_detached(p, event, event_data); 4520 break; 4521 case MuxState_WAITING: 4522 bondport_mux_machine_waiting(p, event, event_data); 4523 break; 4524 case MuxState_ATTACHED: 4525 bondport_mux_machine_attached(p, event, event_data); 4526 break; 4527 case MuxState_COLLECTING_DISTRIBUTING: 4528 bondport_mux_machine_collecting_distributing(p, event, event_data); 4529 break; 4530 default: 4531 break; 4532 } 4533 return; 4534} 4535 4536static void 4537bondport_mux_machine_detached(bondport_ref p, LAEvent event, 4538 __unused void * event_data) 4539{ 4540 lacp_actor_partner_state s; 4541 4542 switch (event) { 4543 case LAEventStart: 4544 devtimer_cancel(p->po_wait_while_timer); 4545 if (g_bond->verbose) { 4546 timestamp_printf("[%s] Mux DETACHED\n", 4547 bondport_get_name(p)); 4548 } 4549 p->po_mux_state = MuxState_DETACHED; 4550 bondport_flags_clear_ready(p); 4551 bondport_DetachMuxFromAggregator(p); 4552 bondport_disable_distributing(p); 4553 s = p->po_actor_state; 4554 s = lacp_actor_partner_state_set_out_of_sync(s); 4555 s = lacp_actor_partner_state_set_not_collecting(s); 4556 s = lacp_actor_partner_state_set_not_distributing(s); 4557 p->po_actor_state = s; 4558 bondport_flags_set_ntt(p); 4559 break; 4560 case LAEventSelectedChange: 4561 case LAEventPacket: 4562 case LAEventMediaChange: 4563 if (p->po_selected == SelectedState_SELECTED 4564 || p->po_selected == SelectedState_STANDBY) { 4565 bondport_mux_machine_waiting(p, LAEventStart, NULL); 4566 } 4567 break; 4568 default: 4569 break; 4570 } 4571 return; 4572} 4573 4574static void 4575bondport_mux_machine_waiting(bondport_ref p, LAEvent event, 4576 __unused void * event_data) 4577{ 4578 struct timeval tv; 4579 4580 switch (event) { 4581 case LAEventStart: 4582 devtimer_cancel(p->po_wait_while_timer); 4583 if (g_bond->verbose) { 4584 timestamp_printf("[%s] Mux WAITING\n", 4585 bondport_get_name(p)); 4586 } 4587 p->po_mux_state = MuxState_WAITING; 4588 /* FALL THROUGH */ 4589 
default: 4590 case LAEventSelectedChange: 4591 if (p->po_selected == SelectedState_UNSELECTED) { 4592 bondport_mux_machine_detached(p, LAEventStart, NULL); 4593 break; 4594 } 4595 if (p->po_selected == SelectedState_STANDBY) { 4596 devtimer_cancel(p->po_wait_while_timer); 4597 /* wait until state changes to SELECTED */ 4598 if (g_bond->verbose) { 4599 timestamp_printf("[%s] Mux WAITING: Standby\n", 4600 bondport_get_name(p)); 4601 } 4602 break; 4603 } 4604 if (bondport_flags_ready(p)) { 4605 if (g_bond->verbose) { 4606 timestamp_printf("[%s] Mux WAITING: Port is already ready\n", 4607 bondport_get_name(p)); 4608 } 4609 break; 4610 } 4611 if (devtimer_enabled(p->po_wait_while_timer)) { 4612 if (g_bond->verbose) { 4613 timestamp_printf("[%s] Mux WAITING: Timer already set\n", 4614 bondport_get_name(p)); 4615 } 4616 break; 4617 } 4618 if (ifbond_all_ports_attached(p->po_bond, p)) { 4619 devtimer_cancel(p->po_wait_while_timer); 4620 if (g_bond->verbose) { 4621 timestamp_printf("[%s] Mux WAITING: No waiting\n", 4622 bondport_get_name(p)); 4623 } 4624 bondport_flags_set_ready(p); 4625 goto no_waiting; 4626 } 4627 if (g_bond->verbose) { 4628 timestamp_printf("[%s] Mux WAITING: 2 seconds\n", 4629 bondport_get_name(p)); 4630 } 4631 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME; 4632 tv.tv_usec = 0; 4633 devtimer_set_relative(p->po_wait_while_timer, tv, 4634 (devtimer_timeout_func) 4635 bondport_mux_machine_waiting, 4636 (void *)LAEventTimeout, NULL); 4637 break; 4638 case LAEventTimeout: 4639 if (g_bond->verbose) { 4640 timestamp_printf("[%s] Mux WAITING: Ready\n", 4641 bondport_get_name(p)); 4642 } 4643 bondport_flags_set_ready(p); 4644 break; 4645 case LAEventReady: 4646 no_waiting: 4647 if (bondport_flags_ready(p)){ 4648 if (g_bond->verbose) { 4649 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n", 4650 bondport_get_name(p)); 4651 } 4652 bondport_mux_machine_attached(p, LAEventStart, NULL); 4653 break; 4654 } 4655 break; 4656 } 4657 return; 4658} 4659 4660static void 
4661bondport_mux_machine_attached(bondport_ref p, LAEvent event, 4662 __unused void * event_data) 4663{ 4664 lacp_actor_partner_state s; 4665 4666 switch (event) { 4667 case LAEventStart: 4668 devtimer_cancel(p->po_wait_while_timer); 4669 if (g_bond->verbose) { 4670 timestamp_printf("[%s] Mux ATTACHED\n", 4671 bondport_get_name(p)); 4672 } 4673 p->po_mux_state = MuxState_ATTACHED; 4674 bondport_AttachMuxToAggregator(p); 4675 s = p->po_actor_state; 4676 s = lacp_actor_partner_state_set_in_sync(s); 4677 s = lacp_actor_partner_state_set_not_collecting(s); 4678 s = lacp_actor_partner_state_set_not_distributing(s); 4679 bondport_disable_distributing(p); 4680 p->po_actor_state = s; 4681 bondport_flags_set_ntt(p); 4682 /* FALL THROUGH */ 4683 default: 4684 switch (p->po_selected) { 4685 case SelectedState_SELECTED: 4686 s = p->po_partner_state.ps_state; 4687 if (lacp_actor_partner_state_in_sync(s)) { 4688 bondport_mux_machine_collecting_distributing(p, LAEventStart, 4689 NULL); 4690 } 4691 break; 4692 default: 4693 bondport_mux_machine_detached(p, LAEventStart, NULL); 4694 break; 4695 } 4696 break; 4697 } 4698 return; 4699} 4700 4701static void 4702bondport_mux_machine_collecting_distributing(bondport_ref p, 4703 LAEvent event, 4704 __unused void * event_data) 4705{ 4706 lacp_actor_partner_state s; 4707 4708 switch (event) { 4709 case LAEventStart: 4710 devtimer_cancel(p->po_wait_while_timer); 4711 if (g_bond->verbose) { 4712 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n", 4713 bondport_get_name(p)); 4714 } 4715 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING; 4716 bondport_enable_distributing(p); 4717 s = p->po_actor_state; 4718 s = lacp_actor_partner_state_set_collecting(s); 4719 s = lacp_actor_partner_state_set_distributing(s); 4720 p->po_actor_state = s; 4721 bondport_flags_set_ntt(p); 4722 /* FALL THROUGH */ 4723 default: 4724 s = p->po_partner_state.ps_state; 4725 if (lacp_actor_partner_state_in_sync(s) == 0) { 4726 bondport_mux_machine_attached(p, 
LAEventStart, NULL); 4727 break; 4728 } 4729 switch (p->po_selected) { 4730 case SelectedState_UNSELECTED: 4731 case SelectedState_STANDBY: 4732 bondport_mux_machine_attached(p, LAEventStart, NULL); 4733 break; 4734 default: 4735 break; 4736 } 4737 break; 4738 } 4739 return; 4740} 4741