ieee8023ad_lacp.c revision 170599
1/* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */ 2 3/*- 4 * Copyright (c)2005 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/net/ieee8023ad_lacp.c 170599 2007-06-12 07:29:11Z thompsa $"); 31 32#include <sys/param.h> 33#include <sys/callout.h> 34#include <sys/mbuf.h> 35#include <sys/systm.h> 36#include <sys/malloc.h> 37#include <sys/kernel.h> /* hz */ 38#include <sys/socket.h> /* for net/if.h */ 39#include <sys/sockio.h> 40#include <machine/stdarg.h> 41#include <sys/lock.h> 42#include <sys/rwlock.h> 43#include <sys/taskqueue.h> 44 45#include <net/if.h> 46#include <net/if_dl.h> 47#include <net/ethernet.h> 48#include <net/if_media.h> 49#include <net/if_types.h> 50 51#include <net/if_lagg.h> 52#include <net/ieee8023ad_lacp.h> 53 54/* 55 * actor system priority and port priority. 56 * XXX should be configurable. 57 */ 58 59#define LACP_SYSTEM_PRIO 0x8000 60#define LACP_PORT_PRIO 0x8000 61 62const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] = 63 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; 64 65static const struct tlv_template lacp_info_tlv_template[] = { 66 { LACP_TYPE_ACTORINFO, 67 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, 68 { LACP_TYPE_PARTNERINFO, 69 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, 70 { LACP_TYPE_COLLECTORINFO, 71 sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) }, 72 { 0, 0 }, 73}; 74 75typedef void (*lacp_timer_func_t)(struct lacp_port *); 76 77static const struct tlv_template marker_info_tlv_template[] = { 78 { MARKER_TYPE_INFO, 79 sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, 80 { 0, 0 }, 81}; 82 83static const struct tlv_template marker_response_tlv_template[] = { 84 { MARKER_TYPE_RESPONSE, 85 sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, 86 { 0, 0 }, 87}; 88 89static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *); 90static void lacp_fill_markerinfo(struct lacp_port *, 91 struct lacp_markerinfo *); 92 93static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *); 94static void lacp_suppress_distributing(struct lacp_softc *, 95 struct lacp_aggregator *); 96static void lacp_transit_expire(void *); 97static void lacp_select_active_aggregator(struct lacp_softc *); 98static uint16_t lacp_compose_key(struct lacp_port *); 99static int tlv_check(const void *, size_t, const struct tlvhdr *, 100 const struct tlv_template *, boolean_t); 101static void lacp_tick(void *); 102 103static void lacp_fill_aggregator_id(struct lacp_aggregator *, 104 const struct lacp_port *); 105static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *, 106 const struct lacp_peerinfo *); 107static int lacp_aggregator_is_compatible(const struct lacp_aggregator *, 108 const struct lacp_port *); 109static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *, 110 const struct lacp_peerinfo *); 111 112static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *, 113 struct lacp_port *); 114static void lacp_aggregator_addref(struct lacp_softc *, 115 struct lacp_aggregator *); 116static void lacp_aggregator_delref(struct lacp_softc *, 117 struct lacp_aggregator *); 118 119/* receive machine */ 120 121static void lacp_dequeue(void *, int); 122static int lacp_pdu_input(struct lagg_port *, struct mbuf *); 123static int lacp_marker_input(struct lagg_port *, struct mbuf *); 124static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *); 125static void lacp_sm_rx_timer(struct lacp_port *); 126static void lacp_sm_rx_set_expired(struct lacp_port *); 127static void lacp_sm_rx_update_ntt(struct lacp_port *, 128 const struct lacpdu *); 129static void lacp_sm_rx_record_pdu(struct lacp_port *, 130 const struct lacpdu *); 131static void lacp_sm_rx_update_selected(struct lacp_port *, 132 const struct lacpdu *); 133static void lacp_sm_rx_record_default(struct lacp_port *); 134static void lacp_sm_rx_update_default_selected(struct lacp_port *); 135static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *, 136 const struct lacp_peerinfo *); 137 138/* mux machine */ 139 140static void lacp_sm_mux(struct lacp_port *); 141static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state); 142static void lacp_sm_mux_timer(struct lacp_port *); 143 144/* periodic transmit machine */ 145 146static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t); 147static void lacp_sm_ptx_tx_schedule(struct lacp_port *); 148static void lacp_sm_ptx_timer(struct lacp_port *); 149 150/* transmit machine */ 151 152static void lacp_sm_tx(struct lacp_port *); 153static void lacp_sm_assert_ntt(struct lacp_port *); 154 155static void lacp_run_timers(struct lacp_port *); 156static int lacp_compare_peerinfo(const struct lacp_peerinfo *, 157 const struct lacp_peerinfo *); 158static int lacp_compare_systemid(const struct lacp_systemid *, 159 const struct lacp_systemid *); 160static void lacp_port_enable(struct lacp_port *); 161static void lacp_port_disable(struct lacp_port *); 162static void lacp_select(struct lacp_port *); 163static void lacp_unselect(struct lacp_port *); 164static void lacp_disable_collecting(struct lacp_port *); 165static void lacp_enable_collecting(struct lacp_port *); 166static void lacp_disable_distributing(struct lacp_port *); 167static void lacp_enable_distributing(struct lacp_port *); 168static int lacp_xmit_lacpdu(struct lacp_port *); 169static int lacp_xmit_marker(struct lacp_port *); 170 171#if defined(LACP_DEBUG) 172static void lacp_dump_lacpdu(const struct lacpdu *); 173static const char *lacp_format_partner(const struct lacp_peerinfo *, char *, 174 size_t); 175static const char *lacp_format_lagid(const struct lacp_peerinfo *, 176 const struct lacp_peerinfo *, char *, size_t); 177static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *, 178 char *, size_t); 179static const char *lacp_format_state(uint8_t, char *, size_t); 180static const char *lacp_format_mac(const uint8_t *, char *, size_t); 181static const char *lacp_format_systemid(const struct lacp_systemid *, char *, 182 size_t); 183static const char *lacp_format_portid(const struct lacp_portid *, char *, 184 size_t); 185static void lacp_dprintf(const struct lacp_port *, const char *, ...) 186 __attribute__((__format__(__printf__, 2, 3))); 187#define LACP_DPRINTF(a) lacp_dprintf a 188#else 189#define LACP_DPRINTF(a) /* nothing */ 190#endif 191 192/* 193 * partner administration variables. 194 * XXX should be configurable. 195 */ 196 197static const struct lacp_peerinfo lacp_partner_admin = { 198 .lip_systemid = { .lsi_prio = 0xffff }, 199 .lip_portid = { .lpi_prio = 0xffff }, 200#if 1 201 /* optimistic */ 202 .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION | 203 LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING, 204#else 205 /* pessimistic */ 206 .lip_state = 0, 207#endif 208}; 209 210static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { 211 [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer, 212 [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer, 213 [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer, 214}; 215 216void 217lacp_input(struct lagg_port *lgp, struct mbuf *m) 218{ 219 struct lagg_softc *sc = lgp->lp_softc; 220 struct lacp_softc *lsc = LACP_SOFTC(sc); 221 uint8_t subtype; 222 223 if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) { 224 m_freem(m); 225 return; 226 } 227 228 m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype); 229 switch (subtype) { 230 case SLOWPROTOCOLS_SUBTYPE_LACP: 231 IF_HANDOFF(&lsc->lsc_queue, m, NULL); 232 taskqueue_enqueue(taskqueue_swi, &lsc->lsc_qtask); 233 break; 234 235 case SLOWPROTOCOLS_SUBTYPE_MARKER: 236 lacp_marker_input(lgp, m); 237 break; 238 239 default: 240 /* Unknown LACP packet type */ 241 m_freem(m); 242 break; 243 } 244} 245 246static void 247lacp_dequeue(void *arg, int pending) 248{ 249 struct lacp_softc *lsc = (struct lacp_softc *)arg; 250 struct lagg_softc *sc = lsc->lsc_softc; 251 struct lagg_port *lgp; 252 struct mbuf *m; 253 254 LAGG_WLOCK(sc); 255 for (;;) { 256 IF_DEQUEUE(&lsc->lsc_queue, m); 257 if (m == NULL) 258 break; 259 lgp = m->m_pkthdr.rcvif->if_lagg; 260 lacp_pdu_input(lgp, m); 261 } 262 LAGG_WUNLOCK(sc); 263} 264 265/* 266 * lacp_pdu_input: process lacpdu 267 */ 268static int 269lacp_pdu_input(struct lagg_port *lgp, struct mbuf *m) 270{ 271 struct lacp_port *lp = LACP_PORT(lgp); 272 struct lacpdu *du; 273 int error = 0; 274 275 LAGG_WLOCK_ASSERT(lgp->lp_softc); 276 277 if (m->m_pkthdr.len != sizeof(*du)) { 278 goto bad; 279 } 280 281 if ((m->m_flags & M_MCAST) == 0) { 282 goto bad; 283 } 284 285 if (m->m_len < sizeof(*du)) { 286 m = m_pullup(m, sizeof(*du)); 287 if (m == NULL) { 288 return (ENOMEM); 289 } 290 } 291 292 du = mtod(m, struct lacpdu *); 293 294 if (memcmp(&du->ldu_eh.ether_dhost, 295 ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { 296 goto bad; 297 } 298 299 /* 300 * ignore the version for compatibility with 301 * the future protocol revisions. 302 */ 303#if 0 304 if (du->ldu_sph.sph_version != 1) { 305 goto bad; 306 } 307#endif 308 309 /* 310 * ignore tlv types for compatibility with 311 * the future protocol revisions. 312 */ 313 if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor, 314 lacp_info_tlv_template, FALSE)) { 315 goto bad; 316 } 317 318#if defined(LACP_DEBUG) 319 LACP_DPRINTF((lp, "lacpdu receive\n")); 320 lacp_dump_lacpdu(du); 321#endif /* defined(LACP_DEBUG) */ 322 lacp_sm_rx(lp, du); 323 324 m_freem(m); 325 326 return (error); 327 328bad: 329 m_freem(m); 330 return (EINVAL); 331} 332 333static void 334lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info) 335{ 336 struct lagg_port *lgp = lp->lp_lagg; 337 struct lagg_softc *sc = lgp->lp_softc; 338 339 info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO); 340 memcpy(&info->lip_systemid.lsi_mac, 341 IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); 342 info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO); 343 info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index); 344 info->lip_state = lp->lp_state; 345} 346 347static void 348lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info) 349{ 350 struct ifnet *ifp = lp->lp_ifp; 351 352 /* Fill in the port index and system id (encoded as the MAC) */ 353 info->mi_rq_port = htons(ifp->if_index); 354 memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN); 355 info->mi_rq_xid = htonl(0); 356} 357 358static int 359lacp_xmit_lacpdu(struct lacp_port *lp) 360{ 361 struct lagg_port *lgp = lp->lp_lagg; 362 struct mbuf *m; 363 struct lacpdu *du; 364 int error; 365 366 LAGG_WLOCK_ASSERT(lgp->lp_softc); 367 368 m = m_gethdr(M_DONTWAIT, MT_DATA); 369 if (m == NULL) { 370 return (ENOMEM); 371 } 372 m->m_len = m->m_pkthdr.len = sizeof(*du); 373 374 du = mtod(m, struct lacpdu *); 375 memset(du, 0, sizeof(*du)); 376 377 memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols, 378 ETHER_ADDR_LEN); 379 memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); 380 du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW); 381 382 du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP; 383 du->ldu_sph.sph_version = 1; 384 385 TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor)); 386 du->ldu_actor = lp->lp_actor; 387 388 TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO, 389 sizeof(du->ldu_partner)); 390 du->ldu_partner = lp->lp_partner; 391 392 TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO, 393 sizeof(du->ldu_collector)); 394 du->ldu_collector.lci_maxdelay = 0; 395 396#if defined(LACP_DEBUG) 397 LACP_DPRINTF((lp, "lacpdu transmit\n")); 398 lacp_dump_lacpdu(du); 399#endif /* defined(LACP_DEBUG) */ 400 401 m->m_flags |= M_MCAST; 402 403 /* 404 * XXX should use higher priority queue. 405 * otherwise network congestion can break aggregation. 406 */ 407 408 error = lagg_enqueue(lp->lp_ifp, m); 409 return (error); 410} 411 412static int 413lacp_xmit_marker(struct lacp_port *lp) 414{ 415 struct lagg_port *lgp = lp->lp_lagg; 416 struct mbuf *m; 417 struct markerdu *mdu; 418 int error; 419 420 LAGG_WLOCK_ASSERT(lgp->lp_softc); 421 422 m = m_gethdr(M_DONTWAIT, MT_DATA); 423 if (m == NULL) { 424 return (ENOMEM); 425 } 426 m->m_len = m->m_pkthdr.len = sizeof(*mdu); 427 428 mdu = mtod(m, struct markerdu *); 429 memset(mdu, 0, sizeof(*mdu)); 430 431 memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols, 432 ETHER_ADDR_LEN); 433 memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); 434 mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW); 435 436 mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER; 437 mdu->mdu_sph.sph_version = 1; 438 439 /* Bump the transaction id and copy over the marker info */ 440 lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1); 441 TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info)); 442 mdu->mdu_info = lp->lp_marker; 443 444 LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n", 445 ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":", 446 ntohl(mdu->mdu_info.mi_rq_xid))); 447 448 m->m_flags |= M_MCAST; 449 error = lagg_enqueue(lp->lp_ifp, m); 450 return (error); 451} 452void 453lacp_linkstate(struct lagg_port *lgp) 454{ 455 struct lacp_port *lp = LACP_PORT(lgp); 456 struct ifnet *ifp = lgp->lp_ifp; 457 struct ifmediareq ifmr; 458 int error = 0; 459 u_int media; 460 uint8_t old_state; 461 uint16_t old_key; 462 463 LAGG_WLOCK_ASSERT(lgp->lp_softc); 464 465 bzero((char *)&ifmr, sizeof(ifmr)); 466 error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); 467 if (error != 0) 468 return; 469 470 media = ifmr.ifm_active; 471 LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, " 472 "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER, 473 (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP)); 474 old_state = lp->lp_state; 475 old_key = lp->lp_key; 476 477 lp->lp_media = media; 478 /* 479 * If the port is not an active full duplex Ethernet link then it can 480 * not be aggregated. 481 */ 482 if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 || 483 ifp->if_link_state != LINK_STATE_UP) { 484 lacp_port_disable(lp); 485 } else { 486 lacp_port_enable(lp); 487 } 488 lp->lp_key = lacp_compose_key(lp); 489 490 if (old_state != lp->lp_state || old_key != lp->lp_key) { 491 LACP_DPRINTF((lp, "-> UNSELECTED\n")); 492 lp->lp_selected = LACP_UNSELECTED; 493 } 494} 495 496static void 497lacp_tick(void *arg) 498{ 499 struct lacp_softc *lsc = arg; 500 struct lagg_softc *sc = lsc->lsc_softc; 501 struct lacp_port *lp; 502 503 LAGG_WLOCK(sc); 504 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { 505 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) 506 continue; 507 508 lacp_run_timers(lp); 509 510 lacp_select(lp); 511 lacp_sm_mux(lp); 512 lacp_sm_tx(lp); 513 lacp_sm_ptx_tx_schedule(lp); 514 } 515 LAGG_WUNLOCK(sc); 516 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); 517} 518 519int 520lacp_port_create(struct lagg_port *lgp) 521{ 522 struct lagg_softc *sc = lgp->lp_softc; 523 struct lacp_softc *lsc = LACP_SOFTC(sc); 524 struct lacp_port *lp; 525 struct ifnet *ifp = lgp->lp_ifp; 526 struct sockaddr_dl sdl; 527 struct ifmultiaddr *rifma = NULL; 528 int error; 529 530 boolean_t active = TRUE; /* XXX should be configurable */ 531 boolean_t fast = FALSE; /* XXX should be configurable */ 532 533 LAGG_WLOCK_ASSERT(sc); 534 535 bzero((char *)&sdl, sizeof(sdl)); 536 sdl.sdl_len = sizeof(sdl); 537 sdl.sdl_family = AF_LINK; 538 sdl.sdl_index = ifp->if_index; 539 sdl.sdl_type = IFT_ETHER; 540 sdl.sdl_alen = ETHER_ADDR_LEN; 541 542 bcopy(ðermulticastaddr_slowprotocols, 543 LLADDR(&sdl), ETHER_ADDR_LEN); 544 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); 545 if (error) { 546 printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname); 547 return (error); 548 } 549 550 lp = malloc(sizeof(struct lacp_port), 551 M_DEVBUF, M_NOWAIT|M_ZERO); 552 if (lp == NULL) 553 return (ENOMEM); 554 555 lgp->lp_psc = (caddr_t)lp; 556 lp->lp_ifp = ifp; 557 lp->lp_lagg = lgp; 558 lp->lp_lsc = lsc; 559 lp->lp_ifma = rifma; 560 561 LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next); 562 563 lacp_fill_actorinfo(lp, &lp->lp_actor); 564 lacp_fill_markerinfo(lp, &lp->lp_marker); 565 lp->lp_state = 566 (active ? LACP_STATE_ACTIVITY : 0) | 567 (fast ? LACP_STATE_TIMEOUT : 0); 568 lp->lp_aggregator = NULL; 569 lacp_linkstate(lgp); 570 lacp_sm_rx_set_expired(lp); 571 572 return (0); 573} 574 575void 576lacp_port_destroy(struct lagg_port *lgp) 577{ 578 struct lacp_port *lp = LACP_PORT(lgp); 579 int i; 580 581 LAGG_WLOCK_ASSERT(lgp->lp_softc); 582 583 for (i = 0; i < LACP_NTIMER; i++) { 584 LACP_TIMER_DISARM(lp, i); 585 } 586 587 lacp_disable_collecting(lp); 588 lacp_disable_distributing(lp); 589 lacp_unselect(lp); 590 lgp->lp_flags &= ~LAGG_PORT_DISABLED; 591 592 /* The address may have already been removed by if_purgemaddrs() */ 593 if (!lgp->lp_detaching) 594 if_delmulti_ifma(lp->lp_ifma); 595 596 LIST_REMOVE(lp, lp_next); 597 free(lp, M_DEVBUF); 598} 599 600int 601lacp_port_isactive(struct lagg_port *lgp) 602{ 603 struct lacp_port *lp = LACP_PORT(lgp); 604 struct lacp_softc *lsc = lp->lp_lsc; 605 struct lacp_aggregator *la = lp->lp_aggregator; 606 607 /* This port is joined to the active aggregator */ 608 if (la != NULL && la == lsc->lsc_active_aggregator) 609 return (1); 610 611 return (0); 612} 613 614static void 615lacp_disable_collecting(struct lacp_port *lp) 616{ 617 struct lagg_port *lgp = lp->lp_lagg; 618 619 LACP_DPRINTF((lp, "collecting disabled\n")); 620 621 lp->lp_state &= ~LACP_STATE_COLLECTING; 622 lgp->lp_flags &= ~LAGG_PORT_COLLECTING; 623} 624 625static void 626lacp_enable_collecting(struct lacp_port *lp) 627{ 628 struct lagg_port *lgp = lp->lp_lagg; 629 630 LACP_DPRINTF((lp, "collecting enabled\n")); 631 632 lp->lp_state |= LACP_STATE_COLLECTING; 633 lgp->lp_flags |= LAGG_PORT_COLLECTING; 634} 635 636static void 637lacp_disable_distributing(struct lacp_port *lp) 638{ 639 struct lacp_aggregator *la = lp->lp_aggregator; 640 struct lacp_softc *lsc = lp->lp_lsc; 641 struct lagg_port *lgp = lp->lp_lagg; 642#if defined(LACP_DEBUG) 643 char buf[LACP_LAGIDSTR_MAX+1]; 644#endif /* defined(LACP_DEBUG) */ 645 646 LAGG_WLOCK_ASSERT(lgp->lp_softc); 647 648 if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) { 649 return; 650 } 651 652 KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports")); 653 KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports)); 654 KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid")); 655 656 LACP_DPRINTF((lp, "disable distributing on aggregator %s, " 657 "nports %d -> %d\n", 658 lacp_format_lagid_aggregator(la, buf, sizeof(buf)), 659 la->la_nports, la->la_nports - 1)); 660 661 TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); 662 la->la_nports--; 663 664 lacp_suppress_distributing(lsc, la); 665 666 lp->lp_state &= ~LACP_STATE_DISTRIBUTING; 667 lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING; 668 669 if (lsc->lsc_active_aggregator == la) { 670 lacp_select_active_aggregator(lsc); 671 } 672} 673 674static void 675lacp_enable_distributing(struct lacp_port *lp) 676{ 677 struct lacp_aggregator *la = lp->lp_aggregator; 678 struct lacp_softc *lsc = lp->lp_lsc; 679 struct lagg_port *lgp = lp->lp_lagg; 680#if defined(LACP_DEBUG) 681 char buf[LACP_LAGIDSTR_MAX+1]; 682#endif /* defined(LACP_DEBUG) */ 683 684 LAGG_WLOCK_ASSERT(lgp->lp_softc); 685 686 if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) { 687 return; 688 } 689 690 LACP_DPRINTF((lp, "enable distributing on aggregator %s, " 691 "nports %d -> %d\n", 692 lacp_format_lagid_aggregator(la, buf, sizeof(buf)), 693 la->la_nports, la->la_nports + 1)); 694 695 KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid")); 696 TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); 697 la->la_nports++; 698 699 lacp_suppress_distributing(lsc, la); 700 701 lp->lp_state |= LACP_STATE_DISTRIBUTING; 702 lgp->lp_flags |= LAGG_PORT_DISTRIBUTING; 703 704 if (lsc->lsc_active_aggregator != la) { 705 lacp_select_active_aggregator(lsc); 706 } 707} 708 709static void 710lacp_transit_expire(void *vp) 711{ 712 struct lacp_softc *lsc = vp; 713 714 LACP_DPRINTF((NULL, "%s\n", __func__)); 715 lsc->lsc_suppress_distributing = FALSE; 716} 717 718int 719lacp_attach(struct lagg_softc *sc) 720{ 721 struct lacp_softc *lsc; 722 723 LAGG_WLOCK_ASSERT(sc); 724 725 lsc = malloc(sizeof(struct lacp_softc), 726 M_DEVBUF, M_NOWAIT|M_ZERO); 727 if (lsc == NULL) 728 return (ENOMEM); 729 730 sc->sc_psc = (caddr_t)lsc; 731 lsc->lsc_softc = sc; 732 733 lsc->lsc_hashkey = arc4random(); 734 lsc->lsc_active_aggregator = NULL; 735 TAILQ_INIT(&lsc->lsc_aggregators); 736 LIST_INIT(&lsc->lsc_ports); 737 738 TASK_INIT(&lsc->lsc_qtask, 0, lacp_dequeue, lsc); 739 mtx_init(&lsc->lsc_queue.ifq_mtx, "lacp queue", NULL, MTX_DEF); 740 lsc->lsc_queue.ifq_maxlen = ifqmaxlen; 741 742 callout_init(&lsc->lsc_transit_callout, CALLOUT_MPSAFE); 743 callout_init(&lsc->lsc_callout, CALLOUT_MPSAFE); 744 745 /* if the lagg is already up then do the same */ 746 if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) 747 lacp_init(sc); 748 749 return (0); 750} 751 752int 753lacp_detach(struct lagg_softc *sc) 754{ 755 struct lacp_softc *lsc = LACP_SOFTC(sc); 756 757 KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), 758 ("aggregators still active")); 759 KASSERT(lsc->lsc_active_aggregator == NULL, 760 ("aggregator still attached")); 761 762 sc->sc_psc = NULL; 763 callout_drain(&lsc->lsc_transit_callout); 764 callout_drain(&lsc->lsc_callout); 765 taskqueue_drain(taskqueue_swi, &lsc->lsc_qtask); 766 IF_DRAIN(&lsc->lsc_queue); 767 mtx_destroy(&lsc->lsc_queue.ifq_mtx); 768 769 free(lsc, M_DEVBUF); 770 return (0); 771} 772 773void 774lacp_init(struct lagg_softc *sc) 775{ 776 struct lacp_softc *lsc = LACP_SOFTC(sc); 777 778 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); 779} 780 781void 782lacp_stop(struct lagg_softc *sc) 783{ 784 struct lacp_softc *lsc = LACP_SOFTC(sc); 785 786 callout_stop(&lsc->lsc_transit_callout); 787 callout_stop(&lsc->lsc_callout); 788} 789 790struct lagg_port * 791lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) 792{ 793 struct lacp_softc *lsc = LACP_SOFTC(sc); 794 struct lacp_aggregator *la; 795 struct lacp_port *lp; 796 uint32_t hash; 797 int nports; 798 799 LAGG_RLOCK_ASSERT(sc); 800 801 if (__predict_false(lsc->lsc_suppress_distributing)) { 802 LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); 803 return (NULL); 804 } 805 806 la = lsc->lsc_active_aggregator; 807 if (__predict_false(la == NULL)) { 808 LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); 809 return (NULL); 810 } 811 812 nports = la->la_nports; 813 KASSERT(nports > 0, ("no ports available")); 814 815 hash = lagg_hashmbuf(m, lsc->lsc_hashkey); 816 hash %= nports; 817 lp = TAILQ_FIRST(&la->la_ports); 818 while (hash--) { 819 lp = TAILQ_NEXT(lp, lp_dist_q); 820 } 821 822 KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0, 823 ("aggregated port is not distributing")); 824 825 return (lp->lp_lagg); 826} 827/* 828 * lacp_suppress_distributing: drop transmit packets for a while 829 * to preserve packet ordering. 830 */ 831 832static void 833lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la) 834{ 835 struct lacp_port *lp; 836 837 if (lsc->lsc_active_aggregator != la) { 838 return; 839 } 840 841 LACP_DPRINTF((NULL, "%s\n", __func__)); 842 lsc->lsc_suppress_distributing = TRUE; 843 844 /* send a marker frame down each port to verify the queues are empty */ 845 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { 846 lp->lp_flags |= LACP_PORT_MARK; 847 lacp_xmit_marker(lp); 848 } 849 850 /* set a timeout for the marker frames */ 851 callout_reset(&lsc->lsc_transit_callout, 852 LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc); 853} 854 855static int 856lacp_compare_peerinfo(const struct lacp_peerinfo *a, 857 const struct lacp_peerinfo *b) 858{ 859 return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state))); 860} 861 862static int 863lacp_compare_systemid(const struct lacp_systemid *a, 864 const struct lacp_systemid *b) 865{ 866 return (memcmp(a, b, sizeof(*a))); 867} 868 869#if 0 /* unused */ 870static int 871lacp_compare_portid(const struct lacp_portid *a, 872 const struct lacp_portid *b) 873{ 874 return (memcmp(a, b, sizeof(*a))); 875} 876#endif 877 878static uint64_t 879lacp_aggregator_bandwidth(struct lacp_aggregator *la) 880{ 881 struct lacp_port *lp; 882 uint64_t speed; 883 884 lp = TAILQ_FIRST(&la->la_ports); 885 if (lp == NULL) { 886 return (0); 887 } 888 889 speed = ifmedia_baudrate(lp->lp_media); 890 speed *= la->la_nports; 891 if (speed == 0) { 892 LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n", 893 lp->lp_media, la->la_nports)); 894 } 895 896 return (speed); 897} 898 899/* 900 * lacp_select_active_aggregator: select an aggregator to be used to transmit 901 * packets from lagg(4) interface. 902 */ 903 904static void 905lacp_select_active_aggregator(struct lacp_softc *lsc) 906{ 907 struct lacp_aggregator *la; 908 struct lacp_aggregator *best_la = NULL; 909 uint64_t best_speed = 0; 910#if defined(LACP_DEBUG) 911 char buf[LACP_LAGIDSTR_MAX+1]; 912#endif /* defined(LACP_DEBUG) */ 913 914 LACP_DPRINTF((NULL, "%s:\n", __func__)); 915 916 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { 917 uint64_t speed; 918 919 if (la->la_nports == 0) { 920 continue; 921 } 922 923 speed = lacp_aggregator_bandwidth(la); 924 LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n", 925 lacp_format_lagid_aggregator(la, buf, sizeof(buf)), 926 speed, la->la_nports)); 927 928 /* This aggregator is chosen if 929 * the partner has a better system priority 930 * or, the total aggregated speed is higher 931 * or, it is already the chosen aggregator 932 */ 933 if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) < 934 LACP_SYS_PRI(best_la->la_partner)) || 935 speed > best_speed || 936 (speed == best_speed && 937 la == lsc->lsc_active_aggregator)) { 938 best_la = la; 939 best_speed = speed; 940 } 941 } 942 943 KASSERT(best_la == NULL || best_la->la_nports > 0, 944 ("invalid aggregator refcnt")); 945 KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports), 946 ("invalid aggregator list")); 947 948#if defined(LACP_DEBUG) 949 if (lsc->lsc_active_aggregator != best_la) { 950 LACP_DPRINTF((NULL, "active aggregator changed\n")); 951 LACP_DPRINTF((NULL, "old %s\n", 952 lacp_format_lagid_aggregator(lsc->lsc_active_aggregator, 953 buf, sizeof(buf)))); 954 } else { 955 LACP_DPRINTF((NULL, "active aggregator not changed\n")); 956 } 957 LACP_DPRINTF((NULL, "new %s\n", 958 lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); 959#endif /* defined(LACP_DEBUG) */ 960 961 if (lsc->lsc_active_aggregator != best_la) { 962 lsc->lsc_active_aggregator = best_la; 963 if (best_la) { 964 lacp_suppress_distributing(lsc, best_la); 965 } 966 } 967} 968 969static uint16_t 970lacp_compose_key(struct lacp_port *lp) 971{ 972 struct lagg_port *lgp = lp->lp_lagg; 973 struct lagg_softc *sc = lgp->lp_softc; 974 u_int media = lp->lp_media; 975 uint16_t key; 976 977 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) { 978 979 /* 980 * non-aggregatable links should have unique keys. 981 * 982 * XXX this isn't really unique as if_index is 16 bit. 983 */ 984 985 /* bit 0..14: (some bits of) if_index of this port */ 986 key = lp->lp_ifp->if_index; 987 /* bit 15: 1 */ 988 key |= 0x8000; 989 } else { 990 u_int subtype = IFM_SUBTYPE(media); 991 992 KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type")); 993 KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface")); 994 995 /* bit 0..4: IFM_SUBTYPE */ 996 key = subtype; 997 /* bit 5..14: (some bits of) if_index of lagg device */ 998 key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5); 999 /* bit 15: 0 */ 1000 } 1001 return (htons(key)); 1002} 1003 1004static void 1005lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la) 1006{ 1007#if defined(LACP_DEBUG) 1008 char buf[LACP_LAGIDSTR_MAX+1]; 1009#endif 1010 1011 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", 1012 __func__, 1013 lacp_format_lagid(&la->la_actor, &la->la_partner, 1014 buf, sizeof(buf)), 1015 la->la_refcnt, la->la_refcnt + 1)); 1016 1017 KASSERT(la->la_refcnt > 0, ("refcount <= 0")); 1018 la->la_refcnt++; 1019 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount")); 1020} 1021 1022static void 1023lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la) 1024{ 1025#if defined(LACP_DEBUG) 1026 char buf[LACP_LAGIDSTR_MAX+1]; 1027#endif 1028 1029 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", 1030 __func__, 1031 lacp_format_lagid(&la->la_actor, &la->la_partner, 1032 buf, sizeof(buf)), 1033 la->la_refcnt, la->la_refcnt - 1)); 1034 1035 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt")); 1036 la->la_refcnt--; 1037 if (la->la_refcnt > 0) { 1038 return; 1039 } 1040 1041 KASSERT(la->la_refcnt == 0, ("refcount not zero")); 1042 KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active")); 1043 1044 TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q); 1045 1046 free(la, M_DEVBUF); 1047} 1048 1049/* 1050 * lacp_aggregator_get: allocate an aggregator. 1051 */ 1052 1053static struct lacp_aggregator * 1054lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp) 1055{ 1056 struct lacp_aggregator *la; 1057 1058 la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT); 1059 if (la) { 1060 la->la_refcnt = 1; 1061 la->la_nports = 0; 1062 TAILQ_INIT(&la->la_ports); 1063 la->la_pending = 0; 1064 TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q); 1065 } 1066 1067 return (la); 1068} 1069 1070/* 1071 * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port. 1072 */ 1073 1074static void 1075lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp) 1076{ 1077 lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner); 1078 lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor); 1079 1080 la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION; 1081} 1082 1083static void 1084lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr, 1085 const struct lacp_peerinfo *lpi_port) 1086{ 1087 memset(lpi_aggr, 0, sizeof(*lpi_aggr)); 1088 lpi_aggr->lip_systemid = lpi_port->lip_systemid; 1089 lpi_aggr->lip_key = lpi_port->lip_key; 1090} 1091 1092/* 1093 * lacp_aggregator_is_compatible: check if a port can join to an aggregator. 1094 */ 1095 1096static int 1097lacp_aggregator_is_compatible(const struct lacp_aggregator *la, 1098 const struct lacp_port *lp) 1099{ 1100 if (!(lp->lp_state & LACP_STATE_AGGREGATION) || 1101 !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) { 1102 return (0); 1103 } 1104 1105 if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) { 1106 return (0); 1107 } 1108 1109 if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) { 1110 return (0); 1111 } 1112 1113 if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) { 1114 return (0); 1115 } 1116 1117 return (1); 1118} 1119 1120static int 1121lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a, 1122 const struct lacp_peerinfo *b) 1123{ 1124 if (memcmp(&a->lip_systemid, &b->lip_systemid, 1125 sizeof(a->lip_systemid))) { 1126 return (0); 1127 } 1128 1129 if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) { 1130 return (0); 1131 } 1132 1133 return (1); 1134} 1135 1136static void 1137lacp_port_enable(struct lacp_port *lp) 1138{ 1139 struct lagg_port *lgp = lp->lp_lagg; 1140 1141 lp->lp_state |= LACP_STATE_AGGREGATION; 1142 lgp->lp_flags &= ~LAGG_PORT_DISABLED; 1143} 1144 1145static void 1146lacp_port_disable(struct lacp_port *lp) 1147{ 1148 struct lagg_port *lgp = lp->lp_lagg; 1149 1150 lacp_set_mux(lp, LACP_MUX_DETACHED); 1151 1152 lp->lp_state &= ~LACP_STATE_AGGREGATION; 1153 lp->lp_selected = LACP_UNSELECTED; 1154 lacp_sm_rx_record_default(lp); 1155 lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION; 1156 lp->lp_state &= ~LACP_STATE_EXPIRED; 1157 lgp->lp_flags |= LAGG_PORT_DISABLED; 1158} 1159 1160/* 1161 * lacp_select: select an aggregator. create one if necessary. 1162 */ 1163static void 1164lacp_select(struct lacp_port *lp) 1165{ 1166 struct lacp_softc *lsc = lp->lp_lsc; 1167 struct lacp_aggregator *la; 1168#if defined(LACP_DEBUG) 1169 char buf[LACP_LAGIDSTR_MAX+1]; 1170#endif 1171 1172 if (lp->lp_aggregator) { 1173 return; 1174 } 1175 1176 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), 1177 ("timer_wait_while still active")); 1178 1179 LACP_DPRINTF((lp, "port lagid=%s\n", 1180 lacp_format_lagid(&lp->lp_actor, &lp->lp_partner, 1181 buf, sizeof(buf)))); 1182 1183 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { 1184 if (lacp_aggregator_is_compatible(la, lp)) { 1185 break; 1186 } 1187 } 1188 1189 if (la == NULL) { 1190 la = lacp_aggregator_get(lsc, lp); 1191 if (la == NULL) { 1192 LACP_DPRINTF((lp, "aggregator creation failed\n")); 1193 1194 /* 1195 * will retry on the next tick. 1196 */ 1197 1198 return; 1199 } 1200 lacp_fill_aggregator_id(la, lp); 1201 LACP_DPRINTF((lp, "aggregator created\n")); 1202 } else { 1203 LACP_DPRINTF((lp, "compatible aggregator found\n")); 1204 lacp_aggregator_addref(lsc, la); 1205 } 1206 1207 LACP_DPRINTF((lp, "aggregator lagid=%s\n", 1208 lacp_format_lagid(&la->la_actor, &la->la_partner, 1209 buf, sizeof(buf)))); 1210 1211 lp->lp_aggregator = la; 1212 lp->lp_selected = LACP_SELECTED; 1213} 1214 1215/* 1216 * lacp_unselect: finish unselect/detach process. 1217 */ 1218 1219static void 1220lacp_unselect(struct lacp_port *lp) 1221{ 1222 struct lacp_softc *lsc = lp->lp_lsc; 1223 struct lacp_aggregator *la = lp->lp_aggregator; 1224 1225 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), 1226 ("timer_wait_while still active")); 1227 1228 if (la == NULL) { 1229 return; 1230 } 1231 1232 lp->lp_aggregator = NULL; 1233 lacp_aggregator_delref(lsc, la); 1234} 1235 1236/* mux machine */ 1237 1238static void 1239lacp_sm_mux(struct lacp_port *lp) 1240{ 1241 enum lacp_mux_state new_state; 1242 boolean_t p_sync = 1243 (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; 1244 boolean_t p_collecting = 1245 (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0; 1246 enum lacp_selected selected = lp->lp_selected; 1247 struct lacp_aggregator *la; 1248 1249 /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */ 1250 1251re_eval: 1252 la = lp->lp_aggregator; 1253 KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL, 1254 ("MUX not detached")); 1255 new_state = lp->lp_mux_state; 1256 switch (lp->lp_mux_state) { 1257 case LACP_MUX_DETACHED: 1258 if (selected != LACP_UNSELECTED) { 1259 new_state = LACP_MUX_WAITING; 1260 } 1261 break; 1262 case LACP_MUX_WAITING: 1263 KASSERT(la->la_pending > 0 || 1264 !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), 1265 ("timer_wait_while still active")); 1266 if (selected == LACP_SELECTED && la->la_pending == 0) { 1267 new_state = LACP_MUX_ATTACHED; 1268 } else if (selected == LACP_UNSELECTED) { 1269 new_state = LACP_MUX_DETACHED; 1270 } 1271 break; 1272 case LACP_MUX_ATTACHED: 1273 if (selected == LACP_SELECTED && p_sync) { 1274 new_state = LACP_MUX_COLLECTING; 1275 } else if (selected != LACP_SELECTED) { 1276 new_state = LACP_MUX_DETACHED; 1277 } 1278 break; 1279 case LACP_MUX_COLLECTING: 1280 if (selected == LACP_SELECTED && p_sync && p_collecting) { 1281 new_state = LACP_MUX_DISTRIBUTING; 1282 } else if (selected != LACP_SELECTED || !p_sync) { 1283 new_state = LACP_MUX_ATTACHED; 1284 } 1285 break; 1286 case LACP_MUX_DISTRIBUTING: 1287 if (selected != LACP_SELECTED || !p_sync || !p_collecting) { 1288 new_state = LACP_MUX_COLLECTING; 1289 } 1290 break; 1291 default: 1292 panic("%s: unknown state", __func__); 1293 } 1294 1295 if (lp->lp_mux_state == new_state) { 1296 return; 1297 } 1298 1299 lacp_set_mux(lp, new_state); 1300 goto re_eval; 1301} 1302 1303static void 1304lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state) 1305{ 1306 struct lacp_aggregator *la = lp->lp_aggregator; 1307 1308 if (lp->lp_mux_state == new_state) { 1309 return; 1310 } 1311 1312 switch (new_state) { 1313 case LACP_MUX_DETACHED: 1314 lp->lp_state &= ~LACP_STATE_SYNC; 1315 lacp_disable_distributing(lp); 1316 lacp_disable_collecting(lp); 1317 lacp_sm_assert_ntt(lp); 1318 /* cancel timer */ 1319 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) { 1320 KASSERT(la->la_pending > 0, 1321 ("timer_wait_while not active")); 1322 la->la_pending--; 1323 } 1324 LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE); 1325 lacp_unselect(lp); 1326 break; 1327 case LACP_MUX_WAITING: 1328 LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE, 1329 LACP_AGGREGATE_WAIT_TIME); 1330 la->la_pending++; 1331 break; 1332 case LACP_MUX_ATTACHED: 1333 lp->lp_state |= LACP_STATE_SYNC; 1334 lacp_disable_collecting(lp); 1335 lacp_sm_assert_ntt(lp); 1336 break; 1337 case LACP_MUX_COLLECTING: 1338 lacp_enable_collecting(lp); 1339 lacp_disable_distributing(lp); 1340 lacp_sm_assert_ntt(lp); 1341 break; 1342 case LACP_MUX_DISTRIBUTING: 1343 lacp_enable_distributing(lp); 1344 break; 1345 default: 1346 panic("%s: unknown state", __func__); 1347 } 1348 1349 LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state)); 1350 1351 lp->lp_mux_state = new_state; 1352} 1353 1354static void 1355lacp_sm_mux_timer(struct lacp_port *lp) 1356{ 1357 struct lacp_aggregator *la = lp->lp_aggregator; 1358#if defined(LACP_DEBUG) 1359 char buf[LACP_LAGIDSTR_MAX+1]; 1360#endif 1361 1362 KASSERT(la->la_pending > 0, ("no pending event")); 1363 1364 LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__, 1365 lacp_format_lagid(&la->la_actor, &la->la_partner, 1366 buf, sizeof(buf)), 1367 la->la_pending, la->la_pending - 1)); 1368 1369 la->la_pending--; 1370} 1371 1372/* periodic transmit machine */ 1373 1374static void 1375lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate) 1376{ 1377 if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state, 1378 LACP_STATE_TIMEOUT)) { 1379 return; 1380 } 1381 1382 LACP_DPRINTF((lp, "partner timeout changed\n")); 1383 1384 /* 1385 * FAST_PERIODIC -> SLOW_PERIODIC 1386 * or 1387 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC 1388 * 1389 * let lacp_sm_ptx_tx_schedule to update timeout. 1390 */ 1391 1392 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); 1393 1394 /* 1395 * if timeout has been shortened, assert NTT. 1396 */ 1397 1398 if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) { 1399 lacp_sm_assert_ntt(lp); 1400 } 1401} 1402 1403static void 1404lacp_sm_ptx_tx_schedule(struct lacp_port *lp) 1405{ 1406 int timeout; 1407 1408 if (!(lp->lp_state & LACP_STATE_ACTIVITY) && 1409 !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) { 1410 1411 /* 1412 * NO_PERIODIC 1413 */ 1414 1415 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); 1416 return; 1417 } 1418 1419 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) { 1420 return; 1421 } 1422 1423 timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ? 1424 LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME; 1425 1426 LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout); 1427} 1428 1429static void 1430lacp_sm_ptx_timer(struct lacp_port *lp) 1431{ 1432 lacp_sm_assert_ntt(lp); 1433} 1434 1435static void 1436lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du) 1437{ 1438 int timeout; 1439 1440 /* 1441 * check LACP_DISABLED first 1442 */ 1443 1444 if (!(lp->lp_state & LACP_STATE_AGGREGATION)) { 1445 return; 1446 } 1447 1448 /* 1449 * check loopback condition. 1450 */ 1451 1452 if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid, 1453 &lp->lp_actor.lip_systemid)) { 1454 return; 1455 } 1456 1457 /* 1458 * EXPIRED, DEFAULTED, CURRENT -> CURRENT 1459 */ 1460 1461 lacp_sm_rx_update_selected(lp, du); 1462 lacp_sm_rx_update_ntt(lp, du); 1463 lacp_sm_rx_record_pdu(lp, du); 1464 1465 timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ? 1466 LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME; 1467 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout); 1468 1469 lp->lp_state &= ~LACP_STATE_EXPIRED; 1470 1471 /* 1472 * kick transmit machine without waiting the next tick. 1473 */ 1474 1475 lacp_sm_tx(lp); 1476} 1477 1478static void 1479lacp_sm_rx_set_expired(struct lacp_port *lp) 1480{ 1481 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; 1482 lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT; 1483 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME); 1484 lp->lp_state |= LACP_STATE_EXPIRED; 1485} 1486 1487static void 1488lacp_sm_rx_timer(struct lacp_port *lp) 1489{ 1490 if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) { 1491 /* CURRENT -> EXPIRED */ 1492 LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__)); 1493 lacp_sm_rx_set_expired(lp); 1494 } else { 1495 /* EXPIRED -> DEFAULTED */ 1496 LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__)); 1497 lacp_sm_rx_update_default_selected(lp); 1498 lacp_sm_rx_record_default(lp); 1499 lp->lp_state &= ~LACP_STATE_EXPIRED; 1500 } 1501} 1502 1503static void 1504lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) 1505{ 1506 boolean_t active; 1507 uint8_t oldpstate; 1508#if defined(LACP_DEBUG) 1509 char buf[LACP_STATESTR_MAX+1]; 1510#endif 1511 1512 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1513 1514 oldpstate = lp->lp_partner.lip_state; 1515 1516 active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY) 1517 || ((lp->lp_state & LACP_STATE_ACTIVITY) && 1518 (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY)); 1519 1520 lp->lp_partner = du->ldu_actor; 1521 if (active && 1522 ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, 1523 LACP_STATE_AGGREGATION) && 1524 !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner)) 1525 || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) { 1526 /* XXX nothing? */ 1527 } else { 1528 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; 1529 } 1530 1531 lp->lp_state &= ~LACP_STATE_DEFAULTED; 1532 1533 if (oldpstate != lp->lp_partner.lip_state) { 1534 LACP_DPRINTF((lp, "old pstate %s\n", 1535 lacp_format_state(oldpstate, buf, sizeof(buf)))); 1536 LACP_DPRINTF((lp, "new pstate %s\n", 1537 lacp_format_state(lp->lp_partner.lip_state, buf, 1538 sizeof(buf)))); 1539 } 1540 1541 lacp_sm_ptx_update_timeout(lp, oldpstate); 1542} 1543 1544static void 1545lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du) 1546{ 1547 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1548 1549 if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) || 1550 !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, 1551 LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) { 1552 LACP_DPRINTF((lp, "%s: assert ntt\n", __func__)); 1553 lacp_sm_assert_ntt(lp); 1554 } 1555} 1556 1557static void 1558lacp_sm_rx_record_default(struct lacp_port *lp) 1559{ 1560 uint8_t oldpstate; 1561 1562 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1563 1564 oldpstate = lp->lp_partner.lip_state; 1565 lp->lp_partner = lacp_partner_admin; 1566 lp->lp_state |= LACP_STATE_DEFAULTED; 1567 lacp_sm_ptx_update_timeout(lp, oldpstate); 1568} 1569 1570static void 1571lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp, 1572 const struct lacp_peerinfo *info) 1573{ 1574 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1575 1576 if (lacp_compare_peerinfo(&lp->lp_partner, info) || 1577 !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state, 1578 LACP_STATE_AGGREGATION)) { 1579 lp->lp_selected = LACP_UNSELECTED; 1580 /* mux machine will clean up lp->lp_aggregator */ 1581 } 1582} 1583 1584static void 1585lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) 1586{ 1587 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1588 1589 lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor); 1590} 1591 1592static void 1593lacp_sm_rx_update_default_selected(struct lacp_port *lp) 1594{ 1595 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1596 1597 lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin); 1598} 1599 1600/* transmit machine */ 1601 1602static void 1603lacp_sm_tx(struct lacp_port *lp) 1604{ 1605 int error; 1606 1607 if (!(lp->lp_state & LACP_STATE_AGGREGATION) 1608#if 1 1609 || (!(lp->lp_state & LACP_STATE_ACTIVITY) 1610 && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) 1611#endif 1612 ) { 1613 lp->lp_flags &= ~LACP_PORT_NTT; 1614 } 1615 1616 if (!(lp->lp_flags & LACP_PORT_NTT)) { 1617 return; 1618 } 1619 1620 /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */ 1621 if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent, 1622 (3 / LACP_FAST_PERIODIC_TIME)) == 0) { 1623 LACP_DPRINTF((lp, "rate limited pdu\n")); 1624 return; 1625 } 1626 1627 error = lacp_xmit_lacpdu(lp); 1628 1629 if (error == 0) { 1630 lp->lp_flags &= ~LACP_PORT_NTT; 1631 } else { 1632 LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n", 1633 error)); 1634 } 1635} 1636 1637static void 1638lacp_sm_assert_ntt(struct lacp_port *lp) 1639{ 1640 1641 lp->lp_flags |= LACP_PORT_NTT; 1642} 1643 1644static void 1645lacp_run_timers(struct lacp_port *lp) 1646{ 1647 int i; 1648 1649 for (i = 0; i < LACP_NTIMER; i++) { 1650 KASSERT(lp->lp_timer[i] >= 0, 1651 ("invalid timer value %d", lp->lp_timer[i])); 1652 if (lp->lp_timer[i] == 0) { 1653 continue; 1654 } else if (--lp->lp_timer[i] <= 0) { 1655 if (lacp_timer_funcs[i]) { 1656 (*lacp_timer_funcs[i])(lp); 1657 } 1658 } 1659 } 1660} 1661 1662int 1663lacp_marker_input(struct lagg_port *lgp, struct mbuf *m) 1664{ 1665 struct lacp_port *lp = LACP_PORT(lgp); 1666 struct lacp_port *lp2; 1667 struct lacp_softc *lsc = lp->lp_lsc; 1668 struct markerdu *mdu; 1669 int error = 0; 1670 int pending = 0; 1671 1672 LAGG_RLOCK_ASSERT(lgp->lp_softc); 1673 1674 if (m->m_pkthdr.len != sizeof(*mdu)) { 1675 goto bad; 1676 } 1677 1678 if ((m->m_flags & M_MCAST) == 0) { 1679 goto bad; 1680 } 1681 1682 if (m->m_len < sizeof(*mdu)) { 1683 m = m_pullup(m, sizeof(*mdu)); 1684 if (m == NULL) { 1685 return (ENOMEM); 1686 } 1687 } 1688 1689 mdu = mtod(m, struct markerdu *); 1690 1691 if (memcmp(&mdu->mdu_eh.ether_dhost, 1692 ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { 1693 goto bad; 1694 } 1695 1696 if (mdu->mdu_sph.sph_version != 1) { 1697 goto bad; 1698 } 1699 1700 switch (mdu->mdu_tlv.tlv_type) { 1701 case MARKER_TYPE_INFO: 1702 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, 1703 marker_info_tlv_template, TRUE)) { 1704 goto bad; 1705 } 1706 mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE; 1707 memcpy(&mdu->mdu_eh.ether_dhost, 1708 ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN); 1709 memcpy(&mdu->mdu_eh.ether_shost, 1710 lgp->lp_lladdr, ETHER_ADDR_LEN); 1711 error = lagg_enqueue(lp->lp_ifp, m); 1712 break; 1713 1714 case MARKER_TYPE_RESPONSE: 1715 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, 1716 marker_response_tlv_template, TRUE)) { 1717 goto bad; 1718 } 1719 LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n", 1720 ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, 1721 ":", ntohl(mdu->mdu_info.mi_rq_xid))); 1722 1723 /* Verify that it is the last marker we sent out */ 1724 if (memcmp(&mdu->mdu_info, &lp->lp_marker, 1725 sizeof(struct lacp_markerinfo))) 1726 goto bad; 1727 1728 lp->lp_flags &= ~LACP_PORT_MARK; 1729 1730 if (lsc->lsc_suppress_distributing) { 1731 /* Check if any ports are waiting for a response */ 1732 LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) { 1733 if (lp2->lp_flags & LACP_PORT_MARK) { 1734 pending = 1; 1735 break; 1736 } 1737 } 1738 1739 if (pending == 0) { 1740 /* All interface queues are clear */ 1741 LACP_DPRINTF((NULL, "queue flush complete\n")); 1742 lsc->lsc_suppress_distributing = FALSE; 1743 } 1744 } 1745 1746 m_freem(m); 1747 break; 1748 1749 default: 1750 goto bad; 1751 } 1752 1753 return (error); 1754 1755bad: 1756 LACP_DPRINTF((lp, "bad marker frame\n")); 1757 m_freem(m); 1758 return (EINVAL); 1759} 1760 1761static int 1762tlv_check(const void *p, size_t size, const struct tlvhdr *tlv, 1763 const struct tlv_template *tmpl, boolean_t check_type) 1764{ 1765 while (/* CONSTCOND */ 1) { 1766 if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) { 1767 return (EINVAL); 1768 } 1769 if ((check_type && tlv->tlv_type != tmpl->tmpl_type) || 1770 tlv->tlv_length != tmpl->tmpl_length) { 1771 return (EINVAL); 1772 } 1773 if (tmpl->tmpl_type == 0) { 1774 break; 1775 } 1776 tlv = (const struct tlvhdr *) 1777 ((const char *)tlv + tlv->tlv_length); 1778 tmpl++; 1779 } 1780 1781 return (0); 1782} 1783 1784#if defined(LACP_DEBUG) 1785const char * 1786lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) 1787{ 1788 snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", 1789 (int)mac[0], 1790 (int)mac[1], 1791 (int)mac[2], 1792 (int)mac[3], 1793 (int)mac[4], 1794 (int)mac[5]); 1795 1796 return (buf); 1797} 1798 1799const char * 1800lacp_format_systemid(const struct lacp_systemid *sysid, 1801 char *buf, size_t buflen) 1802{ 1803 char macbuf[LACP_MACSTR_MAX+1]; 1804 1805 snprintf(buf, buflen, "%04X,%s", 1806 ntohs(sysid->lsi_prio), 1807 lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf))); 1808 1809 return (buf); 1810} 1811 1812const char * 1813lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen) 1814{ 1815 snprintf(buf, buflen, "%04X,%04X", 1816 ntohs(portid->lpi_prio), 1817 ntohs(portid->lpi_portno)); 1818 1819 return (buf); 1820} 1821 1822const char * 1823lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen) 1824{ 1825 char sysid[LACP_SYSTEMIDSTR_MAX+1]; 1826 char portid[LACP_PORTIDSTR_MAX+1]; 1827 1828 snprintf(buf, buflen, "(%s,%04X,%s)", 1829 lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)), 1830 ntohs(peer->lip_key), 1831 lacp_format_portid(&peer->lip_portid, portid, sizeof(portid))); 1832 1833 return (buf); 1834} 1835 1836const char * 1837lacp_format_lagid(const struct lacp_peerinfo *a, 1838 const struct lacp_peerinfo *b, char *buf, size_t buflen) 1839{ 1840 char astr[LACP_PARTNERSTR_MAX+1]; 1841 char bstr[LACP_PARTNERSTR_MAX+1]; 1842 1843#if 0 1844 /* 1845 * there's a convention to display small numbered peer 1846 * in the left. 1847 */ 1848 1849 if (lacp_compare_peerinfo(a, b) > 0) { 1850 const struct lacp_peerinfo *t; 1851 1852 t = a; 1853 a = b; 1854 b = t; 1855 } 1856#endif 1857 1858 snprintf(buf, buflen, "[%s,%s]", 1859 lacp_format_partner(a, astr, sizeof(astr)), 1860 lacp_format_partner(b, bstr, sizeof(bstr))); 1861 1862 return (buf); 1863} 1864 1865const char * 1866lacp_format_lagid_aggregator(const struct lacp_aggregator *la, 1867 char *buf, size_t buflen) 1868{ 1869 if (la == NULL) { 1870 return ("(none)"); 1871 } 1872 1873 return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen)); 1874} 1875 1876const char * 1877lacp_format_state(uint8_t state, char *buf, size_t buflen) 1878{ 1879 snprintf(buf, buflen, "%b", state, LACP_STATE_BITS); 1880 return (buf); 1881} 1882 1883static void 1884lacp_dump_lacpdu(const struct lacpdu *du) 1885{ 1886 char buf[LACP_PARTNERSTR_MAX+1]; 1887 char buf2[LACP_STATESTR_MAX+1]; 1888 1889 printf("actor=%s\n", 1890 lacp_format_partner(&du->ldu_actor, buf, sizeof(buf))); 1891 printf("actor.state=%s\n", 1892 lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2))); 1893 printf("partner=%s\n", 1894 lacp_format_partner(&du->ldu_partner, buf, sizeof(buf))); 1895 printf("partner.state=%s\n", 1896 lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2))); 1897 1898 printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay)); 1899} 1900 1901static void 1902lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...) 1903{ 1904 va_list va; 1905 1906 if (lp) { 1907 printf("%s: ", lp->lp_ifp->if_xname); 1908 } 1909 1910 va_start(va, fmt); 1911 vprintf(fmt, va); 1912 va_end(va); 1913} 1914#endif 1915