Deleted Added
sdiff udiff text old ( 169227 ) new ( 169327 )
full compact
1/* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */
2
3/*-
4 * Copyright (c)2005 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/net/ieee8023ad_lacp.c 169327 2007-05-07 00:18:56Z thompsa $");
31
32#include <sys/param.h>
33#include <sys/callout.h>
34#include <sys/mbuf.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/kernel.h> /* hz */
38#include <sys/socket.h> /* for net/if.h */
39#include <sys/sockio.h>
40#include <machine/stdarg.h>
41
42#include <net/if.h>
43#include <net/if_dl.h>
44#include <net/ethernet.h>
45#include <net/if_media.h>
46#include <net/if_types.h>
47
48#include <net/if_lagg.h>
49#include <net/ieee8023ad_lacp.h>
50
51/*
52 * actor system priority and port priority.
53 * XXX should be configurable.
54 */
55
56#define LACP_SYSTEM_PRIO 0x8000
57#define LACP_PORT_PRIO 0x8000
58
/*
 * Multicast MAC address used for LACPDUs in this file: lacp_input()
 * compares the received destination address against it, lacp_xmit_lacpdu()
 * writes it as the destination, and lacp_port_create() joins it on the
 * member interface.
 */
const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
61
/*
 * Expected TLV layout of a LACPDU: actor info, partner info, collector
 * info, each paired with its total length (TLV header plus payload).
 * Passed to tlv_check() by lacp_input().
 * NOTE(review): the trailing { 0, 0 } entry presumably terminates the
 * list -- confirm against tlv_check().
 */
static const struct tlv_template lacp_info_tlv_template[] = {
	{ LACP_TYPE_ACTORINFO,
	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
	{ LACP_TYPE_PARTNERINFO,
	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
	{ LACP_TYPE_COLLECTORINFO,
	    sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
	{ 0, 0 },
};
71
/* Signature of the per-port handlers stored in lacp_timer_funcs[]. */
typedef void (*lacp_timer_func_t)(struct lacp_port *);
73
/*
 * TLV template for a marker information PDU: one MARKER_TYPE_INFO TLV of
 * 16 bytes.
 * NOTE(review): not referenced in the visible part of this file;
 * presumably consumed by the marker handling code -- confirm.
 */
static const struct tlv_template marker_info_tlv_template[] = {
	{ MARKER_TYPE_INFO, 16 },
	{ 0, 0 },
};
78
/*
 * TLV template for a marker response PDU: one MARKER_TYPE_RESPONSE TLV of
 * 16 bytes.
 * NOTE(review): not referenced in the visible part of this file;
 * presumably consumed by the marker handling code -- confirm.
 */
static const struct tlv_template marker_response_tlv_template[] = {
	{ MARKER_TYPE_RESPONSE, 16 },
	{ 0, 0 },
};
83
84static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
85
86static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
87static void lacp_suppress_distributing(struct lacp_softc *,
88 struct lacp_aggregator *);
89static void lacp_transit_expire(void *);
90static void lacp_select_active_aggregator(struct lacp_softc *);
91static uint16_t lacp_compose_key(struct lacp_port *);
92static int tlv_check(const void *, size_t, const struct tlvhdr *,
93 const struct tlv_template *, boolean_t);
94static void lacp_tick(void *);
95
96static void lacp_fill_aggregator_id(struct lacp_aggregator *,
97 const struct lacp_port *);
98static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
99 const struct lacp_peerinfo *);
100static int lacp_aggregator_is_compatible(const struct lacp_aggregator *,
101 const struct lacp_port *);
102static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
103 const struct lacp_peerinfo *);
104
105static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
106 struct lacp_port *);
107static void lacp_aggregator_addref(struct lacp_softc *,
108 struct lacp_aggregator *);
109static void lacp_aggregator_delref(struct lacp_softc *,
110 struct lacp_aggregator *);
111
112/* receive machine */
113
114static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
115static void lacp_sm_rx_timer(struct lacp_port *);
116static void lacp_sm_rx_set_expired(struct lacp_port *);
117static void lacp_sm_rx_update_ntt(struct lacp_port *,
118 const struct lacpdu *);
119static void lacp_sm_rx_record_pdu(struct lacp_port *,
120 const struct lacpdu *);
121static void lacp_sm_rx_update_selected(struct lacp_port *,
122 const struct lacpdu *);
123static void lacp_sm_rx_record_default(struct lacp_port *);
124static void lacp_sm_rx_update_default_selected(struct lacp_port *);
125static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
126 const struct lacp_peerinfo *);
127
128/* mux machine */
129
130static void lacp_sm_mux(struct lacp_port *);
131static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
132static void lacp_sm_mux_timer(struct lacp_port *);
133
134/* periodic transmit machine */
135
136static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
137static void lacp_sm_ptx_tx_schedule(struct lacp_port *);
138static void lacp_sm_ptx_timer(struct lacp_port *);
139
140/* transmit machine */
141
142static void lacp_sm_tx(struct lacp_port *);
143static void lacp_sm_assert_ntt(struct lacp_port *);
144
145static void lacp_run_timers(struct lacp_port *);
146static int lacp_compare_peerinfo(const struct lacp_peerinfo *,
147 const struct lacp_peerinfo *);
148static int lacp_compare_systemid(const struct lacp_systemid *,
149 const struct lacp_systemid *);
150static void lacp_port_enable(struct lacp_port *);
151static void lacp_port_disable(struct lacp_port *);
152static void lacp_select(struct lacp_port *);
153static void lacp_unselect(struct lacp_port *);
154static void lacp_disable_collecting(struct lacp_port *);
155static void lacp_enable_collecting(struct lacp_port *);
156static void lacp_disable_distributing(struct lacp_port *);
157static void lacp_enable_distributing(struct lacp_port *);
158static int lacp_xmit_lacpdu(struct lacp_port *);
159
160#if defined(LACP_DEBUG)
161static void lacp_dump_lacpdu(const struct lacpdu *);
162static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
163 size_t);
164static const char *lacp_format_lagid(const struct lacp_peerinfo *,
165 const struct lacp_peerinfo *, char *, size_t);
166static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
167 char *, size_t);
168static const char *lacp_format_state(uint8_t, char *, size_t);
169static const char *lacp_format_mac(const uint8_t *, char *, size_t);
170static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
171 size_t);
172static const char *lacp_format_portid(const struct lacp_portid *, char *,
173 size_t);
174static void lacp_dprintf(const struct lacp_port *, const char *, ...)
175 __attribute__((__format__(__printf__, 2, 3)));
176#define LACP_DPRINTF(a) lacp_dprintf a
177#else
178#define LACP_DPRINTF(a) /* nothing */
179#endif
180
/*
 * partner administration variables.
 * XXX should be configurable.
 *
 * Both priorities are set to 0xffff.  The "optimistic" variant (currently
 * compiled in) marks the partner as in sync, aggregatable, collecting and
 * distributing; the "pessimistic" variant would clear every state bit.
 * NOTE(review): presumably the defaults applied by
 * lacp_sm_rx_record_default() -- confirm, that function is not visible
 * here.
 */

static const struct lacp_peerinfo lacp_partner_admin = {
	.lip_systemid = { .lsi_prio = 0xffff },
	.lip_portid = { .lpi_prio = 0xffff },
#if 1
	/* optimistic */
	.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
	    LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
#else
	/* pessimistic */
	.lip_state = 0,
#endif
};
198
/*
 * Handler for each LACP timer, indexed by timer id: current-while ->
 * receive machine, periodic -> periodic transmit machine, wait-while ->
 * mux machine.
 * NOTE(review): presumably dispatched from lacp_run_timers() -- confirm.
 */
static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
	[LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
	[LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
	[LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
};
204
205/*
206 * lacp_input: process lacpdu
207 */
208int
209lacp_input(struct lagg_port *lgp, struct mbuf *m)
210{
211 struct lacp_port *lp = LACP_PORT(lgp);
212 struct lacpdu *du;
213 int error = 0;
214
215 LAGG_LOCK_ASSERT(lgp->lp_lagg);
216
217 if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
218 goto bad;
219 }
220
221 if (m->m_pkthdr.len != sizeof(*du)) {
222 goto bad;
223 }
224
225 if ((m->m_flags & M_MCAST) == 0) {
226 goto bad;
227 }
228
229 if (m->m_len < sizeof(*du)) {
230 m = m_pullup(m, sizeof(*du));
231 if (m == NULL) {
232 return (ENOMEM);
233 }
234 }
235
236 du = mtod(m, struct lacpdu *);
237
238 if (memcmp(&du->ldu_eh.ether_dhost,
239 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
240 goto bad;
241 }
242
243 /* XXX
244 KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP,
245 ("a very bad kassert!"));
246 */
247
248 /*
249 * ignore the version for compatibility with
250 * the future protocol revisions.
251 */
252
253#if 0
254 if (du->ldu_sph.sph_version != 1) {
255 goto bad;
256 }
257#endif
258
259 /*
260 * ignore tlv types for compatibility with
261 * the future protocol revisions.
262 */
263
264 if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
265 lacp_info_tlv_template, FALSE)) {
266 goto bad;
267 }
268
269#if defined(LACP_DEBUG)
270 LACP_DPRINTF((lp, "lacpdu receive\n"));
271 lacp_dump_lacpdu(du);
272#endif /* defined(LACP_DEBUG) */
273 lacp_sm_rx(lp, du);
274
275 m_freem(m);
276
277 return (error);
278
279bad:
280 m_freem(m);
281 return (EINVAL);
282}
283
284static void
285lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
286{
287 struct lagg_port *lgp = lp->lp_lagg;
288 struct lagg_softc *lgs = lgp->lp_lagg;
289
290 info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
291 memcpy(&info->lip_systemid.lsi_mac,
292 IF_LLADDR(lgs->sc_ifp), ETHER_ADDR_LEN);
293 info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
294 info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
295 info->lip_state = lp->lp_state;
296}
297
298static int
299lacp_xmit_lacpdu(struct lacp_port *lp)
300{
301 struct lagg_port *lgp = lp->lp_lagg;
302 struct mbuf *m;
303 struct lacpdu *du;
304 int error;
305
306 LAGG_LOCK_ASSERT(lgp->lp_lagg);
307
308 m = m_gethdr(M_DONTWAIT, MT_DATA);
309 if (m == NULL) {
310 return (ENOMEM);
311 }
312 m->m_len = m->m_pkthdr.len = sizeof(*du);
313
314 du = mtod(m, struct lacpdu *);
315 memset(du, 0, sizeof(*du));
316
317 memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
318 ETHER_ADDR_LEN);
319 memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
320 du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
321
322 du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
323 du->ldu_sph.sph_version = 1;
324
325 TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
326 du->ldu_actor = lp->lp_actor;
327
328 TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
329 sizeof(du->ldu_partner));
330 du->ldu_partner = lp->lp_partner;
331
332 TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
333 sizeof(du->ldu_collector));
334 du->ldu_collector.lci_maxdelay = 0;
335
336#if defined(LACP_DEBUG)
337 LACP_DPRINTF((lp, "lacpdu transmit\n"));
338 lacp_dump_lacpdu(du);
339#endif /* defined(LACP_DEBUG) */
340
341 m->m_flags |= M_MCAST;
342
343 /*
344 * XXX should use higher priority queue.
345 * otherwise network congestion can break aggregation.
346 */
347
348 error = lagg_enqueue(lp->lp_ifp, m);
349 return (error);
350}
351
352void
353lacp_linkstate(struct lagg_port *lgp)
354{
355 struct lacp_port *lp = LACP_PORT(lgp);
356 struct ifnet *ifp = lgp->lp_ifp;
357 struct ifmediareq ifmr;
358 int error = 0;
359 u_int media;
360 uint8_t old_state;
361 uint16_t old_key;
362
363 LAGG_LOCK_ASSERT(lgp->lp_lagg);
364
365 bzero((char *)&ifmr, sizeof(ifmr));
366 error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
367 if (error != 0)
368 return;
369
370 media = ifmr.ifm_active;
371 LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
372 "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
373 (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
374 old_state = lp->lp_state;
375 old_key = lp->lp_key;
376
377 lp->lp_media = media;
378 /*
379 * If the port is not an active full duplex Ethernet link then it can
380 * not be aggregated.
381 */
382 if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
383 ifp->if_link_state != LINK_STATE_UP) {
384 lacp_port_disable(lp);
385 } else {
386 lacp_port_enable(lp);
387 }
388 lp->lp_key = lacp_compose_key(lp);
389
390 if (old_state != lp->lp_state || old_key != lp->lp_key) {
391 LACP_DPRINTF((lp, "-> UNSELECTED\n"));
392 lp->lp_selected = LACP_UNSELECTED;
393 }
394}
395
396static void
397lacp_tick(void *arg)
398{
399 struct lacp_softc *lsc = arg;
400 struct lacp_port *lp;
401
402 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
403 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
404 continue;
405
406 lacp_run_timers(lp);
407
408 lacp_select(lp);
409 lacp_sm_mux(lp);
410 lacp_sm_tx(lp);
411 lacp_sm_ptx_tx_schedule(lp);
412 }
413 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
414}
415
416int
417lacp_port_create(struct lagg_port *lgp)
418{
419 struct lagg_softc *lgs = lgp->lp_lagg;
420 struct lacp_softc *lsc = LACP_SOFTC(lgs);
421 struct lacp_port *lp;
422 struct ifnet *ifp = lgp->lp_ifp;
423 struct sockaddr_dl sdl;
424 struct ifmultiaddr *rifma = NULL;
425 int error;
426
427 boolean_t active = TRUE; /* XXX should be configurable */
428 boolean_t fast = FALSE; /* XXX should be configurable */
429
430 LAGG_LOCK_ASSERT(lgs);
431
432 bzero((char *)&sdl, sizeof(sdl));
433 sdl.sdl_len = sizeof(sdl);
434 sdl.sdl_family = AF_LINK;
435 sdl.sdl_index = ifp->if_index;
436 sdl.sdl_type = IFT_ETHER;
437 sdl.sdl_alen = ETHER_ADDR_LEN;
438
439 bcopy(&ethermulticastaddr_slowprotocols,
440 LLADDR(&sdl), ETHER_ADDR_LEN);
441 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
442 if (error) {
443 printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
444 return (error);
445 }
446
447 lp = malloc(sizeof(struct lacp_port),
448 M_DEVBUF, M_NOWAIT|M_ZERO);
449 if (lp == NULL)
450 return (ENOMEM);
451
452 lgp->lp_psc = (caddr_t)lp;
453 lp->lp_ifp = ifp;
454 lp->lp_lagg = lgp;
455 lp->lp_lsc = lsc;
456 lp->lp_ifma = rifma;
457
458 LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
459
460 lacp_fill_actorinfo(lp, &lp->lp_actor);
461 lp->lp_state =
462 (active ? LACP_STATE_ACTIVITY : 0) |
463 (fast ? LACP_STATE_TIMEOUT : 0);
464 lp->lp_aggregator = NULL;
465 lacp_linkstate(lgp);
466 lacp_sm_rx_set_expired(lp);
467
468 return (0);
469}
470
471void
472lacp_port_destroy(struct lagg_port *lgp)
473{
474 struct lacp_port *lp = LACP_PORT(lgp);
475 int i;
476
477 LAGG_LOCK_ASSERT(lgp->lp_lagg);
478
479 for (i = 0; i < LACP_NTIMER; i++) {
480 LACP_TIMER_DISARM(lp, i);
481 }
482
483 lacp_disable_collecting(lp);
484 lacp_disable_distributing(lp);
485 lacp_unselect(lp);
486 lgp->lp_flags &= ~LAGG_PORT_DISABLED;
487
488 if_delmulti_ifma(lp->lp_ifma);
489
490 LIST_REMOVE(lp, lp_next);
491 free(lp, M_DEVBUF);
492}
493
494int
495lacp_port_isactive(struct lagg_port *lgp)
496{
497 struct lacp_port *lp = LACP_PORT(lgp);
498 struct lacp_softc *lsc = lp->lp_lsc;
499 struct lacp_aggregator *la = lp->lp_aggregator;
500
501 /* This port is joined to the active aggregator */
502 if (la != NULL && la == lsc->lsc_active_aggregator)
503 return (1);
504
505 return (0);
506}
507
508static void
509lacp_disable_collecting(struct lacp_port *lp)
510{
511 struct lagg_port *lgp = lp->lp_lagg;
512
513 LACP_DPRINTF((lp, "collecting disabled\n"));
514
515 lp->lp_state &= ~LACP_STATE_COLLECTING;
516 lgp->lp_flags &= ~LAGG_PORT_COLLECTING;
517}
518
519static void
520lacp_enable_collecting(struct lacp_port *lp)
521{
522 struct lagg_port *lgp = lp->lp_lagg;
523
524 LACP_DPRINTF((lp, "collecting enabled\n"));
525
526 lp->lp_state |= LACP_STATE_COLLECTING;
527 lgp->lp_flags |= LAGG_PORT_COLLECTING;
528}
529
530static void
531lacp_disable_distributing(struct lacp_port *lp)
532{
533 struct lacp_aggregator *la = lp->lp_aggregator;
534 struct lacp_softc *lsc = lp->lp_lsc;
535 struct lagg_port *lgp = lp->lp_lagg;
536#if defined(LACP_DEBUG)
537 char buf[LACP_LAGIDSTR_MAX+1];
538#endif /* defined(LACP_DEBUG) */
539
540 LAGG_LOCK_ASSERT(lgp->lp_lagg);
541
542 if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
543 return;
544 }
545
546 KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
547 KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
548 KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
549
550 LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
551 "nports %d -> %d\n",
552 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
553 la->la_nports, la->la_nports - 1));
554
555 TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
556 la->la_nports--;
557
558 lacp_suppress_distributing(lsc, la);
559
560 lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
561 lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING;
562
563 if (lsc->lsc_active_aggregator == la) {
564 lacp_select_active_aggregator(lsc);
565 }
566}
567
568static void
569lacp_enable_distributing(struct lacp_port *lp)
570{
571 struct lacp_aggregator *la = lp->lp_aggregator;
572 struct lacp_softc *lsc = lp->lp_lsc;
573 struct lagg_port *lgp = lp->lp_lagg;
574#if defined(LACP_DEBUG)
575 char buf[LACP_LAGIDSTR_MAX+1];
576#endif /* defined(LACP_DEBUG) */
577
578 LAGG_LOCK_ASSERT(lgp->lp_lagg);
579
580 if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
581 return;
582 }
583
584 LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
585 "nports %d -> %d\n",
586 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
587 la->la_nports, la->la_nports + 1));
588
589 KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
590 TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
591 la->la_nports++;
592
593 lacp_suppress_distributing(lsc, la);
594
595 lp->lp_state |= LACP_STATE_DISTRIBUTING;
596 lgp->lp_flags |= LAGG_PORT_DISTRIBUTING;
597
598 if (lsc->lsc_active_aggregator != la) {
599 lacp_select_active_aggregator(lsc);
600 }
601}
602
603static void
604lacp_transit_expire(void *vp)
605{
606 struct lacp_softc *lsc = vp;
607
608 LACP_DPRINTF((NULL, "%s\n", __func__));
609 lsc->lsc_suppress_distributing = FALSE;
610}
611
612int
613lacp_attach(struct lagg_softc *lgs)
614{
615 struct lacp_softc *lsc;
616
617 LAGG_LOCK_ASSERT(lgs);
618
619 lsc = malloc(sizeof(struct lacp_softc),
620 M_DEVBUF, M_NOWAIT|M_ZERO);
621 if (lsc == NULL)
622 return (ENOMEM);
623
624 lgs->sc_psc = (caddr_t)lsc;
625 lsc->lsc_lagg = lgs;
626
627 lsc->lsc_hashkey = arc4random();
628 lsc->lsc_active_aggregator = NULL;
629 TAILQ_INIT(&lsc->lsc_aggregators);
630 LIST_INIT(&lsc->lsc_ports);
631
632 callout_init_mtx(&lsc->lsc_transit_callout, &lgs->sc_mtx, 0);
633 callout_init_mtx(&lsc->lsc_callout, &lgs->sc_mtx, 0);
634
635 /* if the lagg is already up then do the same */
636 if (lgs->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
637 lacp_init(lgs);
638
639 return (0);
640}
641
642int
643lacp_detach(struct lagg_softc *lgs)
644{
645 struct lacp_softc *lsc = LACP_SOFTC(lgs);
646
647 KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
648 ("aggregators still active"));
649 KASSERT(lsc->lsc_active_aggregator == NULL,
650 ("aggregator still attached"));
651
652 lgs->sc_psc = NULL;
653 callout_drain(&lsc->lsc_transit_callout);
654 callout_drain(&lsc->lsc_callout);
655
656 free(lsc, M_DEVBUF);
657 return (0);
658}
659
660void
661lacp_init(struct lagg_softc *lgs)
662{
663 struct lacp_softc *lsc = LACP_SOFTC(lgs);
664
665 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
666}
667
668void
669lacp_stop(struct lagg_softc *lgs)
670{
671 struct lacp_softc *lsc = LACP_SOFTC(lgs);
672
673 callout_stop(&lsc->lsc_transit_callout);
674 callout_stop(&lsc->lsc_callout);
675}
676
677struct lagg_port *
678lacp_select_tx_port(struct lagg_softc *lgs, struct mbuf *m)
679{
680 struct lacp_softc *lsc = LACP_SOFTC(lgs);
681 struct lacp_aggregator *la;
682 struct lacp_port *lp;
683 uint32_t hash;
684 int nports;
685
686 LAGG_LOCK_ASSERT(lgs);
687
688 if (__predict_false(lsc->lsc_suppress_distributing)) {
689 LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
690 return (NULL);
691 }
692
693 la = lsc->lsc_active_aggregator;
694 if (__predict_false(la == NULL)) {
695 LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
696 return (NULL);
697 }
698
699 nports = la->la_nports;
700 KASSERT(nports > 0, ("no ports available"));
701
702 hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
703 hash %= nports;
704 lp = TAILQ_FIRST(&la->la_ports);
705 while (hash--) {
706 lp = TAILQ_NEXT(lp, lp_dist_q);
707 }
708
709 KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
710 ("aggregated port is not distributing"));
711
712 return (lp->lp_lagg);
713}
714/*
715 * lacp_suppress_distributing: drop transmit packets for a while
716 * to preserve packet ordering.
717 */
718
719static void
720lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
721{
722 if (lsc->lsc_active_aggregator != la) {
723 return;
724 }
725
726 LACP_DPRINTF((NULL, "%s\n", __func__));
727 lsc->lsc_suppress_distributing = TRUE;
728 /* XXX should consider collector max delay */
729 callout_reset(&lsc->lsc_transit_callout,
730 LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
731}
732
733static int
734lacp_compare_peerinfo(const struct lacp_peerinfo *a,
735 const struct lacp_peerinfo *b)
736{
737 return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
738}
739
740static int
741lacp_compare_systemid(const struct lacp_systemid *a,
742 const struct lacp_systemid *b)
743{
744 return (memcmp(a, b, sizeof(*a)));
745}
746
#if 0 /* unused */
/*
 * lacp_compare_portid: bytewise comparison of two port ids.
 * Returns the memcmp() result (0 when equal).  Compiled out.
 */
static int
lacp_compare_portid(const struct lacp_portid *a,
    const struct lacp_portid *b)
{
	return (memcmp(a, b, sizeof(*a)));
}
#endif
755
756static uint64_t
757lacp_aggregator_bandwidth(struct lacp_aggregator *la)
758{
759 struct lacp_port *lp;
760 uint64_t speed;
761
762 lp = TAILQ_FIRST(&la->la_ports);
763 if (lp == NULL) {
764 return (0);
765 }
766
767 speed = ifmedia_baudrate(lp->lp_media);
768 speed *= la->la_nports;
769 if (speed == 0) {
770 LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
771 lp->lp_media, la->la_nports));
772 }
773
774 return (speed);
775}
776
777/*
778 * lacp_select_active_aggregator: select an aggregator to be used to transmit
779 * packets from lagg(4) interface.
780 */
781
782static void
783lacp_select_active_aggregator(struct lacp_softc *lsc)
784{
785 struct lacp_aggregator *la;
786 struct lacp_aggregator *best_la = NULL;
787 uint64_t best_speed = 0;
788#if defined(LACP_DEBUG)
789 char buf[LACP_LAGIDSTR_MAX+1];
790#endif /* defined(LACP_DEBUG) */
791
792 LACP_DPRINTF((NULL, "%s:\n", __func__));
793
794 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
795 uint64_t speed;
796
797 if (la->la_nports == 0) {
798 continue;
799 }
800
801 speed = lacp_aggregator_bandwidth(la);
802 LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
803 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
804 speed, la->la_nports));
805 if (speed > best_speed ||
806 (speed == best_speed &&
807 la == lsc->lsc_active_aggregator)) {
808 best_la = la;
809 best_speed = speed;
810 }
811 }
812
813 KASSERT(best_la == NULL || best_la->la_nports > 0,
814 ("invalid aggregator refcnt"));
815 KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
816 ("invalid aggregator list"));
817
818#if defined(LACP_DEBUG)
819 if (lsc->lsc_active_aggregator != best_la) {
820 LACP_DPRINTF((NULL, "active aggregator changed\n"));
821 LACP_DPRINTF((NULL, "old %s\n",
822 lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
823 buf, sizeof(buf))));
824 } else {
825 LACP_DPRINTF((NULL, "active aggregator not changed\n"));
826 }
827 LACP_DPRINTF((NULL, "new %s\n",
828 lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
829#endif /* defined(LACP_DEBUG) */
830
831 if (lsc->lsc_active_aggregator != best_la) {
832 lsc->lsc_active_aggregator = best_la;
833 if (best_la) {
834 lacp_suppress_distributing(lsc, best_la);
835 }
836 }
837}
838
839static uint16_t
840lacp_compose_key(struct lacp_port *lp)
841{
842 struct lagg_port *lgp = lp->lp_lagg;
843 struct lagg_softc *lgs = lgp->lp_lagg;
844 u_int media = lp->lp_media;
845 uint16_t key;
846
847 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
848
849 /*
850 * non-aggregatable links should have unique keys.
851 *
852 * XXX this isn't really unique as if_index is 16 bit.
853 */
854
855 /* bit 0..14: (some bits of) if_index of this port */
856 key = lp->lp_ifp->if_index;
857 /* bit 15: 1 */
858 key |= 0x8000;
859 } else {
860 u_int subtype = IFM_SUBTYPE(media);
861
862 KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
863 KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
864
865 /* bit 0..4: IFM_SUBTYPE */
866 key = subtype;
867 /* bit 5..14: (some bits of) if_index of lagg device */
868 key |= 0x7fe0 & ((lgs->sc_ifp->if_index) << 5);
869 /* bit 15: 0 */
870 }
871 return (htons(key));
872}
873
874static void
875lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
876{
877#if defined(LACP_DEBUG)
878 char buf[LACP_LAGIDSTR_MAX+1];
879#endif
880
881 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
882 __func__,
883 lacp_format_lagid(&la->la_actor, &la->la_partner,
884 buf, sizeof(buf)),
885 la->la_refcnt, la->la_refcnt + 1));
886
887 KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
888 la->la_refcnt++;
889 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
890}
891
892static void
893lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
894{
895#if defined(LACP_DEBUG)
896 char buf[LACP_LAGIDSTR_MAX+1];
897#endif
898
899 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
900 __func__,
901 lacp_format_lagid(&la->la_actor, &la->la_partner,
902 buf, sizeof(buf)),
903 la->la_refcnt, la->la_refcnt - 1));
904
905 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
906 la->la_refcnt--;
907 if (la->la_refcnt > 0) {
908 return;
909 }
910
911 KASSERT(la->la_refcnt == 0, ("refcount not zero"));
912 KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
913
914 TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
915
916 free(la, M_DEVBUF);
917}
918
919/*
920 * lacp_aggregator_get: allocate an aggregator.
921 */
922
923static struct lacp_aggregator *
924lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
925{
926 struct lacp_aggregator *la;
927
928 la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
929 if (la) {
930 la->la_refcnt = 1;
931 la->la_nports = 0;
932 TAILQ_INIT(&la->la_ports);
933 la->la_pending = 0;
934 TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
935 }
936
937 return (la);
938}
939
940/*
941 * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
942 */
943
944static void
945lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
946{
947 lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
948 lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
949
950 la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
951}
952
953static void
954lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
955 const struct lacp_peerinfo *lpi_port)
956{
957 memset(lpi_aggr, 0, sizeof(*lpi_aggr));
958 lpi_aggr->lip_systemid = lpi_port->lip_systemid;
959 lpi_aggr->lip_key = lpi_port->lip_key;
960}
961
962/*
963 * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
964 */
965
966static int
967lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
968 const struct lacp_port *lp)
969{
970 if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
971 !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
972 return (0);
973 }
974
975 if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
976 return (0);
977 }
978
979 if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
980 return (0);
981 }
982
983 if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
984 return (0);
985 }
986
987 return (1);
988}
989
990static int
991lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
992 const struct lacp_peerinfo *b)
993{
994 if (memcmp(&a->lip_systemid, &b->lip_systemid,
995 sizeof(a->lip_systemid))) {
996 return (0);
997 }
998
999 if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
1000 return (0);
1001 }
1002
1003 return (1);
1004}
1005
1006static void
1007lacp_port_enable(struct lacp_port *lp)
1008{
1009 struct lagg_port *lgp = lp->lp_lagg;
1010
1011 lp->lp_state |= LACP_STATE_AGGREGATION;
1012 lgp->lp_flags &= ~LAGG_PORT_DISABLED;
1013}
1014
1015static void
1016lacp_port_disable(struct lacp_port *lp)
1017{
1018 struct lagg_port *lgp = lp->lp_lagg;
1019
1020 lacp_set_mux(lp, LACP_MUX_DETACHED);
1021
1022 lp->lp_state &= ~LACP_STATE_AGGREGATION;
1023 lp->lp_selected = LACP_UNSELECTED;
1024 lacp_sm_rx_record_default(lp);
1025 lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
1026 lp->lp_state &= ~LACP_STATE_EXPIRED;
1027 lgp->lp_flags |= LAGG_PORT_DISABLED;
1028}
1029
1030/*
1031 * lacp_select: select an aggregator. create one if necessary.
1032 */
1033static void
1034lacp_select(struct lacp_port *lp)
1035{
1036 struct lacp_softc *lsc = lp->lp_lsc;
1037 struct lacp_aggregator *la;
1038#if defined(LACP_DEBUG)
1039 char buf[LACP_LAGIDSTR_MAX+1];
1040#endif
1041
1042 if (lp->lp_aggregator) {
1043 return;
1044 }
1045
1046 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1047 ("timer_wait_while still active"));
1048
1049 LACP_DPRINTF((lp, "port lagid=%s\n",
1050 lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
1051 buf, sizeof(buf))));
1052
1053 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
1054 if (lacp_aggregator_is_compatible(la, lp)) {
1055 break;
1056 }
1057 }
1058
1059 if (la == NULL) {
1060 la = lacp_aggregator_get(lsc, lp);
1061 if (la == NULL) {
1062 LACP_DPRINTF((lp, "aggregator creation failed\n"));
1063
1064 /*
1065 * will retry on the next tick.
1066 */
1067
1068 return;
1069 }
1070 lacp_fill_aggregator_id(la, lp);
1071 LACP_DPRINTF((lp, "aggregator created\n"));
1072 } else {
1073 LACP_DPRINTF((lp, "compatible aggregator found\n"));
1074 lacp_aggregator_addref(lsc, la);
1075 }
1076
1077 LACP_DPRINTF((lp, "aggregator lagid=%s\n",
1078 lacp_format_lagid(&la->la_actor, &la->la_partner,
1079 buf, sizeof(buf))));
1080
1081 lp->lp_aggregator = la;
1082 lp->lp_selected = LACP_SELECTED;
1083}
1084
1085/*
1086 * lacp_unselect: finish unselect/detach process.
1087 */
1088
1089static void
1090lacp_unselect(struct lacp_port *lp)
1091{
1092 struct lacp_softc *lsc = lp->lp_lsc;
1093 struct lacp_aggregator *la = lp->lp_aggregator;
1094
1095 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1096 ("timer_wait_while still active"));
1097
1098 if (la == NULL) {
1099 return;
1100 }
1101
1102 lp->lp_aggregator = NULL;
1103 lacp_aggregator_delref(lsc, la);
1104}
1105
1106/* mux machine */
1107
1108static void
1109lacp_sm_mux(struct lacp_port *lp)
1110{
1111 enum lacp_mux_state new_state;
1112 boolean_t p_sync =
1113 (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
1114 boolean_t p_collecting =
1115 (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
1116 enum lacp_selected selected = lp->lp_selected;
1117 struct lacp_aggregator *la;
1118
1119 /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
1120
1121re_eval:
1122 la = lp->lp_aggregator;
1123 KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
1124 ("MUX not detached"));
1125 new_state = lp->lp_mux_state;
1126 switch (lp->lp_mux_state) {
1127 case LACP_MUX_DETACHED:
1128 if (selected != LACP_UNSELECTED) {
1129 new_state = LACP_MUX_WAITING;
1130 }
1131 break;
1132 case LACP_MUX_WAITING:
1133 KASSERT(la->la_pending > 0 ||
1134 !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1135 ("timer_wait_while still active"));
1136 if (selected == LACP_SELECTED && la->la_pending == 0) {
1137 new_state = LACP_MUX_ATTACHED;
1138 } else if (selected == LACP_UNSELECTED) {
1139 new_state = LACP_MUX_DETACHED;
1140 }
1141 break;
1142 case LACP_MUX_ATTACHED:
1143 if (selected == LACP_SELECTED && p_sync) {
1144 new_state = LACP_MUX_COLLECTING;
1145 } else if (selected != LACP_SELECTED) {
1146 new_state = LACP_MUX_DETACHED;
1147 }
1148 break;
1149 case LACP_MUX_COLLECTING:
1150 if (selected == LACP_SELECTED && p_sync && p_collecting) {
1151 new_state = LACP_MUX_DISTRIBUTING;
1152 } else if (selected != LACP_SELECTED || !p_sync) {
1153 new_state = LACP_MUX_ATTACHED;
1154 }
1155 break;
1156 case LACP_MUX_DISTRIBUTING:
1157 if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
1158 new_state = LACP_MUX_COLLECTING;
1159 }
1160 break;
1161 default:
1162 panic("%s: unknown state", __func__);
1163 }
1164
1165 if (lp->lp_mux_state == new_state) {
1166 return;
1167 }
1168
1169 lacp_set_mux(lp, new_state);
1170 goto re_eval;
1171}
1172
1173static void
1174lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
1175{
1176 struct lacp_aggregator *la = lp->lp_aggregator;
1177
1178 if (lp->lp_mux_state == new_state) {
1179 return;
1180 }
1181
1182 switch (new_state) {
1183 case LACP_MUX_DETACHED:
1184 lp->lp_state &= ~LACP_STATE_SYNC;
1185 lacp_disable_distributing(lp);
1186 lacp_disable_collecting(lp);
1187 lacp_sm_assert_ntt(lp);
1188 /* cancel timer */
1189 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
1190 KASSERT(la->la_pending > 0,
1191 ("timer_wait_while not active"));
1192 la->la_pending--;
1193 }
1194 LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
1195 lacp_unselect(lp);
1196 break;
1197 case LACP_MUX_WAITING:
1198 LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
1199 LACP_AGGREGATE_WAIT_TIME);
1200 la->la_pending++;
1201 break;
1202 case LACP_MUX_ATTACHED:
1203 lp->lp_state |= LACP_STATE_SYNC;
1204 lacp_disable_collecting(lp);
1205 lacp_sm_assert_ntt(lp);
1206 break;
1207 case LACP_MUX_COLLECTING:
1208 lacp_enable_collecting(lp);
1209 lacp_disable_distributing(lp);
1210 lacp_sm_assert_ntt(lp);
1211 break;
1212 case LACP_MUX_DISTRIBUTING:
1213 lacp_enable_distributing(lp);
1214 break;
1215 default:
1216 panic("%s: unknown state", __func__);
1217 }
1218
1219 LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
1220
1221 lp->lp_mux_state = new_state;
1222}
1223
1224static void
1225lacp_sm_mux_timer(struct lacp_port *lp)
1226{
1227 struct lacp_aggregator *la = lp->lp_aggregator;
1228#if defined(LACP_DEBUG)
1229 char buf[LACP_LAGIDSTR_MAX+1];
1230#endif
1231
1232 KASSERT(la->la_pending > 0, ("no pending event"));
1233
1234 LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
1235 lacp_format_lagid(&la->la_actor, &la->la_partner,
1236 buf, sizeof(buf)),
1237 la->la_pending, la->la_pending - 1));
1238
1239 la->la_pending--;
1240}
1241
1242/* periodic transmit machine */
1243
1244static void
1245lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
1246{
1247 if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
1248 LACP_STATE_TIMEOUT)) {
1249 return;
1250 }
1251
1252 LACP_DPRINTF((lp, "partner timeout changed\n"));
1253
1254 /*
1255 * FAST_PERIODIC -> SLOW_PERIODIC
1256 * or
1257 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
1258 *
1259 * let lacp_sm_ptx_tx_schedule to update timeout.
1260 */
1261
1262 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1263
1264 /*
1265 * if timeout has been shortened, assert NTT.
1266 */
1267
1268 if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
1269 lacp_sm_assert_ntt(lp);
1270 }
1271}
1272
1273static void
1274lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
1275{
1276 int timeout;
1277
1278 if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
1279 !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
1280
1281 /*
1282 * NO_PERIODIC
1283 */
1284
1285 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1286 return;
1287 }
1288
1289 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
1290 return;
1291 }
1292
1293 timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
1294 LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
1295
1296 LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
1297}
1298
/*
 * lacp_sm_ptx_timer: periodic transmit timer action; simply asserts NTT
 * on the port.
 */
static void
lacp_sm_ptx_timer(struct lacp_port *lp)
{

	lacp_sm_assert_ntt(lp);
}
1304
1305static void
1306lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
1307{
1308 int timeout;
1309
1310 /*
1311 * check LACP_DISABLED first
1312 */
1313
1314 if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
1315 return;
1316 }
1317
1318 /*
1319 * check loopback condition.
1320 */
1321
1322 if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
1323 &lp->lp_actor.lip_systemid)) {
1324 return;
1325 }
1326
1327 /*
1328 * EXPIRED, DEFAULTED, CURRENT -> CURRENT
1329 */
1330
1331 lacp_sm_rx_update_selected(lp, du);
1332 lacp_sm_rx_update_ntt(lp, du);
1333 lacp_sm_rx_record_pdu(lp, du);
1334
1335 timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
1336 LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
1337 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
1338
1339 lp->lp_state &= ~LACP_STATE_EXPIRED;
1340
1341 /*
1342 * kick transmit machine without waiting the next tick.
1343 */
1344
1345 lacp_sm_tx(lp);
1346}
1347
1348static void
1349lacp_sm_rx_set_expired(struct lacp_port *lp)
1350{
1351 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1352 lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
1353 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
1354 lp->lp_state |= LACP_STATE_EXPIRED;
1355}
1356
1357static void
1358lacp_sm_rx_timer(struct lacp_port *lp)
1359{
1360 if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
1361 /* CURRENT -> EXPIRED */
1362 LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
1363 lacp_sm_rx_set_expired(lp);
1364 } else {
1365 /* EXPIRED -> DEFAULTED */
1366 LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
1367 lacp_sm_rx_update_default_selected(lp);
1368 lacp_sm_rx_record_default(lp);
1369 lp->lp_state &= ~LACP_STATE_EXPIRED;
1370 }
1371}
1372
1373static void
1374lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
1375{
1376 boolean_t active;
1377 uint8_t oldpstate;
1378#if defined(LACP_DEBUG)
1379 char buf[LACP_STATESTR_MAX+1];
1380#endif
1381
1382 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1383
1384 oldpstate = lp->lp_partner.lip_state;
1385
1386 active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
1387 || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
1388 (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
1389
1390 lp->lp_partner = du->ldu_actor;
1391 if (active &&
1392 ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1393 LACP_STATE_AGGREGATION) &&
1394 !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
1395 || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
1396 /* XXX nothing? */
1397 } else {
1398 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1399 }
1400
1401 lp->lp_state &= ~LACP_STATE_DEFAULTED;
1402
1403 if (oldpstate != lp->lp_partner.lip_state) {
1404 LACP_DPRINTF((lp, "old pstate %s\n",
1405 lacp_format_state(oldpstate, buf, sizeof(buf))));
1406 LACP_DPRINTF((lp, "new pstate %s\n",
1407 lacp_format_state(lp->lp_partner.lip_state, buf,
1408 sizeof(buf))));
1409 }
1410
1411 lacp_sm_ptx_update_timeout(lp, oldpstate);
1412}
1413
1414static void
1415lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
1416{
1417 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1418
1419 if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
1420 !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1421 LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
1422 LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
1423 lacp_sm_assert_ntt(lp);
1424 }
1425}
1426
1427static void
1428lacp_sm_rx_record_default(struct lacp_port *lp)
1429{
1430 uint8_t oldpstate;
1431
1432 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1433
1434 oldpstate = lp->lp_partner.lip_state;
1435 lp->lp_partner = lacp_partner_admin;
1436 lp->lp_state |= LACP_STATE_DEFAULTED;
1437 lacp_sm_ptx_update_timeout(lp, oldpstate);
1438}
1439
1440static void
1441lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
1442 const struct lacp_peerinfo *info)
1443{
1444 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1445
1446 if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
1447 !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
1448 LACP_STATE_AGGREGATION)) {
1449 lp->lp_selected = LACP_UNSELECTED;
1450 /* mux machine will clean up lp->lp_aggregator */
1451 }
1452}
1453
1454static void
1455lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
1456{
1457 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1458
1459 lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
1460}
1461
1462static void
1463lacp_sm_rx_update_default_selected(struct lacp_port *lp)
1464{
1465 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1466
1467 lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
1468}
1469
1470/* transmit machine */
1471
1472static void
1473lacp_sm_tx(struct lacp_port *lp)
1474{
1475 int error;
1476
1477 if (!(lp->lp_state & LACP_STATE_AGGREGATION)
1478#if 1
1479 || (!(lp->lp_state & LACP_STATE_ACTIVITY)
1480 && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
1481#endif
1482 ) {
1483 lp->lp_flags &= ~LACP_PORT_NTT;
1484 }
1485
1486 if (!(lp->lp_flags & LACP_PORT_NTT)) {
1487 return;
1488 }
1489
1490 /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
1491 if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
1492 (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
1493 LACP_DPRINTF((lp, "rate limited pdu\n"));
1494 return;
1495 }
1496
1497 error = lacp_xmit_lacpdu(lp);
1498
1499 if (error == 0) {
1500 lp->lp_flags &= ~LACP_PORT_NTT;
1501 } else {
1502 LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
1503 error));
1504 }
1505}
1506
1507static void
1508lacp_sm_assert_ntt(struct lacp_port *lp)
1509{
1510
1511 lp->lp_flags |= LACP_PORT_NTT;
1512}
1513
1514static void
1515lacp_run_timers(struct lacp_port *lp)
1516{
1517 int i;
1518
1519 for (i = 0; i < LACP_NTIMER; i++) {
1520 KASSERT(lp->lp_timer[i] >= 0,
1521 ("invalid timer value %d", lp->lp_timer[i]));
1522 if (lp->lp_timer[i] == 0) {
1523 continue;
1524 } else if (--lp->lp_timer[i] <= 0) {
1525 if (lacp_timer_funcs[i]) {
1526 (*lacp_timer_funcs[i])(lp);
1527 }
1528 }
1529 }
1530}
1531
1532int
1533lacp_marker_input(struct lagg_port *lgp, struct mbuf *m)
1534{
1535 struct lacp_port *lp = LACP_PORT(lgp);
1536 struct markerdu *mdu;
1537 int error = 0;
1538
1539 LAGG_LOCK_ASSERT(lgp->lp_lagg);
1540
1541 if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
1542 goto bad;
1543 }
1544
1545 if (m->m_pkthdr.len != sizeof(*mdu)) {
1546 goto bad;
1547 }
1548
1549 if ((m->m_flags & M_MCAST) == 0) {
1550 goto bad;
1551 }
1552
1553 if (m->m_len < sizeof(*mdu)) {
1554 m = m_pullup(m, sizeof(*mdu));
1555 if (m == NULL) {
1556 return (ENOMEM);
1557 }
1558 }
1559
1560 mdu = mtod(m, struct markerdu *);
1561
1562 if (memcmp(&mdu->mdu_eh.ether_dhost,
1563 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
1564 goto bad;
1565 }
1566
1567 /* XXX
1568 KASSERT(mdu->mdu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_MARKER,
1569 ("a very bad kassert!"));
1570 */
1571
1572 if (mdu->mdu_sph.sph_version != 1) {
1573 goto bad;
1574 }
1575
1576 switch (mdu->mdu_tlv.tlv_type) {
1577 case MARKER_TYPE_INFO:
1578 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1579 marker_info_tlv_template, TRUE)) {
1580 goto bad;
1581 }
1582 mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
1583 memcpy(&mdu->mdu_eh.ether_dhost,
1584 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
1585 memcpy(&mdu->mdu_eh.ether_shost,
1586 lgp->lp_lladdr, ETHER_ADDR_LEN);
1587 error = lagg_enqueue(lp->lp_ifp, m);
1588 break;
1589
1590 case MARKER_TYPE_RESPONSE:
1591 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1592 marker_response_tlv_template, TRUE)) {
1593 goto bad;
1594 }
1595 /*
1596 * we are not interested in responses as
1597 * we don't have a marker sender.
1598 */
1599 /* FALLTHROUGH */
1600 default:
1601 goto bad;
1602 }
1603
1604 return (error);
1605
1606bad:
1607 m_freem(m);
1608 return (EINVAL);
1609}
1610
1611static int
1612tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
1613 const struct tlv_template *tmpl, boolean_t check_type)
1614{
1615 while (/* CONSTCOND */ 1) {
1616 if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
1617 return (EINVAL);
1618 }
1619 if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
1620 tlv->tlv_length != tmpl->tmpl_length) {
1621 return (EINVAL);
1622 }
1623 if (tmpl->tmpl_type == 0) {
1624 break;
1625 }
1626 tlv = (const struct tlvhdr *)
1627 ((const char *)tlv + tlv->tlv_length);
1628 tmpl++;
1629 }
1630
1631 return (0);
1632}
1633
1634#if defined(LACP_DEBUG)
/*
 * lacp_format_mac: format the six bytes at mac as upper-case hex pairs
 * separated by dashes ("XX-XX-XX-XX-XX-XX") into buf, writing at most
 * buflen bytes.  Returns buf.
 */
const char *
lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
{

	/* uint8_t arguments are promoted to int for the %X conversions. */
	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
	    mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);

	return (buf);
}
1648
1649const char *
1650lacp_format_systemid(const struct lacp_systemid *sysid,
1651 char *buf, size_t buflen)
1652{
1653 char macbuf[LACP_MACSTR_MAX+1];
1654
1655 snprintf(buf, buflen, "%04X,%s",
1656 ntohs(sysid->lsi_prio),
1657 lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
1658
1659 return (buf);
1660}
1661
1662const char *
1663lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
1664{
1665 snprintf(buf, buflen, "%04X,%04X",
1666 ntohs(portid->lpi_prio),
1667 ntohs(portid->lpi_portno));
1668
1669 return (buf);
1670}
1671
1672const char *
1673lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
1674{
1675 char sysid[LACP_SYSTEMIDSTR_MAX+1];
1676 char portid[LACP_PORTIDSTR_MAX+1];
1677
1678 snprintf(buf, buflen, "(%s,%04X,%s)",
1679 lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
1680 ntohs(peer->lip_key),
1681 lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
1682
1683 return (buf);
1684}
1685
1686const char *
1687lacp_format_lagid(const struct lacp_peerinfo *a,
1688 const struct lacp_peerinfo *b, char *buf, size_t buflen)
1689{
1690 char astr[LACP_PARTNERSTR_MAX+1];
1691 char bstr[LACP_PARTNERSTR_MAX+1];
1692
1693#if 0
1694 /*
1695 * there's a convention to display small numbered peer
1696 * in the left.
1697 */
1698
1699 if (lacp_compare_peerinfo(a, b) > 0) {
1700 const struct lacp_peerinfo *t;
1701
1702 t = a;
1703 a = b;
1704 b = t;
1705 }
1706#endif
1707
1708 snprintf(buf, buflen, "[%s,%s]",
1709 lacp_format_partner(a, astr, sizeof(astr)),
1710 lacp_format_partner(b, bstr, sizeof(bstr)));
1711
1712 return (buf);
1713}
1714
1715const char *
1716lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
1717 char *buf, size_t buflen)
1718{
1719 if (la == NULL) {
1720 return ("(none)");
1721 }
1722
1723 return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
1724}
1725
1726const char *
1727lacp_format_state(uint8_t state, char *buf, size_t buflen)
1728{
1729 snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
1730 return (buf);
1731}
1732
1733static void
1734lacp_dump_lacpdu(const struct lacpdu *du)
1735{
1736 char buf[LACP_PARTNERSTR_MAX+1];
1737 char buf2[LACP_STATESTR_MAX+1];
1738
1739 printf("actor=%s\n",
1740 lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
1741 printf("actor.state=%s\n",
1742 lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
1743 printf("partner=%s\n",
1744 lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
1745 printf("partner.state=%s\n",
1746 lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
1747
1748 printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
1749}
1750
1751static void
1752lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
1753{
1754 va_list va;
1755
1756 if (lp) {
1757 printf("%s: ", lp->lp_ifp->if_xname);
1758 }
1759
1760 va_start(va, fmt);
1761 vprintf(fmt, va);
1762 va_end(va);
1763}
1764#endif