Deleted Added
sdiff udiff text old ( 169227 ) new ( 169327 )
full compact
1/* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */
2
3/*-
4 * Copyright (c)2005 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/net/ieee8023ad_lacp.c 169227 2007-05-03 08:56:20Z thompsa $");
31
32#include <sys/param.h>
33#include <sys/callout.h>
34#include <sys/mbuf.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/kernel.h> /* hz */
38#include <sys/socket.h> /* for net/if.h */
39#include <sys/sockio.h>
40#include <machine/stdarg.h>
41
42#include <net/if.h>
43#include <net/if_dl.h>
44#include <net/ethernet.h>
45#include <net/if_media.h>
46#include <net/if_types.h>
47
48#include <net/if_lagg.h>
49#include <net/ieee8023ad_lacp.h>
50
51/*
52 * actor system priority and port priority.
53 * XXX should be configurable.
54 */
55
56#define LACP_SYSTEM_PRIO 0x8000
57#define LACP_PORT_PRIO 0x8000
58
59const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
60 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
61
62static const struct tlv_template lacp_info_tlv_template[] = {
63 { LACP_TYPE_ACTORINFO,
64 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
65 { LACP_TYPE_PARTNERINFO,
66 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
67 { LACP_TYPE_COLLECTORINFO,
68 sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
69 { 0, 0 },
70};
71
72typedef void (*lacp_timer_func_t)(struct lacp_port *);
73
74static const struct tlv_template marker_info_tlv_template[] = {
75 { MARKER_TYPE_INFO, 16 },
76 { 0, 0 },
77};
78
79static const struct tlv_template marker_response_tlv_template[] = {
80 { MARKER_TYPE_RESPONSE, 16 },
81 { 0, 0 },
82};
83
84static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
85
86static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
87static void lacp_suppress_distributing(struct lacp_softc *,
88 struct lacp_aggregator *);
89static void lacp_transit_expire(void *);
90static void lacp_select_active_aggregator(struct lacp_softc *);
91static uint16_t lacp_compose_key(struct lacp_port *);
92static int tlv_check(const void *, size_t, const struct tlvhdr *,
93 const struct tlv_template *, boolean_t);
94static void lacp_tick(void *);
95
96static void lacp_fill_aggregator_id(struct lacp_aggregator *,
97 const struct lacp_port *);
98static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
99 const struct lacp_peerinfo *);
100static int lacp_aggregator_is_compatible(const struct lacp_aggregator *,
101 const struct lacp_port *);
102static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
103 const struct lacp_peerinfo *);
104
105static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
106 struct lacp_port *);
107static void lacp_aggregator_addref(struct lacp_softc *,
108 struct lacp_aggregator *);
109static void lacp_aggregator_delref(struct lacp_softc *,
110 struct lacp_aggregator *);
111
112/* receive machine */
113
114static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
115static void lacp_sm_rx_timer(struct lacp_port *);
116static void lacp_sm_rx_set_expired(struct lacp_port *);
117static void lacp_sm_rx_update_ntt(struct lacp_port *,
118 const struct lacpdu *);
119static void lacp_sm_rx_record_pdu(struct lacp_port *,
120 const struct lacpdu *);
121static void lacp_sm_rx_update_selected(struct lacp_port *,
122 const struct lacpdu *);
123static void lacp_sm_rx_record_default(struct lacp_port *);
124static void lacp_sm_rx_update_default_selected(struct lacp_port *);
125static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
126 const struct lacp_peerinfo *);
127
128/* mux machine */
129
130static void lacp_sm_mux(struct lacp_port *);
131static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
132static void lacp_sm_mux_timer(struct lacp_port *);
133
134/* periodic transmit machine */
135
136static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
137static void lacp_sm_ptx_tx_schedule(struct lacp_port *);
138static void lacp_sm_ptx_timer(struct lacp_port *);
139
140/* transmit machine */
141
142static void lacp_sm_tx(struct lacp_port *);
143static void lacp_sm_assert_ntt(struct lacp_port *);
144
145static void lacp_run_timers(struct lacp_port *);
146static int lacp_compare_peerinfo(const struct lacp_peerinfo *,
147 const struct lacp_peerinfo *);
148static int lacp_compare_systemid(const struct lacp_systemid *,
149 const struct lacp_systemid *);
150static void lacp_port_enable(struct lacp_port *);
151static void lacp_port_disable(struct lacp_port *);
152static void lacp_select(struct lacp_port *);
153static void lacp_unselect(struct lacp_port *);
154static void lacp_disable_collecting(struct lacp_port *);
155static void lacp_enable_collecting(struct lacp_port *);
156static void lacp_disable_distributing(struct lacp_port *);
157static void lacp_enable_distributing(struct lacp_port *);
158static int lacp_xmit_lacpdu(struct lacp_port *);
159
160#if defined(LACP_DEBUG)
161static void lacp_dump_lacpdu(const struct lacpdu *);
162static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
163 size_t);
164static const char *lacp_format_lagid(const struct lacp_peerinfo *,
165 const struct lacp_peerinfo *, char *, size_t);
166static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
167 char *, size_t);
168static const char *lacp_format_state(uint8_t, char *, size_t);
169static const char *lacp_format_mac(const uint8_t *, char *, size_t);
170static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
171 size_t);
172static const char *lacp_format_portid(const struct lacp_portid *, char *,
173 size_t);
174static void lacp_dprintf(const struct lacp_port *, const char *, ...)
175 __attribute__((__format__(__printf__, 2, 3)));
176#define LACP_DPRINTF(a) lacp_dprintf a
177#else
178#define LACP_DPRINTF(a) /* nothing */
179#endif
180
181/*
182 * partner administration variables.
183 * XXX should be configurable.
184 */
185
186static const struct lacp_peerinfo lacp_partner_admin = {
187 .lip_systemid = { .lsi_prio = 0xffff },
188 .lip_portid = { .lpi_prio = 0xffff },
189#if 1
190 /* optimistic */
191 .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
192 LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
193#else
194 /* pessimistic */
195 .lip_state = 0,
196#endif
197};
198
199static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
200 [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
201 [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
202 [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
203};
204
205/*
206 * lacp_input: process lacpdu
207 */
208int
209lacp_input(struct lagg_port *lgp, struct mbuf *m)
210{
211 struct lacp_port *lp = LACP_PORT(lgp);
212 struct lacpdu *du;
213 int error = 0;
214
215 LAGG_LOCK_ASSERT(lgp->lp_lagg);
216
217 if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
218 goto bad;
219 }
220
221 if (m->m_pkthdr.len != sizeof(*du)) {
222 goto bad;
223 }
224
225 if ((m->m_flags & M_MCAST) == 0) {
226 goto bad;
227 }
228
229 if (m->m_len < sizeof(*du)) {
230 m = m_pullup(m, sizeof(*du));
231 if (m == NULL) {
232 return (ENOMEM);
233 }
234 }
235
236 du = mtod(m, struct lacpdu *);
237
238 if (memcmp(&du->ldu_eh.ether_dhost,
239 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
240 goto bad;
241 }
242
243 /* XXX
244 KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP,
245 ("a very bad kassert!"));
246 */
247
248 /*
249 * ignore the version for compatibility with
250 * the future protocol revisions.
251 */
252
253#if 0
254 if (du->ldu_sph.sph_version != 1) {
255 goto bad;
256 }
257#endif
258
259 /*
260 * ignore tlv types for compatibility with
261 * the future protocol revisions.
262 */
263
264 if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
265 lacp_info_tlv_template, FALSE)) {
266 goto bad;
267 }
268
269#if defined(LACP_DEBUG)
270 LACP_DPRINTF((lp, "lacpdu receive\n"));
271 lacp_dump_lacpdu(du);
272#endif /* defined(LACP_DEBUG) */
273 lacp_sm_rx(lp, du);
274
275 m_freem(m);
276
277 return (error);
278
279bad:
280 m_freem(m);
281 return (EINVAL);
282}
283
284static void
285lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
286{
287 struct lagg_port *lgp = lp->lp_lagg;
288 struct lagg_softc *lgs = lgp->lp_lagg;
289
290 info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
291 memcpy(&info->lip_systemid.lsi_mac,
292 IF_LLADDR(lgs->sc_ifp), ETHER_ADDR_LEN);
293 info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
294 info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
295 info->lip_state = lp->lp_state;
296}
297
298static int
299lacp_xmit_lacpdu(struct lacp_port *lp)
300{
301 struct lagg_port *lgp = lp->lp_lagg;
302 struct mbuf *m;
303 struct lacpdu *du;
304 int error;
305
306 LAGG_LOCK_ASSERT(lgp->lp_lagg);
307
308 m = m_gethdr(M_DONTWAIT, MT_DATA);
309 if (m == NULL) {
310 return (ENOMEM);
311 }
312 m->m_len = m->m_pkthdr.len = sizeof(*du);
313
314 du = mtod(m, struct lacpdu *);
315 memset(du, 0, sizeof(*du));
316
317 memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
318 ETHER_ADDR_LEN);
319 memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
320 du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
321
322 du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
323 du->ldu_sph.sph_version = 1;
324
325 TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
326 du->ldu_actor = lp->lp_actor;
327
328 TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
329 sizeof(du->ldu_partner));
330 du->ldu_partner = lp->lp_partner;
331
332 TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
333 sizeof(du->ldu_collector));
334 du->ldu_collector.lci_maxdelay = 0;
335
336#if defined(LACP_DEBUG)
337 LACP_DPRINTF((lp, "lacpdu transmit\n"));
338 lacp_dump_lacpdu(du);
339#endif /* defined(LACP_DEBUG) */
340
341 m->m_flags |= M_MCAST;
342
343 /*
344 * XXX should use higher priority queue.
345 * otherwise network congestion can break aggregation.
346 */
347
348 error = lagg_enqueue(lp->lp_ifp, m);
349 return (error);
350}
351
352void
353lacp_linkstate(struct lagg_port *lgp)
354{
355 struct lacp_port *lp = LACP_PORT(lgp);
356 struct ifnet *ifp = lgp->lp_ifp;
357 struct ifmediareq ifmr;
358 int error = 0;
359 u_int media;
360 uint8_t old_state;
361 uint16_t old_key;
362
363 LAGG_LOCK_ASSERT(lgp->lp_lagg);
364
365 bzero((char *)&ifmr, sizeof(ifmr));
366 error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
367 if (error != 0)
368 return;
369
370 media = ifmr.ifm_active;
371 LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
372 "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
373 (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
374 old_state = lp->lp_state;
375 old_key = lp->lp_key;
376
377 lp->lp_media = media;
378 /*
379 * If the port is not an active full duplex Ethernet link then it can
380 * not be aggregated.
381 */
382 if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
383 ifp->if_link_state != LINK_STATE_UP) {
384 lacp_port_disable(lp);
385 } else {
386 lacp_port_enable(lp);
387 }
388 lp->lp_key = lacp_compose_key(lp);
389
390 if (old_state != lp->lp_state || old_key != lp->lp_key) {
391 LACP_DPRINTF((lp, "-> UNSELECTED\n"));
392 lp->lp_selected = LACP_UNSELECTED;
393 }
394}
395
396static void
397lacp_tick(void *arg)
398{
399 struct lacp_softc *lsc = arg;
400 struct lacp_port *lp;
401
402 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
403 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
404 continue;
405
406 lacp_run_timers(lp);
407
408 lacp_select(lp);
409 lacp_sm_mux(lp);
410 lacp_sm_tx(lp);
411 lacp_sm_ptx_tx_schedule(lp);
412 }
413 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
414}
415
416int
417lacp_port_create(struct lagg_port *lgp)
418{
419 struct lagg_softc *lgs = lgp->lp_lagg;
420 struct lacp_softc *lsc = LACP_SOFTC(lgs);
421 struct lacp_port *lp;
422 struct ifnet *ifp = lgp->lp_ifp;
423 struct sockaddr_dl sdl;
424 struct ifmultiaddr *rifma = NULL;
425 int error;
426
427 boolean_t active = TRUE; /* XXX should be configurable */
428 boolean_t fast = FALSE; /* XXX should be configurable */
429
430 LAGG_LOCK_ASSERT(lgs);
431
432 bzero((char *)&sdl, sizeof(sdl));
433 sdl.sdl_len = sizeof(sdl);
434 sdl.sdl_family = AF_LINK;
435 sdl.sdl_index = ifp->if_index;
436 sdl.sdl_type = IFT_ETHER;
437 sdl.sdl_alen = ETHER_ADDR_LEN;
438
439 bcopy(&ethermulticastaddr_slowprotocols,
440 LLADDR(&sdl), ETHER_ADDR_LEN);
441 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
442 if (error) {
443 printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
444 return (error);
445 }
446
447 lp = malloc(sizeof(struct lacp_port),
448 M_DEVBUF, M_NOWAIT|M_ZERO);
449 if (lp == NULL)
450 return (ENOMEM);
451
452 lgp->lp_psc = (caddr_t)lp;
453 lp->lp_ifp = ifp;
454 lp->lp_lagg = lgp;
455 lp->lp_lsc = lsc;
456
457 LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
458
459 lacp_fill_actorinfo(lp, &lp->lp_actor);
460 lp->lp_state =
461 (active ? LACP_STATE_ACTIVITY : 0) |
462 (fast ? LACP_STATE_TIMEOUT : 0);
463 lp->lp_aggregator = NULL;
464 lacp_linkstate(lgp);
465 lacp_sm_rx_set_expired(lp);
466
467 return (0);
468}
469
470void
471lacp_port_destroy(struct lagg_port *lgp)
472{
473 struct lacp_port *lp = LACP_PORT(lgp);
474 struct ifnet *ifp = lgp->lp_ifp;
475 struct sockaddr_dl sdl;
476 int i, error;
477
478 LAGG_LOCK_ASSERT(lgp->lp_lagg);
479
480 for (i = 0; i < LACP_NTIMER; i++) {
481 LACP_TIMER_DISARM(lp, i);
482 }
483
484 lacp_disable_collecting(lp);
485 lacp_disable_distributing(lp);
486 lacp_unselect(lp);
487 lgp->lp_flags &= ~LAGG_PORT_DISABLED;
488
489 bzero((char *)&sdl, sizeof(sdl));
490 sdl.sdl_len = sizeof(sdl);
491 sdl.sdl_family = AF_LINK;
492 sdl.sdl_index = ifp->if_index;
493 sdl.sdl_type = IFT_ETHER;
494 sdl.sdl_alen = ETHER_ADDR_LEN;
495
496 bcopy(&ethermulticastaddr_slowprotocols,
497 LLADDR(&sdl), ETHER_ADDR_LEN);
498 error = if_delmulti(ifp, (struct sockaddr *)&sdl);
499 if (error)
500 printf("%s: DELMULTI failed on %s\n", __func__, lgp->lp_ifname);
501
502 LIST_REMOVE(lp, lp_next);
503 free(lp, M_DEVBUF);
504}
505
506int
507lacp_port_isactive(struct lagg_port *lgp)
508{
509 struct lacp_port *lp = LACP_PORT(lgp);
510 struct lacp_softc *lsc = lp->lp_lsc;
511 struct lacp_aggregator *la = lp->lp_aggregator;
512
513 /* This port is joined to the active aggregator */
514 if (la != NULL && la == lsc->lsc_active_aggregator)
515 return (1);
516
517 return (0);
518}
519
520static void
521lacp_disable_collecting(struct lacp_port *lp)
522{
523 struct lagg_port *lgp = lp->lp_lagg;
524
525 LACP_DPRINTF((lp, "collecting disabled\n"));
526
527 lp->lp_state &= ~LACP_STATE_COLLECTING;
528 lgp->lp_flags &= ~LAGG_PORT_COLLECTING;
529}
530
531static void
532lacp_enable_collecting(struct lacp_port *lp)
533{
534 struct lagg_port *lgp = lp->lp_lagg;
535
536 LACP_DPRINTF((lp, "collecting enabled\n"));
537
538 lp->lp_state |= LACP_STATE_COLLECTING;
539 lgp->lp_flags |= LAGG_PORT_COLLECTING;
540}
541
542static void
543lacp_disable_distributing(struct lacp_port *lp)
544{
545 struct lacp_aggregator *la = lp->lp_aggregator;
546 struct lacp_softc *lsc = lp->lp_lsc;
547 struct lagg_port *lgp = lp->lp_lagg;
548#if defined(LACP_DEBUG)
549 char buf[LACP_LAGIDSTR_MAX+1];
550#endif /* defined(LACP_DEBUG) */
551
552 LAGG_LOCK_ASSERT(lgp->lp_lagg);
553
554 if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
555 return;
556 }
557
558 KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
559 KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
560 KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
561
562 LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
563 "nports %d -> %d\n",
564 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
565 la->la_nports, la->la_nports - 1));
566
567 TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
568 la->la_nports--;
569
570 lacp_suppress_distributing(lsc, la);
571
572 lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
573 lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING;
574
575 if (lsc->lsc_active_aggregator == la) {
576 lacp_select_active_aggregator(lsc);
577 }
578}
579
580static void
581lacp_enable_distributing(struct lacp_port *lp)
582{
583 struct lacp_aggregator *la = lp->lp_aggregator;
584 struct lacp_softc *lsc = lp->lp_lsc;
585 struct lagg_port *lgp = lp->lp_lagg;
586#if defined(LACP_DEBUG)
587 char buf[LACP_LAGIDSTR_MAX+1];
588#endif /* defined(LACP_DEBUG) */
589
590 LAGG_LOCK_ASSERT(lgp->lp_lagg);
591
592 if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
593 return;
594 }
595
596 LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
597 "nports %d -> %d\n",
598 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
599 la->la_nports, la->la_nports + 1));
600
601 KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
602 TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
603 la->la_nports++;
604
605 lacp_suppress_distributing(lsc, la);
606
607 lp->lp_state |= LACP_STATE_DISTRIBUTING;
608 lgp->lp_flags |= LAGG_PORT_DISTRIBUTING;
609
610 if (lsc->lsc_active_aggregator != la) {
611 lacp_select_active_aggregator(lsc);
612 }
613}
614
615static void
616lacp_transit_expire(void *vp)
617{
618 struct lacp_softc *lsc = vp;
619
620 LACP_DPRINTF((NULL, "%s\n", __func__));
621 lsc->lsc_suppress_distributing = FALSE;
622}
623
624int
625lacp_attach(struct lagg_softc *lgs)
626{
627 struct lacp_softc *lsc;
628
629 LAGG_LOCK_ASSERT(lgs);
630
631 lsc = malloc(sizeof(struct lacp_softc),
632 M_DEVBUF, M_NOWAIT|M_ZERO);
633 if (lsc == NULL)
634 return (ENOMEM);
635
636 lgs->sc_psc = (caddr_t)lsc;
637 lsc->lsc_lagg = lgs;
638
639 lsc->lsc_hashkey = arc4random();
640 lsc->lsc_active_aggregator = NULL;
641 TAILQ_INIT(&lsc->lsc_aggregators);
642 LIST_INIT(&lsc->lsc_ports);
643
644 callout_init_mtx(&lsc->lsc_transit_callout, &lgs->sc_mtx, 0);
645 callout_init_mtx(&lsc->lsc_callout, &lgs->sc_mtx, 0);
646
647 /* if the lagg is already up then do the same */
648 if (lgs->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
649 lacp_init(lgs);
650
651 return (0);
652}
653
654int
655lacp_detach(struct lagg_softc *lgs)
656{
657 struct lacp_softc *lsc = LACP_SOFTC(lgs);
658
659 KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
660 ("aggregators still active"));
661 KASSERT(lsc->lsc_active_aggregator == NULL,
662 ("aggregator still attached"));
663
664 lgs->sc_psc = NULL;
665 callout_drain(&lsc->lsc_transit_callout);
666 callout_drain(&lsc->lsc_callout);
667
668 free(lsc, M_DEVBUF);
669 return (0);
670}
671
672void
673lacp_init(struct lagg_softc *lgs)
674{
675 struct lacp_softc *lsc = LACP_SOFTC(lgs);
676
677 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
678}
679
680void
681lacp_stop(struct lagg_softc *lgs)
682{
683 struct lacp_softc *lsc = LACP_SOFTC(lgs);
684
685 callout_stop(&lsc->lsc_transit_callout);
686 callout_stop(&lsc->lsc_callout);
687}
688
689struct lagg_port *
690lacp_select_tx_port(struct lagg_softc *lgs, struct mbuf *m)
691{
692 struct lacp_softc *lsc = LACP_SOFTC(lgs);
693 struct lacp_aggregator *la;
694 struct lacp_port *lp;
695 uint32_t hash;
696 int nports;
697
698 LAGG_LOCK_ASSERT(lgs);
699
700 if (__predict_false(lsc->lsc_suppress_distributing)) {
701 LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
702 return (NULL);
703 }
704
705 la = lsc->lsc_active_aggregator;
706 if (__predict_false(la == NULL)) {
707 LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
708 return (NULL);
709 }
710
711 nports = la->la_nports;
712 KASSERT(nports > 0, ("no ports available"));
713
714 hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
715 hash %= nports;
716 lp = TAILQ_FIRST(&la->la_ports);
717 while (hash--) {
718 lp = TAILQ_NEXT(lp, lp_dist_q);
719 }
720
721 KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
722 ("aggregated port is not distributing"));
723
724 return (lp->lp_lagg);
725}
726/*
727 * lacp_suppress_distributing: drop transmit packets for a while
728 * to preserve packet ordering.
729 */
730
731static void
732lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
733{
734 if (lsc->lsc_active_aggregator != la) {
735 return;
736 }
737
738 LACP_DPRINTF((NULL, "%s\n", __func__));
739 lsc->lsc_suppress_distributing = TRUE;
740 /* XXX should consider collector max delay */
741 callout_reset(&lsc->lsc_transit_callout,
742 LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
743}
744
745static int
746lacp_compare_peerinfo(const struct lacp_peerinfo *a,
747 const struct lacp_peerinfo *b)
748{
749 return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
750}
751
752static int
753lacp_compare_systemid(const struct lacp_systemid *a,
754 const struct lacp_systemid *b)
755{
756 return (memcmp(a, b, sizeof(*a)));
757}
758
#if 0 /* unused */
/*
 * lacp_compare_portid: memcmp() of two port ids.  Returns the memcmp()
 * result.
 */
static int
lacp_compare_portid(const struct lacp_portid *a,
    const struct lacp_portid *b)
{
	return (memcmp(a, b, sizeof(struct lacp_portid)));
}
#endif
767
768static uint64_t
769lacp_aggregator_bandwidth(struct lacp_aggregator *la)
770{
771 struct lacp_port *lp;
772 uint64_t speed;
773
774 lp = TAILQ_FIRST(&la->la_ports);
775 if (lp == NULL) {
776 return (0);
777 }
778
779 speed = ifmedia_baudrate(lp->lp_media);
780 speed *= la->la_nports;
781 if (speed == 0) {
782 LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
783 lp->lp_media, la->la_nports));
784 }
785
786 return (speed);
787}
788
789/*
790 * lacp_select_active_aggregator: select an aggregator to be used to transmit
791 * packets from lagg(4) interface.
792 */
793
794static void
795lacp_select_active_aggregator(struct lacp_softc *lsc)
796{
797 struct lacp_aggregator *la;
798 struct lacp_aggregator *best_la = NULL;
799 uint64_t best_speed = 0;
800#if defined(LACP_DEBUG)
801 char buf[LACP_LAGIDSTR_MAX+1];
802#endif /* defined(LACP_DEBUG) */
803
804 LACP_DPRINTF((NULL, "%s:\n", __func__));
805
806 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
807 uint64_t speed;
808
809 if (la->la_nports == 0) {
810 continue;
811 }
812
813 speed = lacp_aggregator_bandwidth(la);
814 LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
815 lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
816 speed, la->la_nports));
817 if (speed > best_speed ||
818 (speed == best_speed &&
819 la == lsc->lsc_active_aggregator)) {
820 best_la = la;
821 best_speed = speed;
822 }
823 }
824
825 KASSERT(best_la == NULL || best_la->la_nports > 0,
826 ("invalid aggregator refcnt"));
827 KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
828 ("invalid aggregator list"));
829
830#if defined(LACP_DEBUG)
831 if (lsc->lsc_active_aggregator != best_la) {
832 LACP_DPRINTF((NULL, "active aggregator changed\n"));
833 LACP_DPRINTF((NULL, "old %s\n",
834 lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
835 buf, sizeof(buf))));
836 } else {
837 LACP_DPRINTF((NULL, "active aggregator not changed\n"));
838 }
839 LACP_DPRINTF((NULL, "new %s\n",
840 lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
841#endif /* defined(LACP_DEBUG) */
842
843 if (lsc->lsc_active_aggregator != best_la) {
844 lsc->lsc_active_aggregator = best_la;
845 if (best_la) {
846 lacp_suppress_distributing(lsc, best_la);
847 }
848 }
849}
850
851static uint16_t
852lacp_compose_key(struct lacp_port *lp)
853{
854 struct lagg_port *lgp = lp->lp_lagg;
855 struct lagg_softc *lgs = lgp->lp_lagg;
856 u_int media = lp->lp_media;
857 uint16_t key;
858
859 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
860
861 /*
862 * non-aggregatable links should have unique keys.
863 *
864 * XXX this isn't really unique as if_index is 16 bit.
865 */
866
867 /* bit 0..14: (some bits of) if_index of this port */
868 key = lp->lp_ifp->if_index;
869 /* bit 15: 1 */
870 key |= 0x8000;
871 } else {
872 u_int subtype = IFM_SUBTYPE(media);
873
874 KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
875 KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
876
877 /* bit 0..4: IFM_SUBTYPE */
878 key = subtype;
879 /* bit 5..14: (some bits of) if_index of lagg device */
880 key |= 0x7fe0 & ((lgs->sc_ifp->if_index) << 5);
881 /* bit 15: 0 */
882 }
883 return (htons(key));
884}
885
886static void
887lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
888{
889#if defined(LACP_DEBUG)
890 char buf[LACP_LAGIDSTR_MAX+1];
891#endif
892
893 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
894 __func__,
895 lacp_format_lagid(&la->la_actor, &la->la_partner,
896 buf, sizeof(buf)),
897 la->la_refcnt, la->la_refcnt + 1));
898
899 KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
900 la->la_refcnt++;
901 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
902}
903
904static void
905lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
906{
907#if defined(LACP_DEBUG)
908 char buf[LACP_LAGIDSTR_MAX+1];
909#endif
910
911 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
912 __func__,
913 lacp_format_lagid(&la->la_actor, &la->la_partner,
914 buf, sizeof(buf)),
915 la->la_refcnt, la->la_refcnt - 1));
916
917 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
918 la->la_refcnt--;
919 if (la->la_refcnt > 0) {
920 return;
921 }
922
923 KASSERT(la->la_refcnt == 0, ("refcount not zero"));
924 KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
925
926 TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
927
928 free(la, M_DEVBUF);
929}
930
931/*
932 * lacp_aggregator_get: allocate an aggregator.
933 */
934
935static struct lacp_aggregator *
936lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
937{
938 struct lacp_aggregator *la;
939
940 la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
941 if (la) {
942 la->la_refcnt = 1;
943 la->la_nports = 0;
944 TAILQ_INIT(&la->la_ports);
945 la->la_pending = 0;
946 TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
947 }
948
949 return (la);
950}
951
952/*
953 * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
954 */
955
956static void
957lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
958{
959 lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
960 lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
961
962 la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
963}
964
965static void
966lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
967 const struct lacp_peerinfo *lpi_port)
968{
969 memset(lpi_aggr, 0, sizeof(*lpi_aggr));
970 lpi_aggr->lip_systemid = lpi_port->lip_systemid;
971 lpi_aggr->lip_key = lpi_port->lip_key;
972}
973
974/*
975 * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
976 */
977
978static int
979lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
980 const struct lacp_port *lp)
981{
982 if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
983 !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
984 return (0);
985 }
986
987 if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
988 return (0);
989 }
990
991 if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
992 return (0);
993 }
994
995 if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
996 return (0);
997 }
998
999 return (1);
1000}
1001
1002static int
1003lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
1004 const struct lacp_peerinfo *b)
1005{
1006 if (memcmp(&a->lip_systemid, &b->lip_systemid,
1007 sizeof(a->lip_systemid))) {
1008 return (0);
1009 }
1010
1011 if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
1012 return (0);
1013 }
1014
1015 return (1);
1016}
1017
1018static void
1019lacp_port_enable(struct lacp_port *lp)
1020{
1021 struct lagg_port *lgp = lp->lp_lagg;
1022
1023 lp->lp_state |= LACP_STATE_AGGREGATION;
1024 lgp->lp_flags &= ~LAGG_PORT_DISABLED;
1025}
1026
1027static void
1028lacp_port_disable(struct lacp_port *lp)
1029{
1030 struct lagg_port *lgp = lp->lp_lagg;
1031
1032 lacp_set_mux(lp, LACP_MUX_DETACHED);
1033
1034 lp->lp_state &= ~LACP_STATE_AGGREGATION;
1035 lp->lp_selected = LACP_UNSELECTED;
1036 lacp_sm_rx_record_default(lp);
1037 lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
1038 lp->lp_state &= ~LACP_STATE_EXPIRED;
1039 lgp->lp_flags |= LAGG_PORT_DISABLED;
1040}
1041
1042/*
1043 * lacp_select: select an aggregator. create one if necessary.
1044 */
1045static void
1046lacp_select(struct lacp_port *lp)
1047{
1048 struct lacp_softc *lsc = lp->lp_lsc;
1049 struct lacp_aggregator *la;
1050#if defined(LACP_DEBUG)
1051 char buf[LACP_LAGIDSTR_MAX+1];
1052#endif
1053
1054 if (lp->lp_aggregator) {
1055 return;
1056 }
1057
1058 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1059 ("timer_wait_while still active"));
1060
1061 LACP_DPRINTF((lp, "port lagid=%s\n",
1062 lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
1063 buf, sizeof(buf))));
1064
1065 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
1066 if (lacp_aggregator_is_compatible(la, lp)) {
1067 break;
1068 }
1069 }
1070
1071 if (la == NULL) {
1072 la = lacp_aggregator_get(lsc, lp);
1073 if (la == NULL) {
1074 LACP_DPRINTF((lp, "aggregator creation failed\n"));
1075
1076 /*
1077 * will retry on the next tick.
1078 */
1079
1080 return;
1081 }
1082 lacp_fill_aggregator_id(la, lp);
1083 LACP_DPRINTF((lp, "aggregator created\n"));
1084 } else {
1085 LACP_DPRINTF((lp, "compatible aggregator found\n"));
1086 lacp_aggregator_addref(lsc, la);
1087 }
1088
1089 LACP_DPRINTF((lp, "aggregator lagid=%s\n",
1090 lacp_format_lagid(&la->la_actor, &la->la_partner,
1091 buf, sizeof(buf))));
1092
1093 lp->lp_aggregator = la;
1094 lp->lp_selected = LACP_SELECTED;
1095}
1096
1097/*
1098 * lacp_unselect: finish unselect/detach process.
1099 */
1100
1101static void
1102lacp_unselect(struct lacp_port *lp)
1103{
1104 struct lacp_softc *lsc = lp->lp_lsc;
1105 struct lacp_aggregator *la = lp->lp_aggregator;
1106
1107 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1108 ("timer_wait_while still active"));
1109
1110 if (la == NULL) {
1111 return;
1112 }
1113
1114 lp->lp_aggregator = NULL;
1115 lacp_aggregator_delref(lsc, la);
1116}
1117
1118/* mux machine */
1119
1120static void
1121lacp_sm_mux(struct lacp_port *lp)
1122{
1123 enum lacp_mux_state new_state;
1124 boolean_t p_sync =
1125 (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
1126 boolean_t p_collecting =
1127 (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
1128 enum lacp_selected selected = lp->lp_selected;
1129 struct lacp_aggregator *la;
1130
1131 /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
1132
1133re_eval:
1134 la = lp->lp_aggregator;
1135 KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
1136 ("MUX not detached"));
1137 new_state = lp->lp_mux_state;
1138 switch (lp->lp_mux_state) {
1139 case LACP_MUX_DETACHED:
1140 if (selected != LACP_UNSELECTED) {
1141 new_state = LACP_MUX_WAITING;
1142 }
1143 break;
1144 case LACP_MUX_WAITING:
1145 KASSERT(la->la_pending > 0 ||
1146 !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1147 ("timer_wait_while still active"));
1148 if (selected == LACP_SELECTED && la->la_pending == 0) {
1149 new_state = LACP_MUX_ATTACHED;
1150 } else if (selected == LACP_UNSELECTED) {
1151 new_state = LACP_MUX_DETACHED;
1152 }
1153 break;
1154 case LACP_MUX_ATTACHED:
1155 if (selected == LACP_SELECTED && p_sync) {
1156 new_state = LACP_MUX_COLLECTING;
1157 } else if (selected != LACP_SELECTED) {
1158 new_state = LACP_MUX_DETACHED;
1159 }
1160 break;
1161 case LACP_MUX_COLLECTING:
1162 if (selected == LACP_SELECTED && p_sync && p_collecting) {
1163 new_state = LACP_MUX_DISTRIBUTING;
1164 } else if (selected != LACP_SELECTED || !p_sync) {
1165 new_state = LACP_MUX_ATTACHED;
1166 }
1167 break;
1168 case LACP_MUX_DISTRIBUTING:
1169 if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
1170 new_state = LACP_MUX_COLLECTING;
1171 }
1172 break;
1173 default:
1174 panic("%s: unknown state", __func__);
1175 }
1176
1177 if (lp->lp_mux_state == new_state) {
1178 return;
1179 }
1180
1181 lacp_set_mux(lp, new_state);
1182 goto re_eval;
1183}
1184
1185static void
1186lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
1187{
1188 struct lacp_aggregator *la = lp->lp_aggregator;
1189
1190 if (lp->lp_mux_state == new_state) {
1191 return;
1192 }
1193
1194 switch (new_state) {
1195 case LACP_MUX_DETACHED:
1196 lp->lp_state &= ~LACP_STATE_SYNC;
1197 lacp_disable_distributing(lp);
1198 lacp_disable_collecting(lp);
1199 lacp_sm_assert_ntt(lp);
1200 /* cancel timer */
1201 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
1202 KASSERT(la->la_pending > 0,
1203 ("timer_wait_while not active"));
1204 la->la_pending--;
1205 }
1206 LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
1207 lacp_unselect(lp);
1208 break;
1209 case LACP_MUX_WAITING:
1210 LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
1211 LACP_AGGREGATE_WAIT_TIME);
1212 la->la_pending++;
1213 break;
1214 case LACP_MUX_ATTACHED:
1215 lp->lp_state |= LACP_STATE_SYNC;
1216 lacp_disable_collecting(lp);
1217 lacp_sm_assert_ntt(lp);
1218 break;
1219 case LACP_MUX_COLLECTING:
1220 lacp_enable_collecting(lp);
1221 lacp_disable_distributing(lp);
1222 lacp_sm_assert_ntt(lp);
1223 break;
1224 case LACP_MUX_DISTRIBUTING:
1225 lacp_enable_distributing(lp);
1226 break;
1227 default:
1228 panic("%s: unknown state", __func__);
1229 }
1230
1231 LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
1232
1233 lp->lp_mux_state = new_state;
1234}
1235
1236static void
1237lacp_sm_mux_timer(struct lacp_port *lp)
1238{
1239 struct lacp_aggregator *la = lp->lp_aggregator;
1240#if defined(LACP_DEBUG)
1241 char buf[LACP_LAGIDSTR_MAX+1];
1242#endif
1243
1244 KASSERT(la->la_pending > 0, ("no pending event"));
1245
1246 LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
1247 lacp_format_lagid(&la->la_actor, &la->la_partner,
1248 buf, sizeof(buf)),
1249 la->la_pending, la->la_pending - 1));
1250
1251 la->la_pending--;
1252}
1253
1254/* periodic transmit machine */
1255
1256static void
1257lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
1258{
1259 if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
1260 LACP_STATE_TIMEOUT)) {
1261 return;
1262 }
1263
1264 LACP_DPRINTF((lp, "partner timeout changed\n"));
1265
1266 /*
1267 * FAST_PERIODIC -> SLOW_PERIODIC
1268 * or
1269 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
1270 *
1271 * let lacp_sm_ptx_tx_schedule to update timeout.
1272 */
1273
1274 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1275
1276 /*
1277 * if timeout has been shortened, assert NTT.
1278 */
1279
1280 if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
1281 lacp_sm_assert_ntt(lp);
1282 }
1283}
1284
1285static void
1286lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
1287{
1288 int timeout;
1289
1290 if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
1291 !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
1292
1293 /*
1294 * NO_PERIODIC
1295 */
1296
1297 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1298 return;
1299 }
1300
1301 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
1302 return;
1303 }
1304
1305 timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
1306 LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
1307
1308 LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
1309}
1310
/*
 * lacp_sm_ptx_timer: periodic-transmit timer handler; it only asserts
 * NTT on the port.
 */
static void
lacp_sm_ptx_timer(struct lacp_port *lp)
{

	lacp_sm_assert_ntt(lp);
}
1316
1317static void
1318lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
1319{
1320 int timeout;
1321
1322 /*
1323 * check LACP_DISABLED first
1324 */
1325
1326 if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
1327 return;
1328 }
1329
1330 /*
1331 * check loopback condition.
1332 */
1333
1334 if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
1335 &lp->lp_actor.lip_systemid)) {
1336 return;
1337 }
1338
1339 /*
1340 * EXPIRED, DEFAULTED, CURRENT -> CURRENT
1341 */
1342
1343 lacp_sm_rx_update_selected(lp, du);
1344 lacp_sm_rx_update_ntt(lp, du);
1345 lacp_sm_rx_record_pdu(lp, du);
1346
1347 timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
1348 LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
1349 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
1350
1351 lp->lp_state &= ~LACP_STATE_EXPIRED;
1352
1353 /*
1354 * kick transmit machine without waiting the next tick.
1355 */
1356
1357 lacp_sm_tx(lp);
1358}
1359
1360static void
1361lacp_sm_rx_set_expired(struct lacp_port *lp)
1362{
1363 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1364 lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
1365 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
1366 lp->lp_state |= LACP_STATE_EXPIRED;
1367}
1368
1369static void
1370lacp_sm_rx_timer(struct lacp_port *lp)
1371{
1372 if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
1373 /* CURRENT -> EXPIRED */
1374 LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
1375 lacp_sm_rx_set_expired(lp);
1376 } else {
1377 /* EXPIRED -> DEFAULTED */
1378 LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
1379 lacp_sm_rx_update_default_selected(lp);
1380 lacp_sm_rx_record_default(lp);
1381 lp->lp_state &= ~LACP_STATE_EXPIRED;
1382 }
1383}
1384
1385static void
1386lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
1387{
1388 boolean_t active;
1389 uint8_t oldpstate;
1390#if defined(LACP_DEBUG)
1391 char buf[LACP_STATESTR_MAX+1];
1392#endif
1393
1394 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1395
1396 oldpstate = lp->lp_partner.lip_state;
1397
1398 active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
1399 || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
1400 (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
1401
1402 lp->lp_partner = du->ldu_actor;
1403 if (active &&
1404 ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1405 LACP_STATE_AGGREGATION) &&
1406 !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
1407 || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
1408 /* XXX nothing? */
1409 } else {
1410 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1411 }
1412
1413 lp->lp_state &= ~LACP_STATE_DEFAULTED;
1414
1415 if (oldpstate != lp->lp_partner.lip_state) {
1416 LACP_DPRINTF((lp, "old pstate %s\n",
1417 lacp_format_state(oldpstate, buf, sizeof(buf))));
1418 LACP_DPRINTF((lp, "new pstate %s\n",
1419 lacp_format_state(lp->lp_partner.lip_state, buf,
1420 sizeof(buf))));
1421 }
1422
1423 lacp_sm_ptx_update_timeout(lp, oldpstate);
1424}
1425
1426static void
1427lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
1428{
1429 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1430
1431 if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
1432 !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1433 LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
1434 LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
1435 lacp_sm_assert_ntt(lp);
1436 }
1437}
1438
1439static void
1440lacp_sm_rx_record_default(struct lacp_port *lp)
1441{
1442 uint8_t oldpstate;
1443
1444 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1445
1446 oldpstate = lp->lp_partner.lip_state;
1447 lp->lp_partner = lacp_partner_admin;
1448 lp->lp_state |= LACP_STATE_DEFAULTED;
1449 lacp_sm_ptx_update_timeout(lp, oldpstate);
1450}
1451
1452static void
1453lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
1454 const struct lacp_peerinfo *info)
1455{
1456 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1457
1458 if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
1459 !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
1460 LACP_STATE_AGGREGATION)) {
1461 lp->lp_selected = LACP_UNSELECTED;
1462 /* mux machine will clean up lp->lp_aggregator */
1463 }
1464}
1465
1466static void
1467lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
1468{
1469 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1470
1471 lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
1472}
1473
1474static void
1475lacp_sm_rx_update_default_selected(struct lacp_port *lp)
1476{
1477 /* LACP_DPRINTF((lp, "%s\n", __func__)); */
1478
1479 lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
1480}
1481
1482/* transmit machine */
1483
1484static void
1485lacp_sm_tx(struct lacp_port *lp)
1486{
1487 int error;
1488
1489 if (!(lp->lp_state & LACP_STATE_AGGREGATION)
1490#if 1
1491 || (!(lp->lp_state & LACP_STATE_ACTIVITY)
1492 && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
1493#endif
1494 ) {
1495 lp->lp_flags &= ~LACP_PORT_NTT;
1496 }
1497
1498 if (!(lp->lp_flags & LACP_PORT_NTT)) {
1499 return;
1500 }
1501
1502 /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
1503 if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
1504 (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
1505 LACP_DPRINTF((lp, "rate limited pdu\n"));
1506 return;
1507 }
1508
1509 error = lacp_xmit_lacpdu(lp);
1510
1511 if (error == 0) {
1512 lp->lp_flags &= ~LACP_PORT_NTT;
1513 } else {
1514 LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
1515 error));
1516 }
1517}
1518
1519static void
1520lacp_sm_assert_ntt(struct lacp_port *lp)
1521{
1522
1523 lp->lp_flags |= LACP_PORT_NTT;
1524}
1525
1526static void
1527lacp_run_timers(struct lacp_port *lp)
1528{
1529 int i;
1530
1531 for (i = 0; i < LACP_NTIMER; i++) {
1532 KASSERT(lp->lp_timer[i] >= 0,
1533 ("invalid timer value %d", lp->lp_timer[i]));
1534 if (lp->lp_timer[i] == 0) {
1535 continue;
1536 } else if (--lp->lp_timer[i] <= 0) {
1537 if (lacp_timer_funcs[i]) {
1538 (*lacp_timer_funcs[i])(lp);
1539 }
1540 }
1541 }
1542}
1543
1544int
1545lacp_marker_input(struct lagg_port *lgp, struct mbuf *m)
1546{
1547 struct lacp_port *lp = LACP_PORT(lgp);
1548 struct markerdu *mdu;
1549 int error = 0;
1550
1551 LAGG_LOCK_ASSERT(lgp->lp_lagg);
1552
1553 if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
1554 goto bad;
1555 }
1556
1557 if (m->m_pkthdr.len != sizeof(*mdu)) {
1558 goto bad;
1559 }
1560
1561 if ((m->m_flags & M_MCAST) == 0) {
1562 goto bad;
1563 }
1564
1565 if (m->m_len < sizeof(*mdu)) {
1566 m = m_pullup(m, sizeof(*mdu));
1567 if (m == NULL) {
1568 return (ENOMEM);
1569 }
1570 }
1571
1572 mdu = mtod(m, struct markerdu *);
1573
1574 if (memcmp(&mdu->mdu_eh.ether_dhost,
1575 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
1576 goto bad;
1577 }
1578
1579 /* XXX
1580 KASSERT(mdu->mdu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_MARKER,
1581 ("a very bad kassert!"));
1582 */
1583
1584 if (mdu->mdu_sph.sph_version != 1) {
1585 goto bad;
1586 }
1587
1588 switch (mdu->mdu_tlv.tlv_type) {
1589 case MARKER_TYPE_INFO:
1590 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1591 marker_info_tlv_template, TRUE)) {
1592 goto bad;
1593 }
1594 mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
1595 memcpy(&mdu->mdu_eh.ether_dhost,
1596 &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
1597 memcpy(&mdu->mdu_eh.ether_shost,
1598 lgp->lp_lladdr, ETHER_ADDR_LEN);
1599 error = lagg_enqueue(lp->lp_ifp, m);
1600 break;
1601
1602 case MARKER_TYPE_RESPONSE:
1603 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1604 marker_response_tlv_template, TRUE)) {
1605 goto bad;
1606 }
1607 /*
1608 * we are not interested in responses as
1609 * we don't have a marker sender.
1610 */
1611 /* FALLTHROUGH */
1612 default:
1613 goto bad;
1614 }
1615
1616 return (error);
1617
1618bad:
1619 m_freem(m);
1620 return (EINVAL);
1621}
1622
1623static int
1624tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
1625 const struct tlv_template *tmpl, boolean_t check_type)
1626{
1627 while (/* CONSTCOND */ 1) {
1628 if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
1629 return (EINVAL);
1630 }
1631 if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
1632 tlv->tlv_length != tmpl->tmpl_length) {
1633 return (EINVAL);
1634 }
1635 if (tmpl->tmpl_type == 0) {
1636 break;
1637 }
1638 tlv = (const struct tlvhdr *)
1639 ((const char *)tlv + tlv->tlv_length);
1640 tmpl++;
1641 }
1642
1643 return (0);
1644}
1645
1646#if defined(LACP_DEBUG)
/*
 * lacp_format_mac: format a 6-byte MAC address as "XX-XX-XX-XX-XX-XX"
 * (upper-case hex) into buf, truncating to buflen as snprintf() does.
 *
 * Returns buf.
 */
const char *
lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
{
	/* %X takes an unsigned int, so convert each byte accordingly. */
	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
	    (unsigned int)mac[0],
	    (unsigned int)mac[1],
	    (unsigned int)mac[2],
	    (unsigned int)mac[3],
	    (unsigned int)mac[4],
	    (unsigned int)mac[5]);

	return (buf);
}
1660
1661const char *
1662lacp_format_systemid(const struct lacp_systemid *sysid,
1663 char *buf, size_t buflen)
1664{
1665 char macbuf[LACP_MACSTR_MAX+1];
1666
1667 snprintf(buf, buflen, "%04X,%s",
1668 ntohs(sysid->lsi_prio),
1669 lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
1670
1671 return (buf);
1672}
1673
1674const char *
1675lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
1676{
1677 snprintf(buf, buflen, "%04X,%04X",
1678 ntohs(portid->lpi_prio),
1679 ntohs(portid->lpi_portno));
1680
1681 return (buf);
1682}
1683
1684const char *
1685lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
1686{
1687 char sysid[LACP_SYSTEMIDSTR_MAX+1];
1688 char portid[LACP_PORTIDSTR_MAX+1];
1689
1690 snprintf(buf, buflen, "(%s,%04X,%s)",
1691 lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
1692 ntohs(peer->lip_key),
1693 lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
1694
1695 return (buf);
1696}
1697
1698const char *
1699lacp_format_lagid(const struct lacp_peerinfo *a,
1700 const struct lacp_peerinfo *b, char *buf, size_t buflen)
1701{
1702 char astr[LACP_PARTNERSTR_MAX+1];
1703 char bstr[LACP_PARTNERSTR_MAX+1];
1704
1705#if 0
1706 /*
1707 * there's a convention to display small numbered peer
1708 * in the left.
1709 */
1710
1711 if (lacp_compare_peerinfo(a, b) > 0) {
1712 const struct lacp_peerinfo *t;
1713
1714 t = a;
1715 a = b;
1716 b = t;
1717 }
1718#endif
1719
1720 snprintf(buf, buflen, "[%s,%s]",
1721 lacp_format_partner(a, astr, sizeof(astr)),
1722 lacp_format_partner(b, bstr, sizeof(bstr)));
1723
1724 return (buf);
1725}
1726
1727const char *
1728lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
1729 char *buf, size_t buflen)
1730{
1731 if (la == NULL) {
1732 return ("(none)");
1733 }
1734
1735 return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
1736}
1737
1738const char *
1739lacp_format_state(uint8_t state, char *buf, size_t buflen)
1740{
1741 snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
1742 return (buf);
1743}
1744
1745static void
1746lacp_dump_lacpdu(const struct lacpdu *du)
1747{
1748 char buf[LACP_PARTNERSTR_MAX+1];
1749 char buf2[LACP_STATESTR_MAX+1];
1750
1751 printf("actor=%s\n",
1752 lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
1753 printf("actor.state=%s\n",
1754 lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
1755 printf("partner=%s\n",
1756 lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
1757 printf("partner.state=%s\n",
1758 lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
1759
1760 printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
1761}
1762
1763static void
1764lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
1765{
1766 va_list va;
1767
1768 if (lp) {
1769 printf("%s: ", lp->lp_ifp->if_xname);
1770 }
1771
1772 va_start(va, fmt);
1773 vprintf(fmt, va);
1774 va_end(va);
1775}
1776#endif