1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*
26 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
27 */
28
29#include <sys/types.h>
30#include <sys/sysmacros.h>
31#include <sys/callb.h>
32#include <sys/conf.h>
33#include <sys/cmn_err.h>
34#include <sys/disp.h>
35#include <sys/list.h>
36#include <sys/ksynch.h>
37#include <sys/kmem.h>
38#include <sys/stream.h>
39#include <sys/modctl.h>
40#include <sys/ddi.h>
41#include <sys/sunddi.h>
42#include <sys/atomic.h>
43#include <sys/stat.h>
44#include <sys/byteorder.h>
45#include <sys/strsun.h>
46#include <sys/isa_defs.h>
47#include <sys/sdt.h>
48
49#include <sys/aggr.h>
50#include <sys/aggr_impl.h>
51
/*
 * All-zero Ethernet address.
 */
static struct ether_addr	etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 * It is used as the destination address of the LACP frames built by
 * fill_lacp_ether(), and it is the multicast address that
 * aggr_lacp_mcast_on()/aggr_lacp_mcast_off() add to or remove from a port.
 */
static struct ether_addr   slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
62
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf argument list, e.g.
 *	AGGR_LACP_DBG(("port %d\n", id));
 * and prints it only when aggr_lacp_debug is non-zero.
 *
 * Both variants expand to a single do/while (0) statement so that the
 * macro followed by ';' is exactly one statement. This keeps it safe inside
 * an unbraced if/else (no dangling else, no stray empty statement).
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
70
/*
 * Nanoseconds per second; compared against gethrtime() deltas when rate
 * limiting LACPDU transmission in lacp_xmit_sm().
 */
#define	NSECS_PER_SEC   1000000000ll

/* used by lacp_misconfig_walker() */
typedef struct lacp_misconfig_check_state_s {
	/* NOTE(review): presumably the port being checked -- confirm */
	aggr_port_t *cs_portp;
	/* NOTE(review): presumably set once a match is found -- confirm */
	boolean_t cs_found;
} lacp_misconfig_check_state_t;

/*
 * State name tables. lacp_periodic_str and lacp_mux_str are indexed by the
 * corresponding state value when building AGGR_LACP_DBG() messages.
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default actor priorities: lacp_port_priority is copied into each port by
 * aggr_lacp_init_port(), lacp_system_priority into each group by
 * aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
85
/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 */
typedef struct lacp_sel_ports {
	/* linkid of the owning group; matched against the group's lg_linkid */
	datalink_id_t sp_grp_linkid;
	/* NOTE(review): presumably the linkid of the port itself -- confirm */
	datalink_id_t sp_linkid;
	/* Note: sp_partner_system must be 2-byte aligned */
	struct ether_addr sp_partner_system;
	/* partner key; compared with the group's aggr.PartnerOperAggrKey */
	uint32_t sp_partner_key;
	/* next entry of the singly-linked list, NULL at the tail */
	struct lacp_sel_ports *sp_next;
} lacp_sel_ports_t;

/* Head of the list described above; walked with lacp_sel_lock held. */
static lacp_sel_ports_t *sel_ports = NULL;
/* Initialized by aggr_lacp_init() and destroyed by aggr_lacp_fini(). */
static kmutex_t lacp_sel_lock;
101
102static void periodic_timer_pop(void *);
103static void periodic_timer_pop_handler(aggr_port_t *);
104static void lacp_xmit_sm(aggr_port_t *);
105static void lacp_periodic_sm(aggr_port_t *);
106static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
107static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
108static void lacp_on(aggr_port_t *);
109static void lacp_off(aggr_port_t *);
110static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
111static void lacp_receive_sm(aggr_port_t *, lacp_t *);
112static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
113static void start_wait_while_timer(aggr_port_t *);
114static void stop_wait_while_timer(aggr_port_t *);
115static void lacp_reset_port(aggr_port_t *);
116static void stop_current_while_timer(aggr_port_t *);
117static void current_while_timer_pop(void *);
118static void current_while_timer_pop_handler(aggr_port_t *);
119static void update_default_selected(aggr_port_t *);
120static boolean_t update_selected(aggr_port_t *, lacp_t *);
121static boolean_t lacp_sel_ports_add(aggr_port_t *);
122static void lacp_sel_ports_del(aggr_port_t *);
123static void wait_while_timer_pop(void *);
124static void wait_while_timer_pop_handler(aggr_port_t *);
125
126void
127aggr_lacp_init(void)
128{
129	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
130}
131
132void
133aggr_lacp_fini(void)
134{
135	mutex_destroy(&lacp_sel_lock);
136}
137
138/*
139 * The following functions are used for handling LACP timers.
140 *
141 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
142 * handler routine, otherwise it may cause deadlock with the untimeout() call
143 * which is usually called with the mac perimeter held. Instead, a
144 * lacp_timer_lock mutex is introduced, which protects a bitwise flag
145 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
146 * routines and is checked by a dedicated thread, that executes the real
147 * timeout operation.
148 */
/*
 * Per-port worker thread, created by aggr_lacp_init_port() with the port as
 * its argument.
 *
 * It sleeps on lacp_timer_cv until some bit is set in lacp_timer_bits, then
 * runs the handler for each pending timeout with the group's mac perimeter
 * held. The thread terminates when LACP_THREAD_EXIT is posted or when the
 * port is found to be closing; on its way out it clears lacp_timer_thread,
 * wakes up any waiter on lacp_timer_cv and drops the reference taken by
 * aggr_lacp_init_port().
 */
static void
aggr_port_timer_thread(void *arg)
{
	aggr_port_t		*port = arg;
	aggr_lacp_port_t	*pl = &port->lp_lacp;
	aggr_grp_t		*grp = port->lp_grp;
	uint32_t		lacp_timer_bits;
	mac_perim_handle_t	mph;
	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
	    "aggr_port_timer_thread");

	mutex_enter(&pl->lacp_timer_lock);

	for (;;) {

		/* Nothing pending: sleep (CPR-safe) until a bit is posted. */
		if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
			continue;
		}
		/* Work on the local snapshot; the shared flag starts afresh. */
		pl->lacp_timer_bits = 0;

		if (lacp_timer_bits & LACP_THREAD_EXIT)
			break;

		/*
		 * The timeouts that fired are no longer outstanding: zero
		 * their ids (e.g. start_periodic_timer() only arms the timer
		 * when its id is 0).
		 */
		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			pl->periodic_timer.id = 0;
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			pl->wait_while_timer.id = 0;
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			pl->current_while_timer.id = 0;

		/*
		 * Drop the timer lock before entering the perimeter:
		 * stop_periodic_timer() takes lacp_timer_lock with the
		 * perimeter already held, so the opposite order here would
		 * invert the lock ordering.
		 */
		mutex_exit(&pl->lacp_timer_lock);

		mac_perim_enter_by_mh(grp->lg_mh, &mph);
		if (port->lp_closing) {
			mac_perim_exit(mph);
			/* the exit path below expects the lock to be held */
			mutex_enter(&pl->lacp_timer_lock);
			break;
		}

		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			periodic_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			wait_while_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			current_while_timer_pop_handler(port);
		mac_perim_exit(mph);

		/* An exit request may have been posted while we were busy. */
		mutex_enter(&pl->lacp_timer_lock);
		if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
			break;
	}

	/* Still holding lacp_timer_lock here on every path out of the loop. */
	pl->lacp_timer_bits = 0;
	pl->lacp_timer_thread = NULL;
	cv_broadcast(&pl->lacp_timer_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);

	/*
	 * Release the reference of the grp so aggr_grp_delete() can call
	 * mac_unregister() safely.
	 */
	aggr_grp_port_rele(port);
	thread_exit();
}
220
221/*
222 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
223 * could not be performed due to a memory allocation error, B_TRUE otherwise.
224 */
225static boolean_t
226lacp_port_select(aggr_port_t *portp)
227{
228	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
229
230	if (!lacp_sel_ports_add(portp))
231		return (B_FALSE);
232	portp->lp_lacp.sm.selected = AGGR_SELECTED;
233	return (B_TRUE);
234}
235
236/*
237 * Set the port LACP state to UNSELECTED.
238 */
239static void
240lacp_port_unselect(aggr_port_t *portp)
241{
242	aggr_grp_t	*grp = portp->lp_grp;
243
244	ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
245
246	lacp_sel_ports_del(portp);
247	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
248}
249
250/*
251 * Initialize group specific LACP state and parameters.
252 */
253void
254aggr_lacp_init_grp(aggr_grp_t *aggrp)
255{
256	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
257	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
258	aggrp->aggr.CollectorMaxDelay = 10;
259	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
260	aggrp->aggr.ready = B_FALSE;
261}
262
263/*
264 * Complete LACP info initialization at port creation time.
265 */
266void
267aggr_lacp_init_port(aggr_port_t *portp)
268{
269	aggr_grp_t *aggrp = portp->lp_grp;
270	aggr_lacp_port_t *pl = &portp->lp_lacp;
271
272	ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
273	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
274
275	/* actor port # */
276	pl->ActorPortNumber = portp->lp_portid;
277	AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
278	    "ActorPortNumber = 0x%x\n", portp->lp_linkid,
279	    pl->ActorPortNumber));
280
281	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
282	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
283	pl->NTT = B_FALSE;			/* need to transmit */
284
285	pl->ActorAdminPortKey = aggrp->lg_key;
286	pl->ActorOperPortKey = pl->ActorAdminPortKey;
287	AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
288	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
289	    portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
290
291	/* Actor admin. port state */
292	pl->ActorAdminPortState.bit.activity = B_FALSE;
293	pl->ActorAdminPortState.bit.timeout = B_TRUE;
294	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
295	pl->ActorAdminPortState.bit.sync = B_FALSE;
296	pl->ActorAdminPortState.bit.collecting = B_FALSE;
297	pl->ActorAdminPortState.bit.distributing = B_FALSE;
298	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
299	pl->ActorAdminPortState.bit.expired = B_FALSE;
300	pl->ActorOperPortState = pl->ActorAdminPortState;
301
302	/*
303	 * Partner Administrative Information
304	 * (All initialized to zero except for the following)
305	 * Fast Timeouts.
306	 */
307	pl->PartnerAdminPortState.bit.timeout =
308	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
309
310	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
311
312	/*
313	 * State machine information.
314	 */
315	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
316	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
317	pl->sm.lacp_enabled = B_FALSE;
318	pl->sm.port_enabled = B_FALSE;		/* Link Down */
319	pl->sm.actor_churn = B_FALSE;
320	pl->sm.partner_churn = B_FALSE;
321	pl->sm.ready_n = B_FALSE;
322	pl->sm.port_moved = B_FALSE;
323
324	lacp_port_unselect(portp);
325
326	pl->sm.periodic_state = LACP_NO_PERIODIC;
327	pl->sm.receive_state = LACP_INITIALIZE;
328	pl->sm.mux_state = LACP_DETACHED;
329	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
330
331	/*
332	 * Timer information.
333	 */
334	pl->current_while_timer.id = 0;
335	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
336
337	pl->periodic_timer.id = 0;
338	pl->periodic_timer.val = FAST_PERIODIC_TIME;
339
340	pl->wait_while_timer.id = 0;
341	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
342
343	pl->lacp_timer_bits = 0;
344
345	mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
346	cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
347
348	pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
349	    portp, 0, &p0, TS_RUN, minclsyspri);
350
351	/*
352	 * Hold a reference of the grp and the port and this reference will
353	 * be release when the thread exits.
354	 *
355	 * The reference on the port is used for aggr_port_delete() to
356	 * continue without waiting for the thread to exit; the reference
357	 * on the grp is used for aggr_grp_delete() to wait for the thread
358	 * to exit before calling mac_unregister().
359	 */
360	aggr_grp_port_hold(portp);
361}
362
363/*
364 * Port initialization when we need to
365 * turn LACP on/off, etc. Not everything is
366 * reset like in the above routine.
367 *		Do NOT modify things like link status.
368 */
369static void
370lacp_reset_port(aggr_port_t *portp)
371{
372	aggr_lacp_port_t *pl = &portp->lp_lacp;
373
374	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
375
376	pl->NTT = B_FALSE;			/* need to transmit */
377
378	/* reset operational port state */
379	pl->ActorOperPortState.bit.timeout =
380	    pl->ActorAdminPortState.bit.timeout;
381
382	pl->ActorOperPortState.bit.sync = B_FALSE;
383	pl->ActorOperPortState.bit.collecting = B_FALSE;
384	pl->ActorOperPortState.bit.distributing = B_FALSE;
385	pl->ActorOperPortState.bit.defaulted = B_TRUE;
386	pl->ActorOperPortState.bit.expired = B_FALSE;
387
388	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
389	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
390
391	/*
392	 * State machine information.
393	 */
394	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
395	pl->sm.actor_churn = B_FALSE;
396	pl->sm.partner_churn = B_FALSE;
397	pl->sm.ready_n = B_FALSE;
398
399	lacp_port_unselect(portp);
400
401	pl->sm.periodic_state = LACP_NO_PERIODIC;
402	pl->sm.receive_state = LACP_INITIALIZE;
403	pl->sm.mux_state = LACP_DETACHED;
404	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
405
406	/*
407	 * Timer information.
408	 */
409	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
410	pl->periodic_timer.val = FAST_PERIODIC_TIME;
411}
412
413static void
414aggr_lacp_mcast_on(aggr_port_t *port)
415{
416	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
417	ASSERT(MAC_PERIM_HELD(port->lp_mh));
418
419	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
420		return;
421
422	(void) aggr_port_multicst(port, B_TRUE,
423	    (uchar_t *)&slow_multicast_addr);
424}
425
426static void
427aggr_lacp_mcast_off(aggr_port_t *port)
428{
429	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
430	ASSERT(MAC_PERIM_HELD(port->lp_mh));
431
432	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
433		return;
434
435	(void) aggr_port_multicst(port, B_FALSE,
436	    (uchar_t *)&slow_multicast_addr);
437}
438
439static void
440start_periodic_timer(aggr_port_t *portp)
441{
442	aggr_lacp_port_t *pl = &portp->lp_lacp;
443
444	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
445
446	mutex_enter(&pl->lacp_timer_lock);
447	if (pl->periodic_timer.id == 0) {
448		pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
449		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
450	}
451	mutex_exit(&pl->lacp_timer_lock);
452}
453
454static void
455stop_periodic_timer(aggr_port_t *portp)
456{
457	aggr_lacp_port_t *pl = &portp->lp_lacp;
458	timeout_id_t id;
459
460	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
461
462	mutex_enter(&pl->lacp_timer_lock);
463	if ((id = pl->periodic_timer.id) != 0) {
464		pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
465		pl->periodic_timer.id = 0;
466	}
467	mutex_exit(&pl->lacp_timer_lock);
468
469	if (id != 0)
470		(void) untimeout(id);
471}
472
473/*
474 * When the timer pops, we arrive here to
475 * clear out LACPDU count as well as transmit an
476 * LACPDU. We then set the periodic state and let
477 * the periodic state machine restart the timer.
478 */
479static void
480periodic_timer_pop(void *data)
481{
482	aggr_port_t *portp = data;
483	aggr_lacp_port_t *pl = &portp->lp_lacp;
484
485	mutex_enter(&pl->lacp_timer_lock);
486	pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
487	cv_broadcast(&pl->lacp_timer_cv);
488	mutex_exit(&pl->lacp_timer_lock);
489}
490
491/*
492 * When the timer pops, we arrive here to
493 * clear out LACPDU count as well as transmit an
494 * LACPDU. We then set the periodic state and let
495 * the periodic state machine restart the timer.
496 */
497static void
498periodic_timer_pop_handler(aggr_port_t *portp)
499{
500	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
501
502	portp->lp_lacp_stats.LACPDUsTx = 0;
503
504	/* current timestamp */
505	portp->lp_lacp.time = gethrtime();
506	portp->lp_lacp.NTT = B_TRUE;
507	lacp_xmit_sm(portp);
508
509	/*
510	 * Set Periodic State machine state based on the
511	 * value of the Partner Operation Port State timeout
512	 * bit.
513	 */
514	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
515		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
516		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
517	} else {
518		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
519		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
520	}
521
522	lacp_periodic_sm(portp);
523}
524
525/*
526 * Invoked from:
527 *	- startup upon aggregation
528 *	- when the periodic timer pops
529 *	- when the periodic timer value is changed
530 *	- when the port is attached or detached
531 *	- when LACP mode is changed.
532 */
533static void
534lacp_periodic_sm(aggr_port_t *portp)
535{
536	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
537	aggr_lacp_port_t *pl = &portp->lp_lacp;
538
539	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
540
541	/* LACP_OFF state not in specification so check here.  */
542	if (!pl->sm.lacp_on) {
543		/* Stop timer whether it is running or not */
544		stop_periodic_timer(portp);
545		pl->sm.periodic_state = LACP_NO_PERIODIC;
546		pl->NTT = B_FALSE;
547		AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
548		    "%s--->%s\n", portp->lp_linkid,
549		    lacp_periodic_str[oldstate],
550		    lacp_periodic_str[pl->sm.periodic_state]));
551		return;
552	}
553
554	if (pl->sm.begin || !pl->sm.lacp_enabled ||
555	    !pl->sm.port_enabled ||
556	    !pl->ActorOperPortState.bit.activity &&
557	    !pl->PartnerOperPortState.bit.activity) {
558
559		/* Stop timer whether it is running or not */
560		stop_periodic_timer(portp);
561		pl->sm.periodic_state = LACP_NO_PERIODIC;
562		pl->NTT = B_FALSE;
563		AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
564		    portp->lp_linkid, lacp_periodic_str[oldstate],
565		    lacp_periodic_str[pl->sm.periodic_state]));
566		return;
567	}
568
569	/*
570	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
571	 * has been received. Then after we timeout, then it is
572	 * possible to go to SLOW_PERIODIC_TIME.
573	 */
574	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
575		pl->periodic_timer.val = FAST_PERIODIC_TIME;
576		pl->sm.periodic_state = LACP_FAST_PERIODIC;
577	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
578	    pl->PartnerOperPortState.bit.timeout) {
579		/*
580		 * If we receive a bit indicating we are going to
581		 * fast periodic from slow periodic, stop the timer
582		 * and let the periodic_timer_pop routine deal
583		 * with reseting the periodic state and transmitting
584		 * a LACPDU.
585		 */
586		stop_periodic_timer(portp);
587		periodic_timer_pop_handler(portp);
588	}
589
590	/* Rearm timer with value provided by partner */
591	start_periodic_timer(portp);
592}
593
594/*
595 * This routine transmits an LACPDU if lacp_enabled
596 * is TRUE and if NTT is set.
597 */
598static void
599lacp_xmit_sm(aggr_port_t *portp)
600{
601	aggr_lacp_port_t *pl = &portp->lp_lacp;
602	size_t	len;
603	mblk_t  *mp;
604	hrtime_t now, elapsed;
605
606	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
607
608	/* LACP_OFF state not in specification so check here.  */
609	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
610		return;
611
612	/*
613	 * Do nothing if LACP has been turned off or if the
614	 * periodic state machine is not enabled.
615	 */
616	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
617	    !pl->sm.lacp_enabled || pl->sm.begin) {
618		pl->NTT = B_FALSE;
619		return;
620	}
621
622	/*
623	 * If we have sent 5 Slow packets in the last second, avoid
624	 * sending any more here. No more than three LACPDUs may be transmitted
625	 * in any Fast_Periodic_Time interval.
626	 */
627	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
628		/*
629		 * Grab the current time value and see if
630		 * more than 1 second has passed. If so,
631		 * reset the timestamp and clear the count.
632		 */
633		now = gethrtime();
634		elapsed = now - pl->time;
635		if (elapsed > NSECS_PER_SEC) {
636			portp->lp_lacp_stats.LACPDUsTx = 0;
637			pl->time = now;
638		} else {
639			return;
640		}
641	}
642
643	len = sizeof (lacp_t) + sizeof (struct ether_header);
644	mp = allocb(len, BPRI_MED);
645	if (mp == NULL)
646		return;
647
648	mp->b_wptr = mp->b_rptr + len;
649	bzero(mp->b_rptr, len);
650
651	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
652	fill_lacp_pdu(portp,
653	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
654
655	/* Send the packet over the first TX ring */
656	mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
657	if (mp != NULL)
658		freemsg(mp);
659
660	pl->NTT = B_FALSE;
661	portp->lp_lacp_stats.LACPDUsTx++;
662}
663
664/*
665 * Initialize the ethernet header of a LACP packet sent from the specified
666 * port.
667 */
668static void
669fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
670{
671	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
672	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
673	    ETHERADDRL);
674	ether->ether_type = htons(ETHERTYPE_SLOW);
675}
676
677static void
678fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
679{
680	aggr_lacp_port_t *pl = &portp->lp_lacp;
681	aggr_grp_t *aggrp = portp->lp_grp;
682	mac_perim_handle_t pmph;
683
684	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
685	mac_perim_enter_by_mh(portp->lp_mh, &pmph);
686
687	lacp->subtype = LACP_SUBTYPE;
688	lacp->version = LACP_VERSION;
689
690	/*
691	 * Actor Information
692	 */
693	lacp->actor_info.tlv_type = ACTOR_TLV;
694	lacp->actor_info.information_len = sizeof (link_info_t);
695	lacp->actor_info.system_priority =
696	    htons(aggrp->aggr.ActorSystemPriority);
697	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
698	    ETHERADDRL);
699	lacp->actor_info.key = htons(pl->ActorOperPortKey);
700	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
701	lacp->actor_info.port = htons(pl->ActorPortNumber);
702	lacp->actor_info.state.state = pl->ActorOperPortState.state;
703
704	/*
705	 * Partner Information
706	 */
707	lacp->partner_info.tlv_type = PARTNER_TLV;
708	lacp->partner_info.information_len = sizeof (link_info_t);
709	lacp->partner_info.system_priority =
710	    htons(pl->PartnerOperSysPriority);
711	lacp->partner_info.system_id = pl->PartnerOperSystem;
712	lacp->partner_info.key = htons(pl->PartnerOperKey);
713	lacp->partner_info.port_priority =
714	    htons(pl->PartnerOperPortPriority);
715	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
716	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
717
718	/* Collector Information */
719	lacp->tlv_collector = COLLECTOR_TLV;
720	lacp->collector_len = 0x10;
721	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
722
723	/* Termination Information */
724	lacp->tlv_terminator = TERMINATOR_TLV;
725	lacp->terminator_len = 0x0;
726
727	mac_perim_exit(pmph);
728}
729
/*
 * lacp_mux_sm - LACP mux state machine
 *		This state machine is invoked from:
 *			- startup upon aggregation
 *			- from the Selection logic
 *			- when the wait_while_timer pops
 *			- when the aggregation MAC address is changed
 *			- when receiving DL_NOTE_LINK_UP/DOWN
 *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
 *			- when LACP mode is changed.
 *			- when a DL_NOTE_SPEED is received
 *
 * The routine works in two steps: the first switch computes the next mux
 * state from the current state and the selection/partner variables, and
 * returns if there is no transition; the second switch performs the entry
 * actions of the new state. Entering ATTACHED while the partner is already
 * in sync loops back once more through the first switch.
 * The group's mac perimeter must be held.
 */
static void
lacp_mux_sm(aggr_port_t *portp)
{
	aggr_grp_t *aggrp = portp->lp_grp;
	boolean_t NTT_updated = B_FALSE;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	/* only referenced by the debug message below */
	lacp_mux_state_t oldstate = pl->sm.mux_state;

	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));

	/* LACP_OFF state not in specification so check here.  */
	if (!pl->sm.lacp_on) {
		pl->sm.mux_state = LACP_DETACHED;
		pl->ActorOperPortState.bit.sync = B_FALSE;

		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		/* only the state bits are cleared here, nothing is sent */
		pl->ActorOperPortState.bit.collecting =
		    pl->ActorOperPortState.bit.distributing = B_FALSE;
		return;
	}

	/* begin or LACP not enabled forces the machine back to DETACHED */
	if (pl->sm.begin || !pl->sm.lacp_enabled)
		pl->sm.mux_state = LACP_DETACHED;

again:
	/* determine next state, or return if state unchanged */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		/* with begin set, go run the DETACHED entry actions */
		if (pl->sm.begin) {
			break;
		}

		if ((pl->sm.selected == AGGR_SELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_WAITING;
			break;
		}
		return;

	case LACP_WAITING:
		if (pl->sm.selected == AGGR_UNSELECTED) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		/* attach only once the aggregator reports ready */
		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;

	case LACP_ATTACHED:
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		/* the partner must be in sync before we collect/distribute */
		if ((pl->sm.selected == AGGR_SELECTED) &&
		    pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
			break;
		}
		return;

	case LACP_COLLECTING_DISTRIBUTING:
		/* fall back to ATTACHED on deselection or partner sync loss */
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY) ||
		    !pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;
	}

	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
	    portp->lp_linkid, lacp_mux_str[oldstate],
	    lacp_mux_str[pl->sm.mux_state]));

	/* perform actions on entering a new state */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync =
		    pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		break;

	case LACP_WAITING:
		/* no state bit changes here, so no LACPDU is requested */
		start_wait_while_timer(portp);
		break;

	case LACP_ATTACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync = B_TRUE;
		pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		if (pl->PartnerOperPortState.bit.sync) {
			/*
			 * We had already received an updated sync from
			 * the partner. Attempt to transition to
			 * collecting/distributing now.
			 */
			goto again;
		}
		break;

	case LACP_COLLECTING_DISTRIBUTING:
		if (!pl->ActorOperPortState.bit.collecting &&
		    !pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Enabled.\n",
			    portp->lp_linkid));
		}
		pl->ActorOperPortState.bit.distributing = B_TRUE;

		/* Turn Collector_Distributor back ON */
		aggr_set_coll_dist(portp, B_TRUE);

		pl->ActorOperPortState.bit.collecting = B_TRUE;
		NTT_updated = B_TRUE;
		break;
	}

	/*
	 * If we updated the state of the NTT variable, then
	 * initiate a LACPDU transmission.
	 */
	if (NTT_updated) {
		pl->NTT = B_TRUE;
		lacp_xmit_sm(portp);
	}
} /* lacp_mux_sm */
903
904
905static int
906receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
907{
908	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
909
910	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
911
912	AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
913	    portp->lp_linkid));
914
915	/* LACP_OFF state not in specification so check here.  */
916	if (!portp->lp_lacp.sm.lacp_on)
917		return (-1);
918
919	if (MBLKL(mp) < sizeof (marker_pdu_t))
920		return (-1);
921
922	if (markerp->version != MARKER_VERSION) {
923		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
924		    "version = %d does not match s/w version %d\n",
925		    portp->lp_linkid, markerp->version, MARKER_VERSION));
926		return (-1);
927	}
928
929	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
930		/* We do not yet send out MARKER info PDUs */
931		AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
932		    " MARKER TLV = %d - We don't send out info type!\n",
933		    portp->lp_linkid, markerp->tlv_marker));
934		return (-1);
935	}
936
937	if (markerp->tlv_marker != MARKER_INFO_TLV) {
938		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
939		    " MARKER TLV = %d \n", portp->lp_linkid,
940		    markerp->tlv_marker));
941		return (-1);
942	}
943
944	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
945		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
946		    " MARKER length = %d \n", portp->lp_linkid,
947		    markerp->marker_len));
948		return (-1);
949	}
950
951	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
952		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
953		    " MARKER Port %d not equal to Partner port %d\n",
954		    portp->lp_linkid, markerp->requestor_port,
955		    portp->lp_lacp.PartnerOperPortNum));
956		return (-1);
957	}
958
959	if (ether_cmp(&markerp->system_id,
960	    &portp->lp_lacp.PartnerOperSystem) != 0) {
961		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
962		    " MARKER MAC not equal to Partner MAC\n",
963		    portp->lp_linkid));
964		return (-1);
965	}
966
967	/*
968	 * Turn into Marker Response PDU
969	 * and return mblk to sending system
970	 */
971	markerp->tlv_marker = MARKER_RESPONSE_TLV;
972
973	/* reuse the space that was used by received ethernet header */
974	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
975	mp->b_rptr -= sizeof (struct ether_header);
976	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
977	return (0);
978}
979
980/*
981 * Update the LACP mode (off, active, or passive) of the specified group.
982 */
983void
984aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
985{
986	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
987	aggr_port_t *port;
988
989	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
990	ASSERT(!grp->lg_closing);
991
992	if (mode == old_mode)
993		return;
994
995	grp->lg_lacp_mode = mode;
996
997	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
998		port->lp_lacp.ActorAdminPortState.bit.activity =
999		    port->lp_lacp.ActorOperPortState.bit.activity =
1000		    (mode == AGGR_LACP_ACTIVE);
1001
1002		if (old_mode == AGGR_LACP_OFF) {
1003			/* OFF -> {PASSIVE,ACTIVE} */
1004			/* turn OFF Collector_Distributor */
1005			aggr_set_coll_dist(port, B_FALSE);
1006			lacp_on(port);
1007		} else if (mode == AGGR_LACP_OFF) {
1008			/* {PASSIVE,ACTIVE} -> OFF */
1009			lacp_off(port);
1010			/* Turn ON Collector_Distributor */
1011			aggr_set_coll_dist(port, B_TRUE);
1012		} else {
1013			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1014			port->lp_lacp.sm.begin = B_TRUE;
1015			lacp_mux_sm(port);
1016			lacp_periodic_sm(port);
1017
1018			/* kick off state machines */
1019			lacp_receive_sm(port, NULL);
1020			lacp_mux_sm(port);
1021		}
1022	}
1023}
1024
1025
1026/*
1027 * Update the LACP timer (short or long) of the specified group.
1028 */
1029void
1030aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1031{
1032	aggr_port_t *port;
1033
1034	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1035
1036	if (timer == grp->aggr.PeriodicTimer)
1037		return;
1038
1039	grp->aggr.PeriodicTimer = timer;
1040
1041	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1042		port->lp_lacp.ActorAdminPortState.bit.timeout =
1043		    port->lp_lacp.ActorOperPortState.bit.timeout =
1044		    (timer == AGGR_LACP_TIMER_SHORT);
1045	}
1046}
1047
1048void
1049aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1050{
1051	aggr_lacp_mode_t	mode;
1052	aggr_lacp_timer_t	timer;
1053
1054	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1055
1056	mode = grp->lg_lacp_mode;
1057	timer = grp->aggr.PeriodicTimer;
1058
1059	port->lp_lacp.ActorAdminPortState.bit.activity =
1060	    port->lp_lacp.ActorOperPortState.bit.activity =
1061	    (mode == AGGR_LACP_ACTIVE);
1062
1063	port->lp_lacp.ActorAdminPortState.bit.timeout =
1064	    port->lp_lacp.ActorOperPortState.bit.timeout =
1065	    (timer == AGGR_LACP_TIMER_SHORT);
1066
1067	if (mode == AGGR_LACP_OFF) {
1068		/* Turn ON Collector_Distributor */
1069		aggr_set_coll_dist(port, B_TRUE);
1070	} else { /* LACP_ACTIVE/PASSIVE */
1071		lacp_on(port);
1072	}
1073}
1074
1075/*
1076 * Sets the initial LACP mode (off, active, passive) and LACP timer
1077 * (short, long) of the specified group.
1078 */
1079void
1080aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1081    aggr_lacp_timer_t timer)
1082{
1083	aggr_port_t *port;
1084
1085	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1086
1087	grp->lg_lacp_mode = mode;
1088	grp->aggr.PeriodicTimer = timer;
1089
1090	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1091		aggr_port_lacp_set_mode(grp, port);
1092}
1093
1094/*
1095 * Verify that the Partner MAC and Key recorded by the specified
1096 * port are not found in other ports that are not part of our
1097 * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1098 * otherwise.
1099 */
1100static boolean_t
1101lacp_misconfig_check(aggr_port_t *portp)
1102{
1103	aggr_grp_t *grp = portp->lp_grp;
1104	lacp_sel_ports_t *cport;
1105
1106	mutex_enter(&lacp_sel_lock);
1107
1108	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1109
1110		/* skip entries of the group of the port being checked */
1111		if (cport->sp_grp_linkid == grp->lg_linkid)
1112			continue;
1113
1114		if ((ether_cmp(&cport->sp_partner_system,
1115		    &grp->aggr.PartnerSystem) == 0) &&
1116		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1117			char mac_str[ETHERADDRL*3];
1118			struct ether_addr *mac = &cport->sp_partner_system;
1119
1120			/*
1121			 * The Partner port information is already in use
1122			 * by ports in another aggregation so disable this
1123			 * port.
1124			 */
1125
1126			(void) snprintf(mac_str, sizeof (mac_str),
1127			    "%x:%x:%x:%x:%x:%x",
1128			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1129			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1130			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1131
1132			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1133
1134			cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1135			    "MAC %s and key %d in use on aggregation %d "
1136			    "port %d\n", grp->lg_linkid, portp->lp_linkid,
1137			    mac_str, portp->lp_lacp.PartnerOperKey,
1138			    cport->sp_grp_linkid, cport->sp_linkid);
1139			break;
1140		}
1141	}
1142
1143	mutex_exit(&lacp_sel_lock);
1144	return (cport != NULL);
1145}
1146
1147/*
1148 * Remove the specified port from the list of selected ports.
1149 */
1150static void
1151lacp_sel_ports_del(aggr_port_t *portp)
1152{
1153	lacp_sel_ports_t *cport, **prev = NULL;
1154
1155	mutex_enter(&lacp_sel_lock);
1156
1157	prev = &sel_ports;
1158	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1159	    cport = cport->sp_next) {
1160		if (portp->lp_linkid == cport->sp_linkid)
1161			break;
1162	}
1163
1164	if (cport == NULL) {
1165		mutex_exit(&lacp_sel_lock);
1166		return;
1167	}
1168
1169	*prev = cport->sp_next;
1170	kmem_free(cport, sizeof (*cport));
1171
1172	mutex_exit(&lacp_sel_lock);
1173}
1174
1175/*
1176 * Add the specified port to the list of selected ports. Returns B_FALSE
1177 * if the operation could not be performed due to an memory allocation
1178 * error.
1179 */
1180static boolean_t
1181lacp_sel_ports_add(aggr_port_t *portp)
1182{
1183	lacp_sel_ports_t *new_port;
1184	lacp_sel_ports_t *cport, **last;
1185
1186	mutex_enter(&lacp_sel_lock);
1187
1188	/* check if port is already in the list */
1189	last = &sel_ports;
1190	for (cport = sel_ports; cport != NULL;
1191	    last = &cport->sp_next, cport = cport->sp_next) {
1192		if (portp->lp_linkid == cport->sp_linkid) {
1193			ASSERT(cport->sp_partner_key ==
1194			    portp->lp_lacp.PartnerOperKey);
1195			ASSERT(ether_cmp(&cport->sp_partner_system,
1196			    &portp->lp_lacp.PartnerOperSystem) == 0);
1197
1198			mutex_exit(&lacp_sel_lock);
1199			return (B_TRUE);
1200		}
1201	}
1202
1203	/* create and initialize new entry */
1204	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1205	if (new_port == NULL) {
1206		mutex_exit(&lacp_sel_lock);
1207		return (B_FALSE);
1208	}
1209
1210	new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1211	bcopy(&portp->lp_lacp.PartnerOperSystem,
1212	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1213	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1214	new_port->sp_linkid = portp->lp_linkid;
1215
1216	*last = new_port;
1217
1218	mutex_exit(&lacp_sel_lock);
1219	return (B_TRUE);
1220}
1221
1222/*
1223 * lacp_selection_logic - LACP selection logic
1224 *		Sets the selected variable on a per port basis
1225 *		and sets Ready when all waiting ports are ready
1226 *		to go online.
1227 *
1228 * parameters:
1229 *      - portp - instance this applies to.
1230 *
1231 * invoked:
1232 *    - when initialization is needed
1233 *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1234 *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1235 *    - every time the wait_while_timer pops
1236 *    - everytime we turn LACP on/off
1237 */
1238static void
1239lacp_selection_logic(aggr_port_t *portp)
1240{
1241	aggr_port_t *tpp;
1242	aggr_grp_t *aggrp = portp->lp_grp;
1243	int ports_waiting;
1244	boolean_t reset_mac = B_FALSE;
1245	aggr_lacp_port_t *pl = &portp->lp_lacp;
1246
1247	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1248
1249	/* LACP_OFF state not in specification so check here.  */
1250	if (!pl->sm.lacp_on) {
1251		lacp_port_unselect(portp);
1252		aggrp->aggr.ready = B_FALSE;
1253		lacp_mux_sm(portp);
1254		return;
1255	}
1256
1257	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1258	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1259
1260		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1261		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1262		    "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1263		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1264		    portp->lp_state));
1265
1266		lacp_port_unselect(portp);
1267		aggrp->aggr.ready = B_FALSE;
1268		lacp_mux_sm(portp);
1269		return;
1270	}
1271
1272	/*
1273	 * If LACP is not enabled then selected is never set.
1274	 */
1275	if (!pl->sm.lacp_enabled) {
1276		AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1277		    portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1278
1279		lacp_port_unselect(portp);
1280		lacp_mux_sm(portp);
1281		return;
1282	}
1283
1284	/*
1285	 * Check if the Partner MAC or Key are zero. If so, we have
1286	 * not received any LACP info or it has expired and the
1287	 * receive machine is in the LACP_DEFAULTED state.
1288	 */
1289	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1290	    (pl->PartnerOperKey == 0)) {
1291
1292		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1293			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1294			    &etherzeroaddr) != 0 &&
1295			    (tpp->lp_lacp.PartnerOperKey != 0))
1296				break;
1297		}
1298
1299		/*
1300		 * If all ports have no key or aggregation address,
1301		 * then clear the negotiated Partner MAC and key.
1302		 */
1303		if (tpp == NULL) {
1304			/* Clear the aggregation Partner MAC and key */
1305			aggrp->aggr.PartnerSystem = etherzeroaddr;
1306			aggrp->aggr.PartnerOperAggrKey = 0;
1307		}
1308
1309		return;
1310	}
1311
1312	/*
1313	 * Insure that at least one port in the aggregation
1314	 * matches the Partner aggregation MAC and key. If not,
1315	 * then clear the aggregation MAC and key. Later we will
1316	 * set the Partner aggregation MAC and key to that of the
1317	 * current port's Partner MAC and key.
1318	 */
1319	if (ether_cmp(&pl->PartnerOperSystem,
1320	    &aggrp->aggr.PartnerSystem) != 0 ||
1321	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1322
1323		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1324			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1325			    &aggrp->aggr.PartnerSystem) == 0 &&
1326			    (tpp->lp_lacp.PartnerOperKey ==
1327			    aggrp->aggr.PartnerOperAggrKey)) {
1328				/* Set aggregation Partner MAC and key */
1329				aggrp->aggr.PartnerSystem =
1330				    pl->PartnerOperSystem;
1331				aggrp->aggr.PartnerOperAggrKey =
1332				    pl->PartnerOperKey;
1333				break;
1334			}
1335		}
1336
1337		if (tpp == NULL) {
1338			/* Clear the aggregation Partner MAC and key */
1339			aggrp->aggr.PartnerSystem = etherzeroaddr;
1340			aggrp->aggr.PartnerOperAggrKey = 0;
1341			reset_mac = B_TRUE;
1342		}
1343	}
1344
1345	/*
1346	 * If our Actor MAC is found in the Partner MAC
1347	 * on this port then we have a loopback misconfiguration.
1348	 */
1349	if (ether_cmp(&pl->PartnerOperSystem,
1350	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1351		cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1352		    portp->lp_linkid);
1353
1354		lacp_port_unselect(portp);
1355		lacp_mux_sm(portp);
1356		return;
1357	}
1358
1359	/*
1360	 * If our Partner MAC and Key are found on any other
1361	 * ports that are not in our aggregation, we have
1362	 * a misconfiguration.
1363	 */
1364	if (lacp_misconfig_check(portp)) {
1365		lacp_mux_sm(portp);
1366		return;
1367	}
1368
1369	/*
1370	 * If the Aggregation Partner MAC and Key have not been
1371	 * set, then this is either the first port or the aggregation
1372	 * MAC and key have been reset. In either case we must set
1373	 * the values of the Partner MAC and key.
1374	 */
1375	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1376	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1377		/* Set aggregation Partner MAC and key */
1378		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1379		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1380
1381		/*
1382		 * If we reset Partner aggregation MAC, then restart
1383		 * selection_logic on ports that match new MAC address.
1384		 */
1385		if (reset_mac) {
1386			for (tpp = aggrp->lg_ports; tpp; tpp =
1387			    tpp->lp_next) {
1388				if (tpp == portp)
1389					continue;
1390				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1391				    &aggrp->aggr.PartnerSystem) == 0 &&
1392				    (tpp->lp_lacp.PartnerOperKey ==
1393				    aggrp->aggr.PartnerOperAggrKey))
1394					lacp_selection_logic(tpp);
1395			}
1396		}
1397	} else if (ether_cmp(&pl->PartnerOperSystem,
1398	    &aggrp->aggr.PartnerSystem) != 0 ||
1399	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1400		/*
1401		 * The Partner port information does not match
1402		 * that of the other ports in the aggregation
1403		 * so disable this port.
1404		 */
1405		lacp_port_unselect(portp);
1406
1407		cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1408		    "or key (%d) incompatible with Aggregation Partner "
1409		    "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1410		    aggrp->aggr.PartnerOperAggrKey);
1411
1412		lacp_mux_sm(portp);
1413		return;
1414	}
1415
1416	/* If we get to here, automatically set selected */
1417	if (pl->sm.selected != AGGR_SELECTED) {
1418		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1419		    "selected %d-->%d\n", portp->lp_linkid,
1420		    pl->sm.selected, AGGR_SELECTED));
1421		if (!lacp_port_select(portp))
1422			return;
1423		lacp_mux_sm(portp);
1424	}
1425
1426	/*
1427	 * From this point onward we have selected the port
1428	 * and are simply checking if the Ready flag should
1429	 * be set.
1430	 */
1431
1432	/*
1433	 * If at least two ports are waiting to aggregate
1434	 * and ready_n is set on all ports waiting to aggregate
1435	 * then set READY for the aggregation.
1436	 */
1437
1438	ports_waiting = 0;
1439
1440	if (!aggrp->aggr.ready) {
1441		/*
1442		 * If all ports in the aggregation have received compatible
1443		 * partner information and they match up correctly with the
1444		 * switch, there is no need to wait for all the
1445		 * wait_while_timers to pop.
1446		 */
1447		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1448			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1449			    tpp->lp_lacp.sm.begin) &&
1450			    !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1451				/* Add up ports uninitialized or waiting */
1452				ports_waiting++;
1453				if (!tpp->lp_lacp.sm.ready_n) {
1454					DTRACE_PROBE1(port___not__ready,
1455					    aggr_port_t *, tpp);
1456					return;
1457				}
1458			}
1459		}
1460	}
1461
1462	if (aggrp->aggr.ready) {
1463		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1464		    "aggr.ready already set\n", portp->lp_linkid));
1465		lacp_mux_sm(portp);
1466	} else {
1467		AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1468		    portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1469		aggrp->aggr.ready = B_TRUE;
1470
1471		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1472			lacp_mux_sm(tpp);
1473	}
1474
1475}
1476
1477/*
1478 * wait_while_timer_pop - When the timer pops, we arrive here to
1479 *			set ready_n and trigger the selection logic.
1480 */
1481static void
1482wait_while_timer_pop(void *data)
1483{
1484	aggr_port_t *portp = data;
1485	aggr_lacp_port_t *pl = &portp->lp_lacp;
1486
1487	mutex_enter(&pl->lacp_timer_lock);
1488	pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1489	cv_broadcast(&pl->lacp_timer_cv);
1490	mutex_exit(&pl->lacp_timer_lock);
1491}
1492
1493/*
1494 * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1495 *			set ready_n and trigger the selection logic.
1496 */
1497static void
1498wait_while_timer_pop_handler(aggr_port_t *portp)
1499{
1500	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1501
1502	AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1503	    portp->lp_linkid));
1504	portp->lp_lacp.sm.ready_n = B_TRUE;
1505
1506	lacp_selection_logic(portp);
1507}
1508
1509static void
1510start_wait_while_timer(aggr_port_t *portp)
1511{
1512	aggr_lacp_port_t *pl = &portp->lp_lacp;
1513
1514	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1515
1516	mutex_enter(&pl->lacp_timer_lock);
1517	if (pl->wait_while_timer.id == 0) {
1518		pl->wait_while_timer.id =
1519		    timeout(wait_while_timer_pop, portp,
1520		    drv_usectohz(1000000 *
1521		    portp->lp_lacp.wait_while_timer.val));
1522	}
1523	mutex_exit(&pl->lacp_timer_lock);
1524}
1525
1526
1527static void
1528stop_wait_while_timer(aggr_port_t *portp)
1529{
1530	aggr_lacp_port_t *pl = &portp->lp_lacp;
1531	timeout_id_t id;
1532
1533	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1534
1535	mutex_enter(&pl->lacp_timer_lock);
1536	if ((id = pl->wait_while_timer.id) != 0) {
1537		pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1538		pl->wait_while_timer.id = 0;
1539	}
1540	mutex_exit(&pl->lacp_timer_lock);
1541
1542	if (id != 0)
1543		(void) untimeout(id);
1544}
1545
1546/*
1547 * Invoked when a port has been attached to a group.
1548 * Complete the processing that couldn't be finished from lacp_on()
1549 * because the port was not started. We know that the link is full
1550 * duplex and ON, otherwise it wouldn't be attached.
1551 */
1552void
1553aggr_lacp_port_attached(aggr_port_t *portp)
1554{
1555	aggr_grp_t *grp = portp->lp_grp;
1556	aggr_lacp_port_t *pl = &portp->lp_lacp;
1557
1558	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1559	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1560	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1561
1562	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1563	    portp->lp_linkid));
1564
1565	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1566
1567	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1568		return;
1569
1570	pl->sm.lacp_enabled = B_TRUE;
1571	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1572	pl->sm.begin = B_TRUE;
1573
1574	lacp_receive_sm(portp, NULL);
1575	lacp_mux_sm(portp);
1576
1577	/* Enable Multicast Slow Protocol address */
1578	aggr_lacp_mcast_on(portp);
1579
1580	/* periodic_sm is started up from the receive machine */
1581	lacp_selection_logic(portp);
1582}
1583
1584/*
1585 * Invoked when a port has been detached from a group. Turn off
1586 * LACP processing if it was enabled.
1587 */
1588void
1589aggr_lacp_port_detached(aggr_port_t *portp)
1590{
1591	aggr_grp_t *grp = portp->lp_grp;
1592
1593	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1594	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1595
1596	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1597	    portp->lp_linkid));
1598
1599	portp->lp_lacp.sm.port_enabled = B_FALSE;
1600
1601	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1602		return;
1603
1604	portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1605	lacp_selection_logic(portp);
1606	lacp_mux_sm(portp);
1607	lacp_periodic_sm(portp);
1608
1609	/*
1610	 * Disable Slow Protocol Timers.
1611	 */
1612	stop_periodic_timer(portp);
1613	stop_current_while_timer(portp);
1614	stop_wait_while_timer(portp);
1615
1616	/* Disable Multicast Slow Protocol address */
1617	aggr_lacp_mcast_off(portp);
1618	aggr_set_coll_dist(portp, B_FALSE);
1619}
1620
1621/*
1622 * Enable Slow Protocol LACP and Marker PDUs.
1623 */
1624static void
1625lacp_on(aggr_port_t *portp)
1626{
1627	aggr_lacp_port_t *pl = &portp->lp_lacp;
1628	mac_perim_handle_t mph;
1629
1630	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1631
1632	mac_perim_enter_by_mh(portp->lp_mh, &mph);
1633
1634	/*
1635	 * Reset the state machines and Partner operational
1636	 * information. Careful to not reset things like
1637	 * our link state.
1638	 */
1639	lacp_reset_port(portp);
1640	pl->sm.lacp_on = B_TRUE;
1641
1642	AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1643
1644	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1645		pl->sm.port_enabled = B_TRUE;
1646		pl->sm.lacp_enabled = B_TRUE;
1647		pl->ActorOperPortState.bit.aggregation = B_TRUE;
1648	}
1649
1650	lacp_receive_sm(portp, NULL);
1651	lacp_mux_sm(portp);
1652
1653	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1654		/* Enable Multicast Slow Protocol address */
1655		aggr_lacp_mcast_on(portp);
1656
1657		/* periodic_sm is started up from the receive machine */
1658		lacp_selection_logic(portp);
1659	}
1660done:
1661	mac_perim_exit(mph);
1662} /* lacp_on */
1663
1664/* Disable Slow Protocol LACP and Marker PDUs */
1665static void
1666lacp_off(aggr_port_t *portp)
1667{
1668	aggr_lacp_port_t *pl = &portp->lp_lacp;
1669	mac_perim_handle_t mph;
1670
1671	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1672	mac_perim_enter_by_mh(portp->lp_mh, &mph);
1673
1674	pl->sm.lacp_on = B_FALSE;
1675
1676	AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1677
1678	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1679		/*
1680		 * Disable Slow Protocol Timers.
1681		 */
1682		stop_periodic_timer(portp);
1683		stop_current_while_timer(portp);
1684		stop_wait_while_timer(portp);
1685
1686		/* Disable Multicast Slow Protocol address */
1687		aggr_lacp_mcast_off(portp);
1688
1689		pl->sm.port_enabled = B_FALSE;
1690		pl->sm.lacp_enabled = B_FALSE;
1691		pl->ActorOperPortState.bit.aggregation = B_FALSE;
1692	}
1693
1694	lacp_mux_sm(portp);
1695	lacp_periodic_sm(portp);
1696	lacp_selection_logic(portp);
1697
1698	/* Turn OFF Collector_Distributor */
1699	aggr_set_coll_dist(portp, B_FALSE);
1700
1701	lacp_reset_port(portp);
1702	mac_perim_exit(mph);
1703}
1704
1705
1706static boolean_t
1707valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1708{
1709	/*
1710	 * 43.4.12 - "a Receive machine shall not validate
1711	 * the Version Number, TLV_type, or Reserved fields in received
1712	 * LACPDUs."
1713	 * ... "a Receive machine may validate the Actor_Information_Length,
1714	 * Partner_Information_Length, Collector_Information_Length,
1715	 * or Terminator_Length fields."
1716	 */
1717	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1718	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1719	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1720	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1721		AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1722		    " Terminator Length = %d \n", portp->lp_linkid,
1723		    lacp->terminator_len));
1724		return (B_FALSE);
1725	}
1726
1727	return (B_TRUE);
1728}
1729
1730
1731static void
1732start_current_while_timer(aggr_port_t *portp, uint_t time)
1733{
1734	aggr_lacp_port_t *pl = &portp->lp_lacp;
1735
1736	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1737
1738	mutex_enter(&pl->lacp_timer_lock);
1739	if (pl->current_while_timer.id == 0) {
1740		if (time > 0)
1741			pl->current_while_timer.val = time;
1742		else if (pl->ActorOperPortState.bit.timeout)
1743			pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1744		else
1745			pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1746
1747		pl->current_while_timer.id =
1748		    timeout(current_while_timer_pop, portp,
1749		    drv_usectohz((clock_t)1000000 *
1750		    (clock_t)portp->lp_lacp.current_while_timer.val));
1751	}
1752	mutex_exit(&pl->lacp_timer_lock);
1753}
1754
1755
1756static void
1757stop_current_while_timer(aggr_port_t *portp)
1758{
1759	aggr_lacp_port_t *pl = &portp->lp_lacp;
1760	timeout_id_t id;
1761
1762	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1763
1764	mutex_enter(&pl->lacp_timer_lock);
1765	if ((id = pl->current_while_timer.id) != 0) {
1766		pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1767		pl->current_while_timer.id = 0;
1768	}
1769	mutex_exit(&pl->lacp_timer_lock);
1770
1771	if (id != 0)
1772		(void) untimeout(id);
1773}
1774
1775static void
1776current_while_timer_pop(void *data)
1777{
1778	aggr_port_t *portp = (aggr_port_t *)data;
1779	aggr_lacp_port_t *pl = &portp->lp_lacp;
1780
1781	mutex_enter(&pl->lacp_timer_lock);
1782	pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1783	cv_broadcast(&pl->lacp_timer_cv);
1784	mutex_exit(&pl->lacp_timer_lock);
1785}
1786
1787static void
1788current_while_timer_pop_handler(aggr_port_t *portp)
1789{
1790	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1791
1792	AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1793	    "pop id=%p\n", portp->lp_linkid,
1794	    portp->lp_lacp.current_while_timer.id));
1795
1796	lacp_receive_sm(portp, NULL);
1797}
1798
1799/*
1800 * record_Default - Simply copies over administrative values
1801 * to the partner operational values, and sets our state to indicate we
1802 * are using defaulted values.
1803 */
1804static void
1805record_Default(aggr_port_t *portp)
1806{
1807	aggr_lacp_port_t *pl = &portp->lp_lacp;
1808
1809	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1810
1811	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1812	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1813	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1814	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1815	pl->PartnerOperKey = pl->PartnerAdminKey;
1816	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1817
1818	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1819}
1820
1821
1822/* Returns B_TRUE on sync value changing */
1823static boolean_t
1824record_PDU(aggr_port_t *portp, lacp_t *lacp)
1825{
1826	aggr_grp_t *aggrp = portp->lp_grp;
1827	aggr_lacp_port_t *pl = &portp->lp_lacp;
1828	uint8_t save_sync;
1829
1830	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1831
1832	/*
1833	 * Partner Information
1834	 */
1835	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1836	pl->PartnerOperPortPriority =
1837	    ntohs(lacp->actor_info.port_priority);
1838	pl->PartnerOperSystem = lacp->actor_info.system_id;
1839	pl->PartnerOperSysPriority =
1840	    htons(lacp->actor_info.system_priority);
1841	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1842
1843	/* All state info except for Synchronization */
1844	save_sync = pl->PartnerOperPortState.bit.sync;
1845	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1846
1847	/* Defaulted set to FALSE */
1848	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1849
1850	/*
1851	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1852	 *		Partner_System_Priority, Partner_Key, and
1853	 *		Partner_State.Aggregation) are compared to the
1854	 *		corresponding operations paramters values for
1855	 *		the Actor. If these are equal, or if this is
1856	 *		an individual link, we are synchronized.
1857	 */
1858	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1859	    (ntohs(lacp->partner_info.port_priority) ==
1860	    pl->ActorPortPriority) &&
1861	    (ether_cmp(&lacp->partner_info.system_id,
1862	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1863	    (ntohs(lacp->partner_info.system_priority) ==
1864	    aggrp->aggr.ActorSystemPriority) &&
1865	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1866	    (lacp->partner_info.state.bit.aggregation ==
1867	    pl->ActorOperPortState.bit.aggregation)) ||
1868	    (!lacp->actor_info.state.bit.aggregation)) {
1869
1870		pl->PartnerOperPortState.bit.sync =
1871		    lacp->actor_info.state.bit.sync;
1872	} else {
1873		pl->PartnerOperPortState.bit.sync = B_FALSE;
1874	}
1875
1876	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1877		AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1878		    "%d -->%d\n", portp->lp_linkid, save_sync,
1879		    pl->PartnerOperPortState.bit.sync));
1880		return (B_TRUE);
1881	} else {
1882		return (B_FALSE);
1883	}
1884}
1885
1886
1887/*
1888 * update_selected - If any of the Partner parameters has
1889 *			changed from a previous value, then
1890 *			unselect the link from the aggregator.
1891 */
1892static boolean_t
1893update_selected(aggr_port_t *portp, lacp_t *lacp)
1894{
1895	aggr_lacp_port_t *pl = &portp->lp_lacp;
1896
1897	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1898
1899	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1900	    (pl->PartnerOperPortPriority !=
1901	    ntohs(lacp->actor_info.port_priority)) ||
1902	    (ether_cmp(&pl->PartnerOperSystem,
1903	    &lacp->actor_info.system_id) != 0) ||
1904	    (pl->PartnerOperSysPriority !=
1905	    ntohs(lacp->actor_info.system_priority)) ||
1906	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1907	    (pl->PartnerOperPortState.bit.aggregation !=
1908	    lacp->actor_info.state.bit.aggregation)) {
1909		AGGR_LACP_DBG(("update_selected:(%d): "
1910		    "selected  %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1911		    AGGR_UNSELECTED));
1912
1913		lacp_port_unselect(portp);
1914		return (B_TRUE);
1915	} else {
1916		return (B_FALSE);
1917	}
1918}
1919
1920
1921/*
1922 * update_default_selected - If any of the operational Partner parameters
1923 *			is different than that of the administrative values
1924 *			then unselect the link from the aggregator.
1925 */
1926static void
1927update_default_selected(aggr_port_t *portp)
1928{
1929	aggr_lacp_port_t *pl = &portp->lp_lacp;
1930
1931	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1932
1933	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1934	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1935	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1936	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1937	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1938	    (pl->PartnerOperPortState.bit.aggregation !=
1939	    pl->PartnerAdminPortState.bit.aggregation)) {
1940
1941		AGGR_LACP_DBG(("update_default_selected:(%d): "
1942		    "selected  %d-->%d\n", portp->lp_linkid,
1943		    pl->sm.selected, AGGR_UNSELECTED));
1944
1945		lacp_port_unselect(portp);
1946	}
1947}
1948
1949
1950/*
1951 * update_NTT - If any of the Partner values in the received LACPDU
1952 *			are different than that of the Actor operational
1953 *			values then set NTT to true.
1954 */
1955static void
1956update_NTT(aggr_port_t *portp, lacp_t *lacp)
1957{
1958	aggr_grp_t *aggrp = portp->lp_grp;
1959	aggr_lacp_port_t *pl = &portp->lp_lacp;
1960
1961	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1962
1963	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1964	    (pl->ActorPortPriority !=
1965	    ntohs(lacp->partner_info.port_priority)) ||
1966	    (ether_cmp(&aggrp->lg_addr,
1967	    &lacp->partner_info.system_id) != 0) ||
1968	    (aggrp->aggr.ActorSystemPriority !=
1969	    ntohs(lacp->partner_info.system_priority)) ||
1970	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1971	    (pl->ActorOperPortState.bit.activity !=
1972	    lacp->partner_info.state.bit.activity) ||
1973	    (pl->ActorOperPortState.bit.timeout !=
1974	    lacp->partner_info.state.bit.timeout) ||
1975	    (pl->ActorOperPortState.bit.sync !=
1976	    lacp->partner_info.state.bit.sync) ||
1977	    (pl->ActorOperPortState.bit.aggregation !=
1978	    lacp->partner_info.state.bit.aggregation)) {
1979
1980		AGGR_LACP_DBG(("update_NTT:(%d): NTT  %d-->%d\n",
1981		    portp->lp_linkid, pl->NTT, B_TRUE));
1982
1983		pl->NTT = B_TRUE;
1984	}
1985}
1986
1987/*
1988 * lacp_receive_sm - LACP receive state machine
1989 *
1990 * parameters:
1991 *      - portp - instance this applies to.
1992 *      - lacp - pointer in the case of a received LACPDU.
1993 *                This value is NULL if there is no LACPDU.
1994 *
1995 * invoked:
1996 *    - when initialization is needed
1997 *    - upon reception of an LACPDU. This is the common case.
1998 *    - every time the current_while_timer pops
1999 */
2000static void
2001lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
2002{
2003	boolean_t sync_updated, selected_updated, save_activity;
2004	aggr_lacp_port_t *pl = &portp->lp_lacp;
2005	lacp_receive_state_t oldstate = pl->sm.receive_state;
2006
2007	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
2008
2009	/* LACP_OFF state not in specification so check here.  */
2010	if (!pl->sm.lacp_on)
2011		return;
2012
2013	/* figure next state */
2014	if (pl->sm.begin || pl->sm.port_moved) {
2015		pl->sm.receive_state = LACP_INITIALIZE;
2016	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
2017		pl->sm.receive_state = LACP_PORT_DISABLED;
2018	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
2019		pl->sm.receive_state =
2020		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
2021		    LACP_DISABLED : LACP_PORT_DISABLED;
2022	} else if (lacp != NULL) {
2023		if ((pl->sm.receive_state == LACP_EXPIRED) ||
2024		    (pl->sm.receive_state == LACP_DEFAULTED)) {
2025			pl->sm.receive_state = LACP_CURRENT;
2026		}
2027	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
2028	    (pl->current_while_timer.id == 0)) {
2029		pl->sm.receive_state = LACP_EXPIRED;
2030	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
2031	    (pl->current_while_timer.id == 0)) {
2032		pl->sm.receive_state = LACP_DEFAULTED;
2033	}
2034
2035	if (!((lacp && (oldstate == LACP_CURRENT) &&
2036	    (pl->sm.receive_state == LACP_CURRENT)))) {
2037		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
2038		    portp->lp_linkid, lacp_receive_str[oldstate],
2039		    lacp_receive_str[pl->sm.receive_state]));
2040	}
2041
2042	switch (pl->sm.receive_state) {
2043	case LACP_INITIALIZE:
2044		lacp_port_unselect(portp);
2045		record_Default(portp);
2046		pl->ActorOperPortState.bit.expired = B_FALSE;
2047		pl->sm.port_moved = B_FALSE;
2048		pl->sm.receive_state = LACP_PORT_DISABLED;
2049		pl->sm.begin = B_FALSE;
2050		lacp_receive_sm(portp, NULL);
2051		break;
2052
2053	case LACP_PORT_DISABLED:
2054		pl->PartnerOperPortState.bit.sync = B_FALSE;
2055		/*
2056		 * Stop current_while_timer in case
2057		 * we got here from link down
2058		 */
2059		stop_current_while_timer(portp);
2060
2061		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
2062			pl->sm.receive_state = LACP_DISABLED;
2063			lacp_receive_sm(portp, lacp);
2064			/* We goto LACP_DISABLED state */
2065			break;
2066		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2067			pl->sm.receive_state = LACP_EXPIRED;
2068			/*
2069			 * FALL THROUGH TO LACP_EXPIRED CASE:
2070			 * We have no way of knowing if we get into
2071			 * lacp_receive_sm() from a  current_while_timer
2072			 * expiring as it has never been kicked off yet!
2073			 */
2074		} else {
2075			/* We stay in LACP_PORT_DISABLED state */
2076			break;
2077		}
2078		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
2079		/* FALLTHROUGH */
2080
2081	case LACP_EXPIRED:
2082		/*
2083		 * Arrives here from LACP_PORT_DISABLED state as well as
2084		 * as well as current_while_timer expiring.
2085		 */
2086		pl->PartnerOperPortState.bit.sync = B_FALSE;
2087		pl->PartnerOperPortState.bit.timeout = B_TRUE;
2088
2089		pl->ActorOperPortState.bit.expired = B_TRUE;
2090		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2091		lacp_periodic_sm(portp);
2092		break;
2093
2094	case LACP_DISABLED:
2095		/*
2096		 * This is the normal state for recv_sm when LACP_OFF
2097		 * is set or the NIC is in half duplex mode.
2098		 */
2099		lacp_port_unselect(portp);
2100		record_Default(portp);
2101		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2102		pl->ActorOperPortState.bit.expired = B_FALSE;
2103		break;
2104
2105	case LACP_DEFAULTED:
2106		/*
2107		 * Current_while_timer expired a second time.
2108		 */
2109		update_default_selected(portp);
2110		record_Default(portp);	/* overwrite Partner Oper val */
2111		pl->ActorOperPortState.bit.expired = B_FALSE;
2112		pl->PartnerOperPortState.bit.sync = B_TRUE;
2113
2114		lacp_selection_logic(portp);
2115		lacp_mux_sm(portp);
2116		break;
2117
2118	case LACP_CURRENT:
2119		/*
2120		 * Reception of LACPDU
2121		 */
2122
2123		if (!lacp) /* no LACPDU so current_while_timer popped */
2124			break;
2125
2126		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2127		    portp->lp_linkid));
2128
2129		/*
2130		 * Validate Actor_Information_Length,
2131		 * Partner_Information_Length, Collector_Information_Length,
2132		 * and Terminator_Length fields.
2133		 */
2134		if (!valid_lacp_pdu(portp, lacp)) {
2135			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2136			    "Invalid LACPDU received\n",
2137			    portp->lp_linkid));
2138			break;
2139		}
2140
2141		save_activity = pl->PartnerOperPortState.bit.activity;
2142		selected_updated = update_selected(portp, lacp);
2143		update_NTT(portp, lacp);
2144		sync_updated = record_PDU(portp, lacp);
2145
2146		pl->ActorOperPortState.bit.expired = B_FALSE;
2147
2148		if (selected_updated) {
2149			lacp_selection_logic(portp);
2150			lacp_mux_sm(portp);
2151		} else if (sync_updated) {
2152			lacp_mux_sm(portp);
2153		}
2154
2155		/*
2156		 * If the periodic timer value bit has been modified
2157		 * or the partner activity bit has been changed then
2158		 * we need to respectively:
2159		 *  - restart the timer with the proper timeout value.
2160		 *  - possibly enable/disable transmission of LACPDUs.
2161		 */
2162		if ((pl->PartnerOperPortState.bit.timeout &&
2163		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2164		    (!pl->PartnerOperPortState.bit.timeout &&
2165		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2166		    (pl->PartnerOperPortState.bit.activity !=
2167		    save_activity)) {
2168			lacp_periodic_sm(portp);
2169		}
2170
2171		stop_current_while_timer(portp);
2172		/* Check if we need to transmit an LACPDU */
2173		if (pl->NTT)
2174			lacp_xmit_sm(portp);
2175		start_current_while_timer(portp, 0);
2176
2177		break;
2178	}
2179}
2180
2181static void
2182aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2183{
2184	mac_perim_handle_t mph;
2185
2186	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2187	    portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2188
2189	mac_perim_enter_by_mh(portp->lp_mh, &mph);
2190	if (!enable) {
2191		/*
2192		 * Turn OFF Collector_Distributor.
2193		 */
2194		portp->lp_collector_enabled = B_FALSE;
2195		aggr_send_port_disable(portp);
2196		goto done;
2197	}
2198
2199	/*
2200	 * Turn ON Collector_Distributor.
2201	 */
2202
2203	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2204	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2205		/* Port is compatible and can be aggregated */
2206		portp->lp_collector_enabled = B_TRUE;
2207		aggr_send_port_enable(portp);
2208	}
2209
2210done:
2211	mac_perim_exit(mph);
2212}
2213
2214/*
2215 * Because the LACP packet processing needs to enter the aggr's mac perimeter
2216 * and that would potentially cause a deadlock with the thread in which the
2217 * grp/port is deleted, we defer the packet process to a worker thread. Here
2218 * we only enqueue the received Marker or LACPDU for later processing.
2219 */
2220void
2221aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2222{
2223	aggr_grp_t *grp = portp->lp_grp;
2224	lacp_t	*lacp;
2225
2226	dmp->b_rptr += sizeof (struct ether_header);
2227
2228	if (MBLKL(dmp) < sizeof (lacp_t)) {
2229		freemsg(dmp);
2230		return;
2231	}
2232
2233	lacp = (lacp_t *)dmp->b_rptr;
2234	if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2235		AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2236		    "Unknown Slow Protocol type %d\n",
2237		    portp->lp_linkid, lacp->subtype));
2238		freemsg(dmp);
2239		return;
2240	}
2241
2242	mutex_enter(&grp->lg_lacp_lock);
2243
2244	/*
2245	 * If the lg_lacp_done is set, this aggregation is in the process of
2246	 * being deleted, return directly.
2247	 */
2248	if (grp->lg_lacp_done) {
2249		mutex_exit(&grp->lg_lacp_lock);
2250		freemsg(dmp);
2251		return;
2252	}
2253
2254	if (grp->lg_lacp_tail == NULL) {
2255		grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2256	} else {
2257		grp->lg_lacp_tail->b_next = dmp;
2258		grp->lg_lacp_tail = dmp;
2259	}
2260
2261	/*
2262	 * Hold a reference of the port so that the port won't be freed when it
2263	 * is removed from the aggr. The b_prev field is borrowed to save the
2264	 * port information.
2265	 */
2266	AGGR_PORT_REFHOLD(portp);
2267	dmp->b_prev = (mblk_t *)portp;
2268	cv_broadcast(&grp->lg_lacp_cv);
2269	mutex_exit(&grp->lg_lacp_lock);
2270}
2271
2272static void
2273aggr_lacp_rx(mblk_t *dmp)
2274{
2275	aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2276	mac_perim_handle_t mph;
2277	lacp_t	*lacp;
2278
2279	dmp->b_prev = NULL;
2280
2281	mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2282	if (portp->lp_closing)
2283		goto done;
2284
2285	lacp = (lacp_t *)dmp->b_rptr;
2286	switch (lacp->subtype) {
2287	case LACP_SUBTYPE:
2288		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2289		    portp->lp_linkid));
2290
2291		if (!portp->lp_lacp.sm.lacp_on) {
2292			break;
2293		}
2294		lacp_receive_sm(portp, lacp);
2295		break;
2296
2297	case MARKER_SUBTYPE:
2298		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2299		    portp->lp_linkid));
2300
2301		if (receive_marker_pdu(portp, dmp) != 0)
2302			break;
2303
2304		/* Send the packet over the first TX ring */
2305		dmp = mac_hwring_send_priv(portp->lp_mch,
2306		    portp->lp_tx_rings[0], dmp);
2307		if (dmp != NULL)
2308			freemsg(dmp);
2309		mac_perim_exit(mph);
2310		AGGR_PORT_REFRELE(portp);
2311		return;
2312	}
2313
2314done:
2315	mac_perim_exit(mph);
2316	AGGR_PORT_REFRELE(portp);
2317	freemsg(dmp);
2318}
2319
2320void
2321aggr_lacp_rx_thread(void *arg)
2322{
2323	callb_cpr_t	cprinfo;
2324	aggr_grp_t	*grp = (aggr_grp_t *)arg;
2325	aggr_port_t	*port;
2326	mblk_t		*mp, *nextmp;
2327
2328	CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2329	    "aggr_lacp_rx_thread");
2330
2331	mutex_enter(&grp->lg_lacp_lock);
2332
2333	/*
2334	 * Quit the thread if the grp is deleted.
2335	 */
2336	while (!grp->lg_lacp_done) {
2337		if ((mp = grp->lg_lacp_head) == NULL) {
2338			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2339			cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2340			CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2341			continue;
2342		}
2343
2344		grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2345		mutex_exit(&grp->lg_lacp_lock);
2346
2347		while (mp != NULL) {
2348			nextmp = mp->b_next;
2349			mp->b_next = NULL;
2350			aggr_lacp_rx(mp);
2351			mp = nextmp;
2352		}
2353		mutex_enter(&grp->lg_lacp_lock);
2354	}
2355
2356	/*
2357	 * The grp is being destroyed, simply free all of the LACP messages
2358	 * left in the queue which did not have the chance to be processed.
2359	 * We cannot use freemsgchain() here since we need to clear the
2360	 * b_prev field.
2361	 */
2362	for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2363		port = (aggr_port_t *)mp->b_prev;
2364		AGGR_PORT_REFRELE(port);
2365		nextmp = mp->b_next;
2366		mp->b_next = NULL;
2367		mp->b_prev = NULL;
2368		freemsg(mp);
2369	}
2370
2371	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2372	grp->lg_lacp_rx_thread = NULL;
2373	cv_broadcast(&grp->lg_lacp_cv);
2374	CALLB_CPR_EXIT(&cprinfo);
2375	thread_exit();
2376}
2377