if_lagg.c revision 212100
/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/

/*
 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/net/if_lagg.c 212100 2010-09-01 16:53:38Z emaste $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#endif

#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <net/if_vlan_var.h>
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>

/* Special flags we should propagate to the lagg ports. */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} lagg_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};
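
/*
 * Each entry above pairs an interface flag with the kernel helper that
 * acquires or releases a reference on it for a port; lagg_setflags() walks
 * this table, so e.g. setting IFF_PROMISC on the lagg calls ifpromisc() on
 * every member port.
 */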

SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
static struct mtx	lagg_list_mtx;
eventhandler_tag	lagg_detach_cookie = NULL;

static int	lagg_clone_create(struct if_clone *, int, caddr_t);
static void	lagg_clone_destroy(struct ifnet *);
static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
static void	lagg_capabilities(struct lagg_softc *);
static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
static void	lagg_port_setlladdr(void *, int);
static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
static int	lagg_port_destroy(struct lagg_port *, int);
static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
static void	lagg_linkstate(struct lagg_softc *);
static void	lagg_port_state(struct ifnet *, int);
static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_port_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct route *);
static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
#ifdef LAGG_PORT_STACKING
static int	lagg_port_checkstacking(struct lagg_softc *);
#endif
static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void	lagg_init(void *);
static void	lagg_stop(struct lagg_softc *);
static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_ether_setmulti(struct lagg_softc *);
static int	lagg_ether_cmdmulti(struct lagg_port *, int);
static int	lagg_setflag(struct lagg_port *, int, int,
		    int (*func)(struct ifnet *, int));
static int	lagg_setflags(struct lagg_port *, int status);
static void	lagg_start(struct ifnet *);
static int	lagg_media_change(struct ifnet *);
static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
	    struct lagg_port *);
static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);

IFC_SIMPLE_DECLARE(lagg, 0);

/* Simple round robin */
static int	lagg_rr_attach(struct lagg_softc *);
static int	lagg_rr_detach(struct lagg_softc *);
static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Active failover */
static int	lagg_fail_attach(struct lagg_softc *);
static int	lagg_fail_detach(struct lagg_softc *);
static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Load balancing */
static int	lagg_lb_attach(struct lagg_softc *);
static int	lagg_lb_detach(struct lagg_softc *);
static int	lagg_lb_port_create(struct lagg_port *);
static void	lagg_lb_port_destroy(struct lagg_port *);
static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);

/* 802.3ad LACP */
static int	lagg_lacp_attach(struct lagg_softc *);
static int	lagg_lacp_detach(struct lagg_softc *);
static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static void	lagg_lacp_lladdr(struct lagg_softc *);

/* lagg protocol table */
static const struct {
	int			ti_proto;
	int			(*ti_attach)(struct lagg_softc *);
} lagg_protos[] = {
	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
	{ LAGG_PROTO_LOADBALANCE,	lagg_lb_attach },
	{ LAGG_PROTO_ETHERCHANNEL,	lagg_lb_attach },
	{ LAGG_PROTO_LACP,		lagg_lacp_attach },
	{ LAGG_PROTO_NONE,		NULL }
};
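
/*
 * Note that LAGG_PROTO_ETHERCHANNEL is simply an alias for the loadbalance
 * attach routine, and that lagg_clone_create() scans this table for
 * LAGG_PROTO_DEFAULT when a new lagg interface is created.
 */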

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation");

static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
    &lagg_failover_rx_all, 0,
    "Accept input from any interface in a failover lagg");

static int
lagg_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
		SLIST_INIT(&lagg_list);
		if_clone_attach(&lagg_cloner);
		lagg_input_p = lagg_input;
		lagg_linkstate_p = lagg_port_state;
		lagg_detach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, lagg_port_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    lagg_detach_cookie);
		if_clone_detach(&lagg_cloner);
		lagg_input_p = NULL;
		lagg_linkstate_p = NULL;
		mtx_destroy(&lagg_list_mtx);
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}
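
/*
 * lagg_input_p and lagg_linkstate_p are hooks called from the generic
 * ethernet input and link-state code when an interface has if_lagg set;
 * pointing them at our handlers while the module is loaded is what diverts
 * traffic from member ports into the lagg.
 */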

static moduledata_t lagg_mod = {
	"if_lagg",
	lagg_modevent,
	0
};

DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);

#if __FreeBSD_version >= 800000
/*
 * This routine is run via a vlan
 * config EVENT
 */
static void
lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct lagg_softc	*sc = ifp->if_softc;
	struct lagg_port	*lp;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	LAGG_RLOCK(sc);
	if (!SLIST_EMPTY(&sc->sc_ports)) {
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
	}
	LAGG_RUNLOCK(sc);
}

/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
static void
lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct lagg_softc	*sc = ifp->if_softc;
	struct lagg_port	*lp;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	LAGG_RLOCK(sc);
	if (!SLIST_EMPTY(&sc->sc_ports)) {
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
	}
	LAGG_RUNLOCK(sc);
}
#endif
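
/*
 * These handlers re-fire the vlan events against every member port when a
 * vlan is created or destroyed on top of the lagg, so that e.g. a port
 * driver's hardware VLAN filter sees the same tags as the lagg itself.
 */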

static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct lagg_softc *sc;
	struct ifnet *ifp;
	int i, error = 0;
	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	sc->sc_proto = LAGG_PROTO_NONE;
	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
			sc->sc_proto = lagg_protos[i].ti_proto;
			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
				if_free_type(ifp, IFT_ETHER);
				free(sc, M_DEVBUF);
				return (error);
			}
			break;
		}
	}
	LAGG_LOCK_INIT(sc);
	SLIST_INIT(&sc->sc_ports);
	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);

	/* Initialise pseudo media types */
	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
	    lagg_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_type = IFT_ETHER;
	ifp->if_softc = sc;
	ifp->if_start = lagg_start;
	ifp->if_init = lagg_init;
	ifp->if_ioctl = lagg_ioctl;
	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;

	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Attach as an ordinary ethernet device, children will be attached
	 * as special devices of type IFT_IEEE8023ADLAG.
	 */
	ether_ifattach(ifp, eaddr);

#if __FreeBSD_version >= 800000
	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
#endif

	/* Insert into the global list of laggs */
	mtx_lock(&lagg_list_mtx);
	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	return (0);
}
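
/*
 * Typical userland usage, for illustration:
 *
 *	ifconfig lagg0 create
 *	ifconfig lagg0 laggproto lacp laggport em0 laggport em1 up
 *
 * which triggers the clone routine above and then adds each port via the
 * SIOCSLAGGPORT ioctl handled in lagg_ioctl().
 */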

static void
lagg_clone_destroy(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	LAGG_WLOCK(sc);

	lagg_stop(sc);
	ifp->if_flags &= ~IFF_UP;

#if __FreeBSD_version >= 800000
	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
#endif

	/* Shutdown and remove lagg ports */
	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
		lagg_port_destroy(lp, 1);
	/* Unhook the aggregation protocol */
	(*sc->sc_detach)(sc);

	LAGG_WUNLOCK(sc);

	ifmedia_removeall(&sc->sc_media);
	ether_ifdetach(ifp);
	if_free_type(ifp, IFT_ETHER);

	mtx_lock(&lagg_list_mtx);
	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
	LAGG_LOCK_DESTROY(sc);
	free(sc, M_DEVBUF);
}

static void
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
	struct ifnet *ifp = sc->sc_ifp;

	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
	/* Let the protocol know the MAC has changed */
	if (sc->sc_lladdr != NULL)
		(*sc->sc_lladdr)(sc);
	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
}

static void
lagg_capabilities(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int cap = ~0, ena = ~0;
	u_long hwa = ~0UL;

	LAGG_WLOCK_ASSERT(sc);

	/* Get capabilities from the lagg ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		cap &= lp->lp_ifp->if_capabilities;
		ena &= lp->lp_ifp->if_capenable;
		hwa &= lp->lp_ifp->if_hwassist;
	}
	cap = (cap == ~0 ? 0 : cap);
	ena = (ena == ~0 ? 0 : ena);
	hwa = (hwa == ~0 ? 0 : hwa);

	if (sc->sc_ifp->if_capabilities != cap ||
	    sc->sc_ifp->if_capenable != ena ||
	    sc->sc_ifp->if_hwassist != hwa) {
		sc->sc_ifp->if_capabilities = cap;
		sc->sc_ifp->if_capenable = ena;
		sc->sc_ifp->if_hwassist = hwa;
		getmicrotime(&sc->sc_ifp->if_lastchange);

		if (sc->sc_ifflags & IFF_DEBUG)
			if_printf(sc->sc_ifp,
			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
	}
}

static void
lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct lagg_llq *llq;
	int pending = 0;

	LAGG_WLOCK_ASSERT(sc);

	if (lp->lp_detaching ||
	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	/* Check to make sure it's not already queued to be changed */
	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
		if (llq->llq_ifp == ifp) {
			pending = 1;
			break;
		}
	}

	if (!pending) {
		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
		if (llq == NULL)	/* XXX what to do */
			return;
	}

	/* Update the lladdr even if pending, it may have changed */
	llq->llq_ifp = ifp;
	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);

	if (!pending)
		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);

	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
}

/*
 * Set the interface MAC address from a taskqueue to avoid a lock order
 * reversal (LOR); if_setlladdr() may call back into the port driver,
 * which can need locks that must not be taken while the lagg lock is held.
 */
static void
lagg_port_setlladdr(void *arg, int pending)
{
	struct lagg_softc *sc = (struct lagg_softc *)arg;
	struct lagg_llq *llq, *head;
	struct ifnet *ifp;
	int error;

	/* Grab a local reference of the queue and remove it from the softc */
	LAGG_WLOCK(sc);
	head = SLIST_FIRST(&sc->sc_llq_head);
	SLIST_FIRST(&sc->sc_llq_head) = NULL;
	LAGG_WUNLOCK(sc);

	/*
	 * Traverse the queue and set the lladdr on each ifp. It is safe to do
	 * unlocked as we have the only reference to it.
	 */
	for (llq = head; llq != NULL; llq = head) {
		ifp = llq->llq_ifp;

		/* Set the link layer address */
		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
		if (error)
			printf("%s: setlladdr failed on %s\n", __func__,
			    ifp->if_xname);

		head = SLIST_NEXT(llq, llq_entries);
		free(llq, M_DEVBUF);
	}
}

static int
lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int error = 0;

	LAGG_WLOCK_ASSERT(sc);

	/* Limit the maximal number of lagg ports */
	if (sc->sc_count >= LAGG_MAX_PORTS)
		return (ENOSPC);

	/* Check if port has already been associated with a lagg */
	if (ifp->if_lagg != NULL)
		return (EBUSY);

	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
	if (ifp->if_type != IFT_ETHER)
		return (EPROTONOSUPPORT);

	/* Allow the first Ethernet member to define the MTU */
	if (SLIST_EMPTY(&sc->sc_ports))
		sc->sc_ifp->if_mtu = ifp->if_mtu;
	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
		    ifp->if_xname);
		return (EINVAL);
	}

	if ((lp = malloc(sizeof(struct lagg_port),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	/* Check if port is a stacked lagg */
	mtx_lock(&lagg_list_mtx);
	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
		if (ifp == sc_ptr->sc_ifp) {
			mtx_unlock(&lagg_list_mtx);
			free(lp, M_DEVBUF);
			return (EINVAL);
			/* XXX disable stacking for the moment, it's untested */
#ifdef LAGG_PORT_STACKING
			lp->lp_flags |= LAGG_PORT_STACK;
			if (lagg_port_checkstacking(sc_ptr) >=
			    LAGG_MAX_STACKING) {
				mtx_unlock(&lagg_list_mtx);
				free(lp, M_DEVBUF);
				return (E2BIG);
			}
#endif
		}
	}
	mtx_unlock(&lagg_list_mtx);

	/* Change the interface type */
	lp->lp_iftype = ifp->if_type;
	ifp->if_type = IFT_IEEE8023ADLAG;
	ifp->if_lagg = lp;
	lp->lp_ioctl = ifp->if_ioctl;
	ifp->if_ioctl = lagg_port_ioctl;
	lp->lp_output = ifp->if_output;
	ifp->if_output = lagg_port_output;

	lp->lp_ifp = ifp;
	lp->lp_softc = sc;

	/* Save port link layer address */
	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);

	if (SLIST_EMPTY(&sc->sc_ports)) {
		sc->sc_primary = lp;
		lagg_lladdr(sc, IF_LLADDR(ifp));
	} else {
		/* Update link layer address for this port */
		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
	}

	/* Insert into the list of ports */
	SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
	sc->sc_count++;

	/* Update lagg capabilities */
	lagg_capabilities(sc);
	lagg_linkstate(sc);

	/* Add multicast addresses and interface flags to this port */
	lagg_ether_cmdmulti(lp, 1);
	lagg_setflags(lp, 1);

	if (sc->sc_port_create != NULL)
		error = (*sc->sc_port_create)(lp);
	if (error) {
		/* remove the port again, without calling sc_port_destroy */
		lagg_port_destroy(lp, 0);
		return (error);
	}

	return (error);
}

#ifdef LAGG_PORT_STACKING
static int
lagg_port_checkstacking(struct lagg_softc *sc)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int m = 0;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_flags & LAGG_PORT_STACK) {
			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
			m = MAX(m, lagg_port_checkstacking(sc_ptr));
		}
	}

	return (m + 1);
}
#endif

static int
lagg_port_destroy(struct lagg_port *lp, int runpd)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct lagg_port *lp_ptr;
	struct lagg_llq *llq;
	struct ifnet *ifp = lp->lp_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if (runpd && sc->sc_port_destroy != NULL)
		(*sc->sc_port_destroy)(lp);

	/*
	 * Remove multicast addresses and interface flags from this port and
	 * reset the MAC address, skip if the interface is being detached.
	 */
	if (!lp->lp_detaching) {
		lagg_ether_cmdmulti(lp, 0);
		lagg_setflags(lp, 0);
		lagg_port_lladdr(lp, lp->lp_lladdr);
	}

	/* Restore interface */
	ifp->if_type = lp->lp_iftype;
	ifp->if_ioctl = lp->lp_ioctl;
	ifp->if_output = lp->lp_output;
	ifp->if_lagg = NULL;

	/* Finally, remove the port from the lagg */
	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
	sc->sc_count--;

	/* Update the primary interface */
	if (lp == sc->sc_primary) {
		uint8_t lladdr[ETHER_ADDR_LEN];

		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
			bzero(&lladdr, ETHER_ADDR_LEN);
		} else {
			bcopy(lp_ptr->lp_lladdr,
			    lladdr, ETHER_ADDR_LEN);
		}
		lagg_lladdr(sc, lladdr);
		sc->sc_primary = lp_ptr;

		/* Update link layer address for each port */
		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
			lagg_port_lladdr(lp_ptr, lladdr);
	}

	/* Remove any pending lladdr changes from the queue */
	if (lp->lp_detaching) {
		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
			if (llq->llq_ifp == ifp) {
				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
				    llq_entries);
				free(llq, M_DEVBUF);
				break;	/* Only appears once */
			}
		}
	}

	if (lp->lp_ifflags)
		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);

	free(lp, M_DEVBUF);

	/* Update lagg capabilities */
	lagg_capabilities(sc);
	lagg_linkstate(sc);

	return (0);
}

static int
lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_reqport *rp = (struct lagg_reqport *)data;
	struct lagg_softc *sc;
	struct lagg_port *lp = NULL;
	int error = 0;

	/* Should be checked by the caller */
	if (ifp->if_type != IFT_IEEE8023ADLAG ||
	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
		goto fallback;

	switch (cmd) {
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    ifunit(rp->rp_portname) != ifp) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc);
		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc);
		break;

	case SIOCSIFCAP:
		if (lp->lp_ioctl == NULL) {
			error = EINVAL;
			break;
		}
		error = (*lp->lp_ioctl)(ifp, cmd, data);
		if (error)
			break;

		/* Update lagg interface capabilities */
		LAGG_WLOCK(sc);
		lagg_capabilities(sc);
		LAGG_WUNLOCK(sc);
		break;

	case SIOCSIFMTU:
		/* Do not allow the MTU to be changed once joined */
		error = EINVAL;
		break;

	default:
		goto fallback;
	}

	return (error);

fallback:
	/* lp may still be NULL here if ifp was not a lagg port */
	if (lp != NULL && lp->lp_ioctl != NULL)
		return ((*lp->lp_ioctl)(ifp, cmd, data));

	return (EINVAL);
}

static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
	struct sockaddr *dst, struct route *ro)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct ether_header *eh;
	short type = 0;

	switch (dst->sa_family) {
		case pseudo_AF_HDRCMPLT:
		case AF_UNSPEC:
			eh = (struct ether_header *)dst->sa_data;
			type = eh->ether_type;
			break;
	}

	/*
	 * Only allow ethernet types required to initiate or maintain the link,
	 * aggregated frames take a different path.
	 */
	switch (ntohs(type)) {
		case ETHERTYPE_PAE:	/* EAPOL PAE/802.1x */
			return ((*lp->lp_output)(ifp, m, dst, ro));
	}

	/* drop any other frames */
	m_freem(m);
	return (EBUSY);
}

static void
lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct lagg_port *lp;
	struct lagg_softc *sc;

	if ((lp = ifp->if_lagg) == NULL)
		return;

	sc = lp->lp_softc;

	LAGG_WLOCK(sc);
	lp->lp_detaching = 1;
	lagg_port_destroy(lp, 1);
	LAGG_WUNLOCK(sc);
}

static void
lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
{
	struct lagg_softc *sc = lp->lp_softc;

	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
	rp->rp_prio = lp->lp_prio;
	rp->rp_flags = lp->lp_flags;
	if (sc->sc_portreq != NULL)
		(*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);

	/* Add protocol specific flags */
	switch (sc->sc_proto) {
		case LAGG_PROTO_FAILOVER:
			if (lp == sc->sc_primary)
				rp->rp_flags |= LAGG_PORT_MASTER;
			if (lp == lagg_link_active(sc, sc->sc_primary))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			break;

		case LAGG_PROTO_ROUNDROBIN:
		case LAGG_PROTO_LOADBALANCE:
		case LAGG_PROTO_ETHERCHANNEL:
			if (LAGG_PORTACTIVE(lp))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			break;

		case LAGG_PROTO_LACP:
			/* LACP has a different definition of active */
			if (lacp_isactive(lp))
				rp->rp_flags |= LAGG_PORT_ACTIVE;
			if (lacp_iscollecting(lp))
				rp->rp_flags |= LAGG_PORT_COLLECTING;
			if (lacp_isdistributing(lp))
				rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
			break;
	}
}

static void
lagg_init(void *xsc)
{
	struct lagg_softc *sc = (struct lagg_softc *)xsc;
	struct lagg_port *lp;
	struct ifnet *ifp = sc->sc_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	LAGG_WLOCK(sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Update the port lladdrs */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_port_lladdr(lp, IF_LLADDR(ifp));

	if (sc->sc_init != NULL)
		(*sc->sc_init)(sc);

	LAGG_WUNLOCK(sc);
}

static void
lagg_stop(struct lagg_softc *sc)
{
	struct ifnet *ifp = sc->sc_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	if (sc->sc_stop != NULL)
		(*sc->sc_stop)(sc);
}

static int
lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_reqall *ra = (struct lagg_reqall *)data;
	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
	struct ifreq *ifr = (struct ifreq *)data;
	struct lagg_port *lp;
	struct ifnet *tpif;
	struct thread *td = curthread;
	char *buf, *outbuf;
	int count, buflen, len, error = 0;

	bzero(&rpbuf, sizeof(rpbuf));

	switch (cmd) {
	case SIOCGLAGG:
		LAGG_RLOCK(sc);
		count = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			count++;
		buflen = count * sizeof(struct lagg_reqport);
		LAGG_RUNLOCK(sc);

		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);

		LAGG_RLOCK(sc);
		ra->ra_proto = sc->sc_proto;
		if (sc->sc_req != NULL)
			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);

		count = 0;
		buf = outbuf;
		len = min(ra->ra_size, buflen);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			if (len < sizeof(rpbuf))
				break;

			lagg_port2req(lp, &rpbuf);
			memcpy(buf, &rpbuf, sizeof(rpbuf));
			count++;
			buf += sizeof(rpbuf);
			len -= sizeof(rpbuf);
		}
		LAGG_RUNLOCK(sc);
		ra->ra_ports = count;
		ra->ra_size = count * sizeof(rpbuf);
		error = copyout(outbuf, ra->ra_port, ra->ra_size);
		free(outbuf, M_TEMP);
		break;
	case SIOCSLAGG:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (ra->ra_proto >= LAGG_PROTO_MAX) {
			error = EPROTONOSUPPORT;
			break;
		}
		if (sc->sc_proto != LAGG_PROTO_NONE) {
			LAGG_WLOCK(sc);
			error = sc->sc_detach(sc);
			/* Reset protocol and pointers */
			sc->sc_proto = LAGG_PROTO_NONE;
			sc->sc_detach = NULL;
			sc->sc_start = NULL;
			sc->sc_input = NULL;
			sc->sc_port_create = NULL;
			sc->sc_port_destroy = NULL;
			sc->sc_linkstate = NULL;
			sc->sc_init = NULL;
			sc->sc_stop = NULL;
			sc->sc_lladdr = NULL;
			sc->sc_req = NULL;
			sc->sc_portreq = NULL;
			LAGG_WUNLOCK(sc);
		}
		if (error != 0)
			break;
		for (int i = 0; i < (sizeof(lagg_protos) /
		    sizeof(lagg_protos[0])); i++) {
			if (lagg_protos[i].ti_proto == ra->ra_proto) {
				if (sc->sc_ifflags & IFF_DEBUG)
					printf("%s: using proto %u\n",
					    sc->sc_ifname,
					    lagg_protos[i].ti_proto);
				LAGG_WLOCK(sc);
				sc->sc_proto = lagg_protos[i].ti_proto;
				if (sc->sc_proto != LAGG_PROTO_NONE)
					error = lagg_protos[i].ti_attach(sc);
				LAGG_WUNLOCK(sc);
				return (error);
			}
		}
		error = EPROTONOSUPPORT;
		break;
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc);
		break;
	case SIOCSLAGGPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}
		LAGG_WLOCK(sc);
		error = lagg_port_create(sc, tpif);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSLAGGDELPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_WLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_WUNLOCK(sc);
			break;
		}

		error = lagg_port_destroy(lp, 1);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		/* Set flags on ports too */
		LAGG_WLOCK(sc);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			lagg_setflags(lp, 1);
		}
		LAGG_WUNLOCK(sc);

		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			LAGG_WLOCK(sc);
			lagg_stop(sc);
			LAGG_WUNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			(*ifp->if_init)(sc);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		LAGG_WLOCK(sc);
		error = lagg_ether_setmulti(sc);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCSIFCAP:
	case SIOCSIFMTU:
		/* Do not allow the MTU or caps to be directly changed */
		error = EINVAL;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}

static int
lagg_ether_setmulti(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		/* First, remove any existing filter entries. */
		lagg_ether_cmdmulti(lp, 0);
		/* copy all addresses from the lagg interface to the port */
		lagg_ether_cmdmulti(lp, 1);
	}
	return (0);
}

static int
lagg_ether_cmdmulti(struct lagg_port *lp, int set)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct ifnet *scifp = sc->sc_ifp;
	struct lagg_mc *mc;
	struct ifmultiaddr *ifma, *rifma = NULL;
	struct sockaddr_dl sdl;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	bzero((char *)&sdl, sizeof(sdl));
	sdl.sdl_len = sizeof(sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_type = IFT_ETHER;
	sdl.sdl_alen = ETHER_ADDR_LEN;
	sdl.sdl_index = ifp->if_index;

	if (set) {
		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
			    LLADDR(&sdl), ETHER_ADDR_LEN);

			error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
			if (error)
				return (error);
			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
			if (mc == NULL)
				return (ENOMEM);
			mc->mc_ifma = rifma;
			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
		}
	} else {
		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
			if_delmulti_ifma(mc->mc_ifma);
			free(mc, M_DEVBUF);
		}
	}
	return (0);
}

/* Handle a ref counted flag that should be set on the lagg port as well */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
	     int (*func)(struct ifnet *, int))
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct ifnet *ifp = lp->lp_ifp;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	status = status ? (scifp->if_flags & flag) : 0;
	/* Now "status" contains the flag value or 0 */

	/*
	 * See if the recorded port status differs from what we want it
	 * to be.  If it does, flip it.  We record the port's status in
	 * lp_ifflags so that we won't clear a flag we haven't set.  In
	 * fact, we don't clear or set the port's flags directly, but
	 * acquire or release references to them.  That's why we can be
	 * sure that the recorded flags still agree with the actual port
	 * flags.
	 */
	if (status != (lp->lp_ifflags & flag)) {
		error = (*func)(ifp, status);
		if (error)
			return (error);
		lp->lp_ifflags &= ~flag;
		lp->lp_ifflags |= status;
	}
	return (0);
}

/*
 * Handle IFF_* flags that require certain changes on the lagg port.
 * If "status" is true, update the port's flags to match the lagg's;
 * if "status" is false, forcibly clear the flags set on the port.
 */
static int
lagg_setflags(struct lagg_port *lp, int status)
{
	int error, i;

	for (i = 0; lagg_pflags[i].flag; i++) {
		error = lagg_setflag(lp, lagg_pflags[i].flag,
		    status, lagg_pflags[i].func);
		if (error)
			return (error);
	}
	return (0);
}

static void
lagg_start(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct mbuf *m;
	int error = 0;

	LAGG_RLOCK(sc);
	/* We need a Tx algorithm and at least one port */
	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
		IF_DRAIN(&ifp->if_snd);
		LAGG_RUNLOCK(sc);
		return;
	}

	for (;; error = 0) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		ETHER_BPF_MTAP(ifp, m);

		error = (*sc->sc_start)(sc, m);
		if (error == 0)
			ifp->if_opackets++;
		else
			ifp->if_oerrors++;
	}
	LAGG_RUNLOCK(sc);
}

static struct mbuf *
lagg_input(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;

	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
	    sc->sc_proto == LAGG_PROTO_NONE) {
		m_freem(m);
		return (NULL);
	}

	LAGG_RLOCK(sc);
	ETHER_BPF_MTAP(scifp, m);

	m = (*sc->sc_input)(sc, lp, m);

	if (m != NULL) {
		scifp->if_ipackets++;
		scifp->if_ibytes += m->m_pkthdr.len;

		if (scifp->if_flags & IFF_MONITOR) {
			m_freem(m);
			m = NULL;
		}
	}

	LAGG_RUNLOCK(sc);
	return (m);
}

static int
lagg_media_change(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;

	if (sc->sc_ifflags & IFF_DEBUG)
		printf("%s\n", __func__);

	/* Ignore */
	return (0);
}

static void
lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_active = IFM_ETHER | IFM_AUTO;

	LAGG_RLOCK(sc);
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp))
			imr->ifm_status |= IFM_ACTIVE;
	}
	LAGG_RUNLOCK(sc);
}

static void
lagg_linkstate(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int new_link = LINK_STATE_DOWN;
	uint64_t speed;

	/* Our link is considered up if at least one of our ports is active */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_link_state == LINK_STATE_UP) {
			new_link = LINK_STATE_UP;
			break;
		}
	}
	if_link_state_change(sc->sc_ifp, new_link);

	/* Update if_baudrate to reflect the max possible speed */
	switch (sc->sc_proto) {
		case LAGG_PROTO_FAILOVER:
			sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
			    sc->sc_primary->lp_ifp->if_baudrate : 0;
			break;
		case LAGG_PROTO_ROUNDROBIN:
		case LAGG_PROTO_LOADBALANCE:
		case LAGG_PROTO_ETHERCHANNEL:
			speed = 0;
			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
				speed += lp->lp_ifp->if_baudrate;
			sc->sc_ifp->if_baudrate = speed;
			break;
		case LAGG_PROTO_LACP:
			/* LACP updates if_baudrate itself */
			break;
	}
}

static void
lagg_port_state(struct ifnet *ifp, int state)
{
	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
	struct lagg_softc *sc = NULL;

	if (lp != NULL)
		sc = lp->lp_softc;
	if (sc == NULL)
		return;

	LAGG_WLOCK(sc);
	lagg_linkstate(sc);
	if (sc->sc_linkstate != NULL)
		(*sc->sc_linkstate)(lp);
	LAGG_WUNLOCK(sc);
}
struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_port *lp_next, *rval = NULL;
	/* int new_link = LINK_STATE_DOWN; */

	LAGG_RLOCK_ASSERT(sc);
	/*
	 * Search for a port which reports an active link state.
	 */

	if (lp == NULL)
		goto search;
	if (LAGG_PORTACTIVE(lp)) {
		rval = lp;
		goto found;
	}
	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
	    LAGG_PORTACTIVE(lp_next)) {
		rval = lp_next;
		goto found;
	}

search:
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp_next)) {
			rval = lp_next;
			goto found;
		}
	}

found:
	if (rval != NULL) {
		/*
		 * The IEEE 802.1D standard assumes that a lagg with
		 * multiple ports is always full duplex. This is valid
		 * for load sharing laggs and if at least two links
		 * are active. Unfortunately, checking the latter would
		 * be too expensive at this point.
		 XXX
		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
		    (sc->sc_count > 1))
			new_link = LINK_STATE_FULL_DUPLEX;
		else
			new_link = rval->lp_link_state;
		 */
	}

	return (rval);
}

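/*
 * lagg_gethdr() returns a pointer to "len" bytes at offset "off" in the
 * packet, copying them into the caller-supplied "buf" when they are not
 * contiguous in the first mbuf, and NULL when the packet is too short.
 */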
static const void *
lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
{
	if (m->m_pkthdr.len < (off + len)) {
		return (NULL);
	} else if (m->m_len < (off + len)) {
		m_copydata(m, off, len, buf);
		return (buf);
	}
	return (mtod(m, char *) + off);
}

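/*
 * The flow hash folds each available header into a running hash32: for a
 * plain IPv4 frame that is the source and destination MACs, any VLAN tag,
 * then the IPv4 source and destination addresses (plus the flow label for
 * IPv6).  Layer 4 ports are not included in this version, so all traffic
 * between the same pair of hosts maps to the same port.
 */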
uint32_t
lagg_hashmbuf(struct mbuf *m, uint32_t key)
{
	uint16_t etype;
	uint32_t p = 0;
	int off;
	struct ether_header *eh;
	struct ether_vlan_header vlanbuf;
	const struct ether_vlan_header *vlan;
#ifdef INET
	const struct ip *ip;
	struct ip ipbuf;
#endif
#ifdef INET6
	const struct ip6_hdr *ip6;
	struct ip6_hdr ip6buf;
	uint32_t flow;
#endif

	off = sizeof(*eh);
	if (m->m_len < off)
		goto out;
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
	p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);

	/* Special handling for encapsulating VLAN frames */
	if (m->m_flags & M_VLANTAG) {
		p = hash32_buf(&m->m_pkthdr.ether_vtag,
		    sizeof(m->m_pkthdr.ether_vtag), p);
	} else if (etype == ETHERTYPE_VLAN) {
		vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf);
		if (vlan == NULL)
			goto out;

		p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
		etype = ntohs(vlan->evl_proto);
		off += sizeof(*vlan) - sizeof(*eh);
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
		if (ip == NULL)
			goto out;

		p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
		p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
		if (ip6 == NULL)
			goto out;

		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
		break;
#endif
	}
out:
	return (p);
}

int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{

	return (ifp->if_transmit)(ifp, m);
}

/*
 * Simple round robin aggregation
 */

static int
lagg_rr_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_rr_detach;
	sc->sc_start = lagg_rr_start;
	sc->sc_input = lagg_rr_input;
	sc->sc_port_create = NULL;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
	sc->sc_seq = 0;

	return (0);
}

static int
lagg_rr_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;
	uint32_t p;

	p = atomic_fetchadd_32(&sc->sc_seq, 1);
	p %= sc->sc_count;
	lp = SLIST_FIRST(&sc->sc_ports);
	while (p--)
		lp = SLIST_NEXT(lp, lp_entries);

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}
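
/*
 * Note that strict per-packet round robin can reorder frames belonging to
 * a single flow; traffic that is sensitive to reordering is better served
 * by the loadbalance or lacp protocols, which keep a flow on one port.
 */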

static struct mbuf *
lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * Active failover
 */

static int
lagg_fail_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_fail_detach;
	sc->sc_start = lagg_fail_start;
	sc->sc_input = lagg_fail_input;
	sc->sc_port_create = NULL;
	sc->sc_port_destroy = NULL;

	return (0);
}

static int
lagg_fail_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	/* Use the master port if active or the next available port */
	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct lagg_port *tmp_tp;

	if (lp == sc->sc_primary || lagg_failover_rx_all) {
		m->m_pkthdr.rcvif = ifp;
		return (m);
	}

	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
		tmp_tp = lagg_link_active(sc, sc->sc_primary);
		/*
		 * If tmp_tp is null, we've received a packet when all
		 * our links are down. Weird, but process it anyway.
		 */
		if ((tmp_tp == NULL || tmp_tp == lp)) {
			m->m_pkthdr.rcvif = ifp;
			return (m);
		}
	}

	m_freem(m);
	return (NULL);
}

/*
 * Load balancing
 */

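/*
 * The loadbalance protocol hashes the packet headers (see lagg_hashmbuf())
 * with a per-lagg random key and uses the result to index a static port
 * table, so a given flow consistently maps to the same port.
 */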
static int
lagg_lb_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	struct lagg_lb *lb;

	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	sc->sc_detach = lagg_lb_detach;
	sc->sc_start = lagg_lb_start;
	sc->sc_input = lagg_lb_input;
	sc->sc_port_create = lagg_lb_port_create;
	sc->sc_port_destroy = lagg_lb_port_destroy;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;

	lb->lb_key = arc4random();
	sc->sc_psc = (caddr_t)lb;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_lb_port_create(lp);

	return (0);
}

static int
lagg_lb_detach(struct lagg_softc *sc)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	if (lb != NULL)
		free(lb, M_DEVBUF);
	return (0);
}

static int
lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp_next;
	int i = 0;

	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (lp_next == lp)
			continue;
		if (i >= LAGG_MAX_PORTS)
			return (EINVAL);
		if (sc->sc_ifflags & IFF_DEBUG)
			printf("%s: port %s at index %d\n",
			    sc->sc_ifname, lp_next->lp_ifname, i);
		lb->lb_ports[i++] = lp_next;
	}

	return (0);
}

static int
lagg_lb_port_create(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	return (lagg_lb_porttable(sc, NULL));
}

static void
lagg_lb_port_destroy(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	lagg_lb_porttable(sc, lp);
}

static int
lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp = NULL;
	uint32_t p = 0;

	if (m->m_flags & M_FLOWID)
		p = m->m_pkthdr.flowid;
	else
		p = lagg_hashmbuf(m, lb->lb_key);
	p %= sc->sc_count;
	lp = lb->lb_ports[p];

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * 802.3ad LACP
 */

static int
lagg_lacp_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	sc->sc_detach = lagg_lacp_detach;
	sc->sc_port_create = lacp_port_create;
	sc->sc_port_destroy = lacp_port_destroy;
	sc->sc_linkstate = lacp_linkstate;
	sc->sc_start = lagg_lacp_start;
	sc->sc_input = lagg_lacp_input;
	sc->sc_init = lacp_init;
	sc->sc_stop = lacp_stop;
	sc->sc_lladdr = lagg_lacp_lladdr;
	sc->sc_req = lacp_req;
	sc->sc_portreq = lacp_portreq;

	error = lacp_attach(sc);
	if (error)
		return (error);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);

	return (error);
}

static int
lagg_lacp_detach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* unlocking is safe here */
	LAGG_WUNLOCK(sc);
	error = lacp_detach(sc);
	LAGG_WLOCK(sc);

	return (error);
}

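/*
 * On a MAC address change the LACP ports are destroyed and re-created
 * below; the actor system ID that LACP advertises is derived from the
 * interface MAC address, so the aggregation has to be renegotiated.
 */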
static void
lagg_lacp_lladdr(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	/* purge all the lacp ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* add them back in */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);
}

static int
lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	lp = lacp_select_tx_port(sc, m);
	if (lp == NULL) {
		m_freem(m);
		return (EBUSY);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct ether_header *eh;
	u_short etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);

	/* Tap off LACP control messages */
	if (etype == ETHERTYPE_SLOW) {
		m = lacp_input(lp, m);
		if (m == NULL)
			return (NULL);
	}

	/*
	 * If the port is not collecting or not in the active aggregator then
	 * free and return.
	 */
	if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
		m_freem(m);
		return (NULL);
	}

	m->m_pkthdr.rcvif = ifp;
	return (m);
}