if_lagg.c (267992) if_lagg.c (269492)
1/* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
2
3/*
4 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
5 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include <sys/cdefs.h>
21__FBSDID("$FreeBSD: head/sys/net/if_lagg.c 267992 2014-06-28 03:56:17Z hselasky $");
21__FBSDID("$FreeBSD: head/sys/net/if_lagg.c 269492 2014-08-04 00:58:12Z mav $");
22
23#include "opt_inet.h"
24#include "opt_inet6.h"
25
26#include <sys/param.h>
27#include <sys/kernel.h>
28#include <sys/malloc.h>
29#include <sys/mbuf.h>
30#include <sys/queue.h>
31#include <sys/socket.h>
32#include <sys/sockio.h>
33#include <sys/sysctl.h>
34#include <sys/module.h>
35#include <sys/priv.h>
36#include <sys/systm.h>
37#include <sys/proc.h>
38#include <sys/hash.h>
39#include <sys/lock.h>
40#include <sys/rmlock.h>
41#include <sys/taskqueue.h>
42#include <sys/eventhandler.h>
43
44#include <net/ethernet.h>
45#include <net/if.h>
46#include <net/if_clone.h>
47#include <net/if_arp.h>
48#include <net/if_dl.h>
49#include <net/if_llc.h>
50#include <net/if_media.h>
51#include <net/if_types.h>
52#include <net/if_var.h>
53#include <net/bpf.h>
54
55#if defined(INET) || defined(INET6)
56#include <netinet/in.h>
57#include <netinet/ip.h>
58#endif
59#ifdef INET
60#include <netinet/in_systm.h>
61#include <netinet/if_ether.h>
62#endif
63
64#ifdef INET6
65#include <netinet/ip6.h>
66#include <netinet6/in6_var.h>
67#include <netinet6/in6_ifattach.h>
68#endif
69
70#include <net/if_vlan_var.h>
71#include <net/if_lagg.h>
72#include <net/ieee8023ad_lacp.h>
73
74/* Special flags we should propagate to the lagg ports. */
75static struct {
76 int flag;
77 int (*func)(struct ifnet *, int);
78} lagg_pflags[] = {
79 {IFF_PROMISC, ifpromisc},
80 {IFF_ALLMULTI, if_allmulti},
81 {0, NULL}
82};
83
84SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
85static struct mtx lagg_list_mtx;
86eventhandler_tag lagg_detach_cookie = NULL;
87
88static int lagg_clone_create(struct if_clone *, int, caddr_t);
89static void lagg_clone_destroy(struct ifnet *);
90static struct if_clone *lagg_cloner;
91static const char laggname[] = "lagg";
92
93static void lagg_lladdr(struct lagg_softc *, uint8_t *);
94static void lagg_capabilities(struct lagg_softc *);
95static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
96static void lagg_port_setlladdr(void *, int);
97static int lagg_port_create(struct lagg_softc *, struct ifnet *);
98static int lagg_port_destroy(struct lagg_port *, int);
99static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
100static void lagg_linkstate(struct lagg_softc *);
101static void lagg_port_state(struct ifnet *, int);
102static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
103static int lagg_port_output(struct ifnet *, struct mbuf *,
104 const struct sockaddr *, struct route *);
105static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
106#ifdef LAGG_PORT_STACKING
107static int lagg_port_checkstacking(struct lagg_softc *);
108#endif
109static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
110static void lagg_init(void *);
111static void lagg_stop(struct lagg_softc *);
112static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
113static int lagg_ether_setmulti(struct lagg_softc *);
114static int lagg_ether_cmdmulti(struct lagg_port *, int);
115static int lagg_setflag(struct lagg_port *, int, int,
116 int (*func)(struct ifnet *, int));
117static int lagg_setflags(struct lagg_port *, int status);
118static int lagg_transmit(struct ifnet *, struct mbuf *);
119static void lagg_qflush(struct ifnet *);
120static int lagg_media_change(struct ifnet *);
121static void lagg_media_status(struct ifnet *, struct ifmediareq *);
122static struct lagg_port *lagg_link_active(struct lagg_softc *,
123 struct lagg_port *);
124static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
125static int lagg_sysctl_active(SYSCTL_HANDLER_ARGS);
126
127/* Simple round robin */
128static int lagg_rr_attach(struct lagg_softc *);
129static int lagg_rr_detach(struct lagg_softc *);
130static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
131static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
132 struct mbuf *);
133
134/* Active failover */
135static int lagg_fail_attach(struct lagg_softc *);
136static int lagg_fail_detach(struct lagg_softc *);
137static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
138static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
139 struct mbuf *);
140
141/* Loadbalancing */
142static int lagg_lb_attach(struct lagg_softc *);
143static int lagg_lb_detach(struct lagg_softc *);
144static int lagg_lb_port_create(struct lagg_port *);
145static void lagg_lb_port_destroy(struct lagg_port *);
146static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
147static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
148 struct mbuf *);
149static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
150
151/* 802.3ad LACP */
152static int lagg_lacp_attach(struct lagg_softc *);
153static int lagg_lacp_detach(struct lagg_softc *);
154static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
155static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
156 struct mbuf *);
157static void lagg_lacp_lladdr(struct lagg_softc *);
158
159static void lagg_callout(void *);
160
161/* lagg protocol table */
162static const struct {
163 int ti_proto;
164 int (*ti_attach)(struct lagg_softc *);
165} lagg_protos[] = {
166 { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
167 { LAGG_PROTO_FAILOVER, lagg_fail_attach },
168 { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
169 { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
170 { LAGG_PROTO_LACP, lagg_lacp_attach },
171 { LAGG_PROTO_NONE, NULL }
172};
173
174SYSCTL_DECL(_net_link);
175SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
176 "Link Aggregation");
177
178static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
179SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
180 &lagg_failover_rx_all, 0,
181 "Accept input from any interface in a failover lagg");
182static int def_use_flowid = 1; /* Default value for using M_FLOWID */
183SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
184 &def_use_flowid, 0,
185 "Default setting for using flow id for load sharing");
186static int def_flowid_shift = 16; /* Default value for using M_FLOWID */
187SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
188 &def_flowid_shift, 0,
189 "Default setting for flowid shift for load sharing");
190
191static int
192lagg_modevent(module_t mod, int type, void *data)
193{
194
195 switch (type) {
196 case MOD_LOAD:
197 mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
198 SLIST_INIT(&lagg_list);
199 lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
200 lagg_clone_destroy, 0);
201 lagg_input_p = lagg_input;
202 lagg_linkstate_p = lagg_port_state;
203 lagg_detach_cookie = EVENTHANDLER_REGISTER(
204 ifnet_departure_event, lagg_port_ifdetach, NULL,
205 EVENTHANDLER_PRI_ANY);
206 break;
207 case MOD_UNLOAD:
208 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
209 lagg_detach_cookie);
210 if_clone_detach(lagg_cloner);
211 lagg_input_p = NULL;
212 lagg_linkstate_p = NULL;
213 mtx_destroy(&lagg_list_mtx);
214 break;
215 default:
216 return (EOPNOTSUPP);
217 }
218 return (0);
219}
220
221static moduledata_t lagg_mod = {
222 "if_lagg",
223 lagg_modevent,
224 0
225};
226
227DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
228MODULE_VERSION(if_lagg, 1);
229
230/*
 231 * This routine is run via a vlan
232 * config EVENT
233 */
234static void
235lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
236{
237 struct lagg_softc *sc = ifp->if_softc;
238 struct lagg_port *lp;
239 struct rm_priotracker tracker;
240
241 if (ifp->if_softc != arg) /* Not our event */
242 return;
243
244 LAGG_RLOCK(sc, &tracker);
245 if (!SLIST_EMPTY(&sc->sc_ports)) {
246 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
247 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
248 }
249 LAGG_RUNLOCK(sc, &tracker);
250}
251
252/*
 253 * This routine is run via a vlan
254 * unconfig EVENT
255 */
256static void
257lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
258{
259 struct lagg_softc *sc = ifp->if_softc;
260 struct lagg_port *lp;
261 struct rm_priotracker tracker;
262
263 if (ifp->if_softc != arg) /* Not our event */
264 return;
265
266 LAGG_RLOCK(sc, &tracker);
267 if (!SLIST_EMPTY(&sc->sc_ports)) {
268 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
269 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
270 }
271 LAGG_RUNLOCK(sc, &tracker);
272}
273
274static int
275lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
276{
277 struct lagg_softc *sc;
278 struct ifnet *ifp;
279 int i, error = 0;
280 static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
281 struct sysctl_oid *oid;
282 char num[14]; /* sufficient for 32 bits */
283
284 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
285 ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
286 if (ifp == NULL) {
287 free(sc, M_DEVBUF);
288 return (ENOSPC);
289 }
290
291 sc->sc_ipackets = counter_u64_alloc(M_WAITOK);
292 sc->sc_opackets = counter_u64_alloc(M_WAITOK);
293 sc->sc_ibytes = counter_u64_alloc(M_WAITOK);
294 sc->sc_obytes = counter_u64_alloc(M_WAITOK);
295
296 sysctl_ctx_init(&sc->ctx);
297 snprintf(num, sizeof(num), "%u", unit);
298 sc->use_flowid = def_use_flowid;
299 sc->flowid_shift = def_flowid_shift;
300 sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx,
301 &SYSCTL_NODE_CHILDREN(_net_link, lagg),
302 OID_AUTO, num, CTLFLAG_RD, NULL, "");
303 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
304 "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid,
305 sc->use_flowid, "Use flow id for load sharing");
306 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
307 "flowid_shift", CTLTYPE_INT|CTLFLAG_RW, &sc->flowid_shift,
308 sc->flowid_shift,
309 "Shift flowid bits to prevent multiqueue collisions");
310 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
311 "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count,
312 "Total number of ports");
313 SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
314 "active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active,
315 "I", "Total number of active ports");
316 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
317 "flapping", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_flapping,
318 sc->sc_flapping, "Total number of port change events");
319 /* Hash all layers by default */
320 sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
321
322 sc->sc_proto = LAGG_PROTO_NONE;
323 for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
324 if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
325 sc->sc_proto = lagg_protos[i].ti_proto;
326 if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
327 if_free(ifp);
328 free(sc, M_DEVBUF);
329 return (error);
330 }
331 break;
332 }
333 }
334 LAGG_LOCK_INIT(sc);
335 LAGG_CALLOUT_LOCK_INIT(sc);
336 SLIST_INIT(&sc->sc_ports);
337 TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
338
339 /*
340 * This uses the callout lock rather than the rmlock; one can't
341 * hold said rmlock during SWI.
342 */
343 callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
344
345 /* Initialise pseudo media types */
346 ifmedia_init(&sc->sc_media, 0, lagg_media_change,
347 lagg_media_status);
348 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
349 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
350
351 if_initname(ifp, laggname, unit);
352 ifp->if_softc = sc;
353 ifp->if_transmit = lagg_transmit;
354 ifp->if_qflush = lagg_qflush;
355 ifp->if_init = lagg_init;
356 ifp->if_ioctl = lagg_ioctl;
357 ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
358 ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
359
360 /*
361 * Attach as an ordinary ethernet device, children will be attached
362 * as special device IFT_IEEE8023ADLAG.
363 */
364 ether_ifattach(ifp, eaddr);
365
366 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
367 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
368 sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
369 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
370
371 /* Insert into the global list of laggs */
372 mtx_lock(&lagg_list_mtx);
373 SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
374 mtx_unlock(&lagg_list_mtx);
375
376 callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
377
378 return (0);
379}
380
381static void
382lagg_clone_destroy(struct ifnet *ifp)
383{
384 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
385 struct lagg_port *lp;
386
387 LAGG_WLOCK(sc);
388
389 lagg_stop(sc);
390 ifp->if_flags &= ~IFF_UP;
391
392 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
393 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
394
395 /* Shutdown and remove lagg ports */
396 while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
397 lagg_port_destroy(lp, 1);
398 /* Unhook the aggregation protocol */
399 if (sc->sc_detach != NULL)
400 (*sc->sc_detach)(sc);
401
402 LAGG_WUNLOCK(sc);
403
404 sysctl_ctx_free(&sc->ctx);
405 ifmedia_removeall(&sc->sc_media);
406 ether_ifdetach(ifp);
407 if_free(ifp);
408
409 /* This grabs sc_callout_mtx, serialising it correctly */
410 callout_drain(&sc->sc_callout);
411
412 /* At this point it's drained; we can free this */
413 counter_u64_free(sc->sc_ipackets);
414 counter_u64_free(sc->sc_opackets);
415 counter_u64_free(sc->sc_ibytes);
416 counter_u64_free(sc->sc_obytes);
417
418 mtx_lock(&lagg_list_mtx);
419 SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
420 mtx_unlock(&lagg_list_mtx);
421
422 taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
423 LAGG_LOCK_DESTROY(sc);
424 LAGG_CALLOUT_LOCK_DESTROY(sc);
425 free(sc, M_DEVBUF);
426}
427
428static void
429lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
430{
431 struct ifnet *ifp = sc->sc_ifp;
432
433 if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
434 return;
435
436 bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
437 /* Let the protocol know the MAC has changed */
438 if (sc->sc_lladdr != NULL)
439 (*sc->sc_lladdr)(sc);
440 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
441}
442
443static void
444lagg_capabilities(struct lagg_softc *sc)
445{
446 struct lagg_port *lp;
447 int cap = ~0, ena = ~0;
448 u_long hwa = ~0UL;
449#if defined(INET) || defined(INET6)
450 u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */
451#else
452 u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. */
453#endif
454
455 LAGG_WLOCK_ASSERT(sc);
456
457 /* Get capabilities from the lagg ports */
458 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
459 cap &= lp->lp_ifp->if_capabilities;
460 ena &= lp->lp_ifp->if_capenable;
461 hwa &= lp->lp_ifp->if_hwassist;
462 /* Set to the minimum value of the lagg ports. */
463 if (lp->lp_ifp->if_hw_tsomax < hw_tsomax &&
464 lp->lp_ifp->if_hw_tsomax > 0)
465 hw_tsomax = lp->lp_ifp->if_hw_tsomax;
466 }
467 cap = (cap == ~0 ? 0 : cap);
468 ena = (ena == ~0 ? 0 : ena);
469 hwa = (hwa == ~0 ? 0 : hwa);
470
471 if (sc->sc_ifp->if_capabilities != cap ||
472 sc->sc_ifp->if_capenable != ena ||
473 sc->sc_ifp->if_hwassist != hwa ||
474 sc->sc_ifp->if_hw_tsomax != hw_tsomax) {
475 sc->sc_ifp->if_capabilities = cap;
476 sc->sc_ifp->if_capenable = ena;
477 sc->sc_ifp->if_hwassist = hwa;
478 sc->sc_ifp->if_hw_tsomax = hw_tsomax;
479 getmicrotime(&sc->sc_ifp->if_lastchange);
480
481 if (sc->sc_ifflags & IFF_DEBUG)
482 if_printf(sc->sc_ifp,
483 "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
484 }
485}
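/*
 * Illustrative note (editorial, not in either revision): the aggregate
 * advertises the bitwise AND of its members' if_capabilities, if_capenable
 * and if_hwassist, and the smallest member if_hw_tsomax, so adding a less
 * capable port can shrink what the whole lagg offers.
 */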
486
487static void
488lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
489{
490 struct lagg_softc *sc = lp->lp_softc;
491 struct ifnet *ifp = lp->lp_ifp;
492 struct lagg_llq *llq;
493 int pending = 0;
494
495 LAGG_WLOCK_ASSERT(sc);
496
497 if (lp->lp_detaching ||
498 memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
499 return;
500
501 /* Check to make sure its not already queued to be changed */
502 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
503 if (llq->llq_ifp == ifp) {
504 pending = 1;
505 break;
506 }
507 }
508
509 if (!pending) {
510 llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
511 if (llq == NULL) /* XXX what to do */
512 return;
513 }
514
515 /* Update the lladdr even if pending, it may have changed */
516 llq->llq_ifp = ifp;
517 bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
518
519 if (!pending)
520 SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
521
522 taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
523}
524
525/*
526 * Set the interface MAC address from a taskqueue to avoid a LOR.
527 */
528static void
529lagg_port_setlladdr(void *arg, int pending)
530{
531 struct lagg_softc *sc = (struct lagg_softc *)arg;
532 struct lagg_llq *llq, *head;
533 struct ifnet *ifp;
534 int error;
535
536 /* Grab a local reference of the queue and remove it from the softc */
537 LAGG_WLOCK(sc);
538 head = SLIST_FIRST(&sc->sc_llq_head);
539 SLIST_FIRST(&sc->sc_llq_head) = NULL;
540 LAGG_WUNLOCK(sc);
541
542 /*
543 * Traverse the queue and set the lladdr on each ifp. It is safe to do
544 * unlocked as we have the only reference to it.
545 */
546 for (llq = head; llq != NULL; llq = head) {
547 ifp = llq->llq_ifp;
548
549 /* Set the link layer address */
550 CURVNET_SET(ifp->if_vnet);
551 error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
552 CURVNET_RESTORE();
553 if (error)
554 printf("%s: setlladdr failed on %s\n", __func__,
555 ifp->if_xname);
556
557 head = SLIST_NEXT(llq, llq_entries);
558 free(llq, M_DEVBUF);
559 }
560}
561
562static int
563lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
564{
565 struct lagg_softc *sc_ptr;
566 struct lagg_port *lp;
567 int error = 0;
568
569 LAGG_WLOCK_ASSERT(sc);
570
571 /* Limit the maximal number of lagg ports */
572 if (sc->sc_count >= LAGG_MAX_PORTS)
573 return (ENOSPC);
574
575 /* Check if port has already been associated to a lagg */
576 if (ifp->if_lagg != NULL) {
577 /* Port is already in the current lagg? */
578 lp = (struct lagg_port *)ifp->if_lagg;
579 if (lp->lp_softc == sc)
580 return (EEXIST);
581 return (EBUSY);
582 }
583
584 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
585 if (ifp->if_type != IFT_ETHER)
586 return (EPROTONOSUPPORT);
587
588#ifdef INET6
589 /*
590 * The member interface should not have inet6 address because
591 * two interfaces with a valid link-local scope zone must not be
592 * merged in any form. This restriction is needed to
593 * prevent violation of link-local scope zone. Attempts to
594 * add a member interface which has inet6 addresses triggers
595 * removal of all inet6 addresses on the member interface.
596 */
597 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
598 if (in6ifa_llaonifp(lp->lp_ifp)) {
599 in6_ifdetach(lp->lp_ifp);
600 if_printf(sc->sc_ifp,
601 "IPv6 addresses on %s have been removed "
602 "before adding it as a member to prevent "
603 "IPv6 address scope violation.\n",
604 lp->lp_ifp->if_xname);
605 }
606 }
607 if (in6ifa_llaonifp(ifp)) {
608 in6_ifdetach(ifp);
609 if_printf(sc->sc_ifp,
610 "IPv6 addresses on %s have been removed "
611 "before adding it as a member to prevent "
612 "IPv6 address scope violation.\n",
613 ifp->if_xname);
614 }
615#endif
616 /* Allow the first Ethernet member to define the MTU */
617 if (SLIST_EMPTY(&sc->sc_ports))
618 sc->sc_ifp->if_mtu = ifp->if_mtu;
619 else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
620 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
621 ifp->if_xname);
622 return (EINVAL);
623 }
624
625 if ((lp = malloc(sizeof(struct lagg_port),
626 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
627 return (ENOMEM);
628
629 /* Check if port is a stacked lagg */
630 mtx_lock(&lagg_list_mtx);
631 SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
632 if (ifp == sc_ptr->sc_ifp) {
633 mtx_unlock(&lagg_list_mtx);
634 free(lp, M_DEVBUF);
635 return (EINVAL);
636 /* XXX disable stacking for the moment, its untested */
637#ifdef LAGG_PORT_STACKING
638 lp->lp_flags |= LAGG_PORT_STACK;
639 if (lagg_port_checkstacking(sc_ptr) >=
640 LAGG_MAX_STACKING) {
641 mtx_unlock(&lagg_list_mtx);
642 free(lp, M_DEVBUF);
643 return (E2BIG);
644 }
645#endif
646 }
647 }
648 mtx_unlock(&lagg_list_mtx);
649
650 /* Change the interface type */
651 lp->lp_iftype = ifp->if_type;
652 ifp->if_type = IFT_IEEE8023ADLAG;
653 ifp->if_lagg = lp;
654 lp->lp_ioctl = ifp->if_ioctl;
655 ifp->if_ioctl = lagg_port_ioctl;
656 lp->lp_output = ifp->if_output;
657 ifp->if_output = lagg_port_output;
658
659 lp->lp_ifp = ifp;
660 lp->lp_softc = sc;
661
662 /* Save port link layer address */
663 bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
664
665 if (SLIST_EMPTY(&sc->sc_ports)) {
666 sc->sc_primary = lp;
667 lagg_lladdr(sc, IF_LLADDR(ifp));
668 } else {
669 /* Update link layer address for this port */
670 lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
671 }
672
673 /* Insert into the list of ports */
674 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
675 sc->sc_count++;
676
677 /* Update lagg capabilities */
678 lagg_capabilities(sc);
679 lagg_linkstate(sc);
680
681 /* Add multicast addresses and interface flags to this port */
682 lagg_ether_cmdmulti(lp, 1);
683 lagg_setflags(lp, 1);
684
685 if (sc->sc_port_create != NULL)
686 error = (*sc->sc_port_create)(lp);
687 if (error) {
688 /* remove the port again, without calling sc_port_destroy */
689 lagg_port_destroy(lp, 0);
690 return (error);
691 }
692
693 return (error);
694}
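/*
 * Illustrative only (not taken from this diff): lagg_port_create() is
 * normally reached from ifconfig(8), which issues SIOCSLAGGPORT and is
 * dispatched by lagg_ioctl() below, e.g.
 *
 *   ifconfig lagg0 create
 *   ifconfig lagg0 laggproto lacp laggport em0 laggport em1 up
 *
 * The member names (em0/em1) are placeholders; any IFT_ETHER interface
 * whose MTU matches the lagg is accepted by the checks above.
 */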
695
696#ifdef LAGG_PORT_STACKING
697static int
698lagg_port_checkstacking(struct lagg_softc *sc)
699{
700 struct lagg_softc *sc_ptr;
701 struct lagg_port *lp;
702 int m = 0;
703
704 LAGG_WLOCK_ASSERT(sc);
705
706 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
707 if (lp->lp_flags & LAGG_PORT_STACK) {
708 sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
709 m = MAX(m, lagg_port_checkstacking(sc_ptr));
710 }
711 }
712
713 return (m + 1);
714}
715#endif
716
717static int
718lagg_port_destroy(struct lagg_port *lp, int runpd)
719{
720 struct lagg_softc *sc = lp->lp_softc;
721 struct lagg_port *lp_ptr;
722 struct lagg_llq *llq;
723 struct ifnet *ifp = lp->lp_ifp;
724
725 LAGG_WLOCK_ASSERT(sc);
726
727 if (runpd && sc->sc_port_destroy != NULL)
728 (*sc->sc_port_destroy)(lp);
729
730 /*
731 * Remove multicast addresses and interface flags from this port and
732 * reset the MAC address, skip if the interface is being detached.
733 */
734 if (!lp->lp_detaching) {
735 lagg_ether_cmdmulti(lp, 0);
736 lagg_setflags(lp, 0);
737 lagg_port_lladdr(lp, lp->lp_lladdr);
738 }
739
740 /* Restore interface */
741 ifp->if_type = lp->lp_iftype;
742 ifp->if_ioctl = lp->lp_ioctl;
743 ifp->if_output = lp->lp_output;
744 ifp->if_lagg = NULL;
745
746 /* Finally, remove the port from the lagg */
747 SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
748 sc->sc_count--;
749
750 /* Update the primary interface */
751 if (lp == sc->sc_primary) {
752 uint8_t lladdr[ETHER_ADDR_LEN];
753
754 if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
755 bzero(&lladdr, ETHER_ADDR_LEN);
756 } else {
757 bcopy(lp_ptr->lp_lladdr,
758 lladdr, ETHER_ADDR_LEN);
759 }
760 lagg_lladdr(sc, lladdr);
761 sc->sc_primary = lp_ptr;
762
763 /* Update link layer address for each port */
764 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
765 lagg_port_lladdr(lp_ptr, lladdr);
766 }
767
768 /* Remove any pending lladdr changes from the queue */
769 if (lp->lp_detaching) {
770 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
771 if (llq->llq_ifp == ifp) {
772 SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
773 llq_entries);
774 free(llq, M_DEVBUF);
775 break; /* Only appears once */
776 }
777 }
778 }
779
780 if (lp->lp_ifflags)
781 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
782
783 free(lp, M_DEVBUF);
784
785 /* Update lagg capabilities */
786 lagg_capabilities(sc);
787 lagg_linkstate(sc);
788
789 return (0);
790}
791
792static int
793lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
794{
795 struct lagg_reqport *rp = (struct lagg_reqport *)data;
796 struct lagg_softc *sc;
797 struct lagg_port *lp = NULL;
798 int error = 0;
799 struct rm_priotracker tracker;
800
801 /* Should be checked by the caller */
802 if (ifp->if_type != IFT_IEEE8023ADLAG ||
803 (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
804 goto fallback;
805
806 switch (cmd) {
807 case SIOCGLAGGPORT:
808 if (rp->rp_portname[0] == '\0' ||
809 ifunit(rp->rp_portname) != ifp) {
810 error = EINVAL;
811 break;
812 }
813
814 LAGG_RLOCK(sc, &tracker);
815 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
816 error = ENOENT;
817 LAGG_RUNLOCK(sc, &tracker);
818 break;
819 }
820
821 lagg_port2req(lp, rp);
822 LAGG_RUNLOCK(sc, &tracker);
823 break;
824
825 case SIOCSIFCAP:
826 if (lp->lp_ioctl == NULL) {
827 error = EINVAL;
828 break;
829 }
830 error = (*lp->lp_ioctl)(ifp, cmd, data);
831 if (error)
832 break;
833
834 /* Update lagg interface capabilities */
835 LAGG_WLOCK(sc);
836 lagg_capabilities(sc);
837 LAGG_WUNLOCK(sc);
838 break;
839
840 case SIOCSIFMTU:
841 /* Do not allow the MTU to be changed once joined */
842 error = EINVAL;
843 break;
844
845 default:
846 goto fallback;
847 }
848
849 return (error);
850
851fallback:
852 if (lp->lp_ioctl != NULL)
853 return ((*lp->lp_ioctl)(ifp, cmd, data));
854
855 return (EINVAL);
856}
857
858/*
859 * For direct output to child ports.
860 */
861static int
862lagg_port_output(struct ifnet *ifp, struct mbuf *m,
863 const struct sockaddr *dst, struct route *ro)
864{
865 struct lagg_port *lp = ifp->if_lagg;
866
867 switch (dst->sa_family) {
868 case pseudo_AF_HDRCMPLT:
869 case AF_UNSPEC:
870 return ((*lp->lp_output)(ifp, m, dst, ro));
871 }
872
873 /* drop any other frames */
874 m_freem(m);
875 return (ENETDOWN);
876}
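/*
 * Editorial note (assumption based on bpf(4) behaviour, not this diff):
 * only raw writes reach a member directly; bpf marks its frames AF_UNSPEC
 * or pseudo_AF_HDRCMPLT, so they pass through to the saved lp_output,
 * while anything else is dropped here with ENETDOWN and must instead be
 * sent via the lagg interface.
 */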
877
878static void
879lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
880{
881 struct lagg_port *lp;
882 struct lagg_softc *sc;
883
884 if ((lp = ifp->if_lagg) == NULL)
885 return;
886 /* If the ifnet is just being renamed, don't do anything. */
887 if (ifp->if_flags & IFF_RENAMING)
888 return;
889
890 sc = lp->lp_softc;
891
892 LAGG_WLOCK(sc);
893 lp->lp_detaching = 1;
894 lagg_port_destroy(lp, 1);
895 LAGG_WUNLOCK(sc);
896}
897
898static void
899lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
900{
901 struct lagg_softc *sc = lp->lp_softc;
902
903 strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
904 strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
905 rp->rp_prio = lp->lp_prio;
906 rp->rp_flags = lp->lp_flags;
907 if (sc->sc_portreq != NULL)
908 (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
909
910 /* Add protocol specific flags */
911 switch (sc->sc_proto) {
912 case LAGG_PROTO_FAILOVER:
913 if (lp == sc->sc_primary)
914 rp->rp_flags |= LAGG_PORT_MASTER;
915 if (lp == lagg_link_active(sc, sc->sc_primary))
916 rp->rp_flags |= LAGG_PORT_ACTIVE;
917 break;
918
919 case LAGG_PROTO_ROUNDROBIN:
920 case LAGG_PROTO_LOADBALANCE:
921 case LAGG_PROTO_ETHERCHANNEL:
922 if (LAGG_PORTACTIVE(lp))
923 rp->rp_flags |= LAGG_PORT_ACTIVE;
924 break;
925
926 case LAGG_PROTO_LACP:
927 /* LACP has a different definition of active */
928 if (lacp_isactive(lp))
929 rp->rp_flags |= LAGG_PORT_ACTIVE;
930 if (lacp_iscollecting(lp))
931 rp->rp_flags |= LAGG_PORT_COLLECTING;
932 if (lacp_isdistributing(lp))
933 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
934 break;
935 }
936
937}
938
939static void
940lagg_init(void *xsc)
941{
942 struct lagg_softc *sc = (struct lagg_softc *)xsc;
943 struct lagg_port *lp;
944 struct ifnet *ifp = sc->sc_ifp;
945
946 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
947 return;
948
949 LAGG_WLOCK(sc);
950
951 ifp->if_drv_flags |= IFF_DRV_RUNNING;
952 /* Update the port lladdrs */
953 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
954 lagg_port_lladdr(lp, IF_LLADDR(ifp));
955
956 if (sc->sc_init != NULL)
957 (*sc->sc_init)(sc);
958
959 LAGG_WUNLOCK(sc);
960}
961
962static void
963lagg_stop(struct lagg_softc *sc)
964{
965 struct ifnet *ifp = sc->sc_ifp;
966
967 LAGG_WLOCK_ASSERT(sc);
968
969 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
970 return;
971
972 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
973
974 if (sc->sc_stop != NULL)
975 (*sc->sc_stop)(sc);
976}
977
978static int
979lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
980{
981 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
982 struct lagg_reqall *ra = (struct lagg_reqall *)data;
983 struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
984 struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
985 struct ifreq *ifr = (struct ifreq *)data;
986 struct lagg_port *lp;
987 struct ifnet *tpif;
988 struct thread *td = curthread;
989 char *buf, *outbuf;
990 int count, buflen, len, error = 0;
991 struct rm_priotracker tracker;
992
993 bzero(&rpbuf, sizeof(rpbuf));
994
995 switch (cmd) {
996 case SIOCGLAGG:
997 LAGG_RLOCK(sc, &tracker);
998 count = 0;
999 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1000 count++;
1001 buflen = count * sizeof(struct lagg_reqport);
1002 LAGG_RUNLOCK(sc, &tracker);
1003
1004 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
1005
1006 LAGG_RLOCK(sc, &tracker);
1007 ra->ra_proto = sc->sc_proto;
1008 if (sc->sc_req != NULL)
1009 (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
1010
1011 count = 0;
1012 buf = outbuf;
1013 len = min(ra->ra_size, buflen);
1014 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1015 if (len < sizeof(rpbuf))
1016 break;
1017
1018 lagg_port2req(lp, &rpbuf);
1019 memcpy(buf, &rpbuf, sizeof(rpbuf));
1020 count++;
1021 buf += sizeof(rpbuf);
1022 len -= sizeof(rpbuf);
1023 }
1024 LAGG_RUNLOCK(sc, &tracker);
1025 ra->ra_ports = count;
1026 ra->ra_size = count * sizeof(rpbuf);
1027 error = copyout(outbuf, ra->ra_port, ra->ra_size);
1028 free(outbuf, M_TEMP);
1029 break;
1030 case SIOCSLAGG:
1031 error = priv_check(td, PRIV_NET_LAGG);
1032 if (error)
1033 break;
1034 if (ra->ra_proto >= LAGG_PROTO_MAX) {
1035 error = EPROTONOSUPPORT;
1036 break;
1037 }
1038 LAGG_WLOCK(sc);
1039 if (sc->sc_proto != LAGG_PROTO_NONE) {
1040 /* Reset protocol first in case detach unlocks */
1041 sc->sc_proto = LAGG_PROTO_NONE;
1042 error = sc->sc_detach(sc);
1043 sc->sc_detach = NULL;
1044 sc->sc_start = NULL;
1045 sc->sc_input = NULL;
1046 sc->sc_port_create = NULL;
1047 sc->sc_port_destroy = NULL;
1048 sc->sc_linkstate = NULL;
1049 sc->sc_init = NULL;
1050 sc->sc_stop = NULL;
1051 sc->sc_lladdr = NULL;
1052 sc->sc_req = NULL;
1053 sc->sc_portreq = NULL;
1054 } else if (sc->sc_input != NULL) {
1055 /* Still detaching */
1056 error = EBUSY;
1057 }
1058 if (error != 0) {
1059 LAGG_WUNLOCK(sc);
1060 break;
1061 }
1062 for (int i = 0; i < (sizeof(lagg_protos) /
1063 sizeof(lagg_protos[0])); i++) {
1064 if (lagg_protos[i].ti_proto == ra->ra_proto) {
1065 if (sc->sc_ifflags & IFF_DEBUG)
1066 printf("%s: using proto %u\n",
1067 sc->sc_ifname,
1068 lagg_protos[i].ti_proto);
1069 sc->sc_proto = lagg_protos[i].ti_proto;
1070 if (sc->sc_proto != LAGG_PROTO_NONE)
1071 error = lagg_protos[i].ti_attach(sc);
1072 LAGG_WUNLOCK(sc);
1073 return (error);
1074 }
1075 }
1076 LAGG_WUNLOCK(sc);
1077 error = EPROTONOSUPPORT;
1078 break;
1079 case SIOCGLAGGFLAGS:
1080 rf->rf_flags = sc->sc_flags;
1081 break;
1082 case SIOCSLAGGHASH:
1083 error = priv_check(td, PRIV_NET_LAGG);
1084 if (error)
1085 break;
1086 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
1087 error = EINVAL;
1088 break;
1089 }
1090 LAGG_WLOCK(sc);
1091 sc->sc_flags &= ~LAGG_F_HASHMASK;
1092 sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
1093 LAGG_WUNLOCK(sc);
1094 break;
1095 case SIOCGLAGGPORT:
1096 if (rp->rp_portname[0] == '\0' ||
1097 (tpif = ifunit(rp->rp_portname)) == NULL) {
1098 error = EINVAL;
1099 break;
1100 }
1101
1102 LAGG_RLOCK(sc, &tracker);
1103 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1104 lp->lp_softc != sc) {
1105 error = ENOENT;
1106 LAGG_RUNLOCK(sc, &tracker);
1107 break;
1108 }
1109
1110 lagg_port2req(lp, rp);
1111 LAGG_RUNLOCK(sc, &tracker);
1112 break;
1113 case SIOCSLAGGPORT:
1114 error = priv_check(td, PRIV_NET_LAGG);
1115 if (error)
1116 break;
1117 if (rp->rp_portname[0] == '\0' ||
1118 (tpif = ifunit(rp->rp_portname)) == NULL) {
1119 error = EINVAL;
1120 break;
1121 }
1122 LAGG_WLOCK(sc);
1123 error = lagg_port_create(sc, tpif);
1124 LAGG_WUNLOCK(sc);
1125 break;
1126 case SIOCSLAGGDELPORT:
1127 error = priv_check(td, PRIV_NET_LAGG);
1128 if (error)
1129 break;
1130 if (rp->rp_portname[0] == '\0' ||
1131 (tpif = ifunit(rp->rp_portname)) == NULL) {
1132 error = EINVAL;
1133 break;
1134 }
1135
1136 LAGG_WLOCK(sc);
1137 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1138 lp->lp_softc != sc) {
1139 error = ENOENT;
1140 LAGG_WUNLOCK(sc);
1141 break;
1142 }
1143
1144 error = lagg_port_destroy(lp, 1);
1145 LAGG_WUNLOCK(sc);
1146 break;
1147 case SIOCSIFFLAGS:
1148 /* Set flags on ports too */
1149 LAGG_WLOCK(sc);
1150 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1151 lagg_setflags(lp, 1);
1152 }
1153 LAGG_WUNLOCK(sc);
1154
1155 if (!(ifp->if_flags & IFF_UP) &&
1156 (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1157 /*
1158 * If interface is marked down and it is running,
1159 * then stop and disable it.
1160 */
1161 LAGG_WLOCK(sc);
1162 lagg_stop(sc);
1163 LAGG_WUNLOCK(sc);
1164 } else if ((ifp->if_flags & IFF_UP) &&
1165 !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1166 /*
1167 * If interface is marked up and it is stopped, then
1168 * start it.
1169 */
1170 (*ifp->if_init)(sc);
1171 }
1172 break;
1173 case SIOCADDMULTI:
1174 case SIOCDELMULTI:
1175 LAGG_WLOCK(sc);
1176 error = lagg_ether_setmulti(sc);
1177 LAGG_WUNLOCK(sc);
1178 break;
1179 case SIOCSIFMEDIA:
1180 case SIOCGIFMEDIA:
1181 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1182 break;
1183
1184 case SIOCSIFCAP:
1185 case SIOCSIFMTU:
1186 /* Do not allow the MTU or caps to be directly changed */
1187 error = EINVAL;
1188 break;
1189
1190 default:
1191 error = ether_ioctl(ifp, cmd, data);
1192 break;
1193 }
1194 return (error);
1195}
1196
1197static int
1198lagg_ether_setmulti(struct lagg_softc *sc)
1199{
1200 struct lagg_port *lp;
1201
1202 LAGG_WLOCK_ASSERT(sc);
1203
1204 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1205 /* First, remove any existing filter entries. */
1206 lagg_ether_cmdmulti(lp, 0);
1207 /* copy all addresses from the lagg interface to the port */
1208 lagg_ether_cmdmulti(lp, 1);
1209 }
1210 return (0);
1211}
1212
1213static int
1214lagg_ether_cmdmulti(struct lagg_port *lp, int set)
1215{
1216 struct lagg_softc *sc = lp->lp_softc;
1217 struct ifnet *ifp = lp->lp_ifp;
1218 struct ifnet *scifp = sc->sc_ifp;
1219 struct lagg_mc *mc;
22
23#include "opt_inet.h"
24#include "opt_inet6.h"
25
26#include <sys/param.h>
27#include <sys/kernel.h>
28#include <sys/malloc.h>
29#include <sys/mbuf.h>
30#include <sys/queue.h>
31#include <sys/socket.h>
32#include <sys/sockio.h>
33#include <sys/sysctl.h>
34#include <sys/module.h>
35#include <sys/priv.h>
36#include <sys/systm.h>
37#include <sys/proc.h>
38#include <sys/hash.h>
39#include <sys/lock.h>
40#include <sys/rmlock.h>
41#include <sys/taskqueue.h>
42#include <sys/eventhandler.h>
43
44#include <net/ethernet.h>
45#include <net/if.h>
46#include <net/if_clone.h>
47#include <net/if_arp.h>
48#include <net/if_dl.h>
49#include <net/if_llc.h>
50#include <net/if_media.h>
51#include <net/if_types.h>
52#include <net/if_var.h>
53#include <net/bpf.h>
54
55#if defined(INET) || defined(INET6)
56#include <netinet/in.h>
57#include <netinet/ip.h>
58#endif
59#ifdef INET
60#include <netinet/in_systm.h>
61#include <netinet/if_ether.h>
62#endif
63
64#ifdef INET6
65#include <netinet/ip6.h>
66#include <netinet6/in6_var.h>
67#include <netinet6/in6_ifattach.h>
68#endif
69
70#include <net/if_vlan_var.h>
71#include <net/if_lagg.h>
72#include <net/ieee8023ad_lacp.h>
73
74/* Special flags we should propagate to the lagg ports. */
75static struct {
76 int flag;
77 int (*func)(struct ifnet *, int);
78} lagg_pflags[] = {
79 {IFF_PROMISC, ifpromisc},
80 {IFF_ALLMULTI, if_allmulti},
81 {0, NULL}
82};
83
84SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
85static struct mtx lagg_list_mtx;
86eventhandler_tag lagg_detach_cookie = NULL;
87
88static int lagg_clone_create(struct if_clone *, int, caddr_t);
89static void lagg_clone_destroy(struct ifnet *);
90static struct if_clone *lagg_cloner;
91static const char laggname[] = "lagg";
92
93static void lagg_lladdr(struct lagg_softc *, uint8_t *);
94static void lagg_capabilities(struct lagg_softc *);
95static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
96static void lagg_port_setlladdr(void *, int);
97static int lagg_port_create(struct lagg_softc *, struct ifnet *);
98static int lagg_port_destroy(struct lagg_port *, int);
99static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
100static void lagg_linkstate(struct lagg_softc *);
101static void lagg_port_state(struct ifnet *, int);
102static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
103static int lagg_port_output(struct ifnet *, struct mbuf *,
104 const struct sockaddr *, struct route *);
105static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
106#ifdef LAGG_PORT_STACKING
107static int lagg_port_checkstacking(struct lagg_softc *);
108#endif
109static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
110static void lagg_init(void *);
111static void lagg_stop(struct lagg_softc *);
112static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
113static int lagg_ether_setmulti(struct lagg_softc *);
114static int lagg_ether_cmdmulti(struct lagg_port *, int);
115static int lagg_setflag(struct lagg_port *, int, int,
116 int (*func)(struct ifnet *, int));
117static int lagg_setflags(struct lagg_port *, int status);
118static int lagg_transmit(struct ifnet *, struct mbuf *);
119static void lagg_qflush(struct ifnet *);
120static int lagg_media_change(struct ifnet *);
121static void lagg_media_status(struct ifnet *, struct ifmediareq *);
122static struct lagg_port *lagg_link_active(struct lagg_softc *,
123 struct lagg_port *);
124static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
125static int lagg_sysctl_active(SYSCTL_HANDLER_ARGS);
126
127/* Simple round robin */
128static int lagg_rr_attach(struct lagg_softc *);
129static int lagg_rr_detach(struct lagg_softc *);
130static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
131static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
132 struct mbuf *);
133
134/* Active failover */
135static int lagg_fail_attach(struct lagg_softc *);
136static int lagg_fail_detach(struct lagg_softc *);
137static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
138static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
139 struct mbuf *);
140
141/* Loadbalancing */
142static int lagg_lb_attach(struct lagg_softc *);
143static int lagg_lb_detach(struct lagg_softc *);
144static int lagg_lb_port_create(struct lagg_port *);
145static void lagg_lb_port_destroy(struct lagg_port *);
146static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
147static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
148 struct mbuf *);
149static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
150
151/* 802.3ad LACP */
152static int lagg_lacp_attach(struct lagg_softc *);
153static int lagg_lacp_detach(struct lagg_softc *);
154static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
155static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
156 struct mbuf *);
157static void lagg_lacp_lladdr(struct lagg_softc *);
158
159static void lagg_callout(void *);
160
161/* lagg protocol table */
162static const struct {
163 int ti_proto;
164 int (*ti_attach)(struct lagg_softc *);
165} lagg_protos[] = {
166 { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
167 { LAGG_PROTO_FAILOVER, lagg_fail_attach },
168 { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
169 { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
170 { LAGG_PROTO_LACP, lagg_lacp_attach },
171 { LAGG_PROTO_NONE, NULL }
172};
173
174SYSCTL_DECL(_net_link);
175SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
176 "Link Aggregation");
177
178static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
179SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
180 &lagg_failover_rx_all, 0,
181 "Accept input from any interface in a failover lagg");
182static int def_use_flowid = 1; /* Default value for using M_FLOWID */
183SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
184 &def_use_flowid, 0,
185 "Default setting for using flow id for load sharing");
186static int def_flowid_shift = 16; /* Default value for using M_FLOWID */
187SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
188 &def_flowid_shift, 0,
189 "Default setting for flowid shift for load sharing");
190
191static int
192lagg_modevent(module_t mod, int type, void *data)
193{
194
195 switch (type) {
196 case MOD_LOAD:
197 mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
198 SLIST_INIT(&lagg_list);
199 lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
200 lagg_clone_destroy, 0);
201 lagg_input_p = lagg_input;
202 lagg_linkstate_p = lagg_port_state;
203 lagg_detach_cookie = EVENTHANDLER_REGISTER(
204 ifnet_departure_event, lagg_port_ifdetach, NULL,
205 EVENTHANDLER_PRI_ANY);
206 break;
207 case MOD_UNLOAD:
208 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
209 lagg_detach_cookie);
210 if_clone_detach(lagg_cloner);
211 lagg_input_p = NULL;
212 lagg_linkstate_p = NULL;
213 mtx_destroy(&lagg_list_mtx);
214 break;
215 default:
216 return (EOPNOTSUPP);
217 }
218 return (0);
219}
220
221static moduledata_t lagg_mod = {
222 "if_lagg",
223 lagg_modevent,
224 0
225};
226
227DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
228MODULE_VERSION(if_lagg, 1);
229
230/*
231 * This routine is run via an vlan
232 * config EVENT
233 */
234static void
235lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
236{
237 struct lagg_softc *sc = ifp->if_softc;
238 struct lagg_port *lp;
239 struct rm_priotracker tracker;
240
241 if (ifp->if_softc != arg) /* Not our event */
242 return;
243
244 LAGG_RLOCK(sc, &tracker);
245 if (!SLIST_EMPTY(&sc->sc_ports)) {
246 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
247 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
248 }
249 LAGG_RUNLOCK(sc, &tracker);
250}
251
252/*
253 * This routine is run via an vlan
254 * unconfig EVENT
255 */
256static void
257lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
258{
259 struct lagg_softc *sc = ifp->if_softc;
260 struct lagg_port *lp;
261 struct rm_priotracker tracker;
262
263 if (ifp->if_softc != arg) /* Not our event */
264 return;
265
266 LAGG_RLOCK(sc, &tracker);
267 if (!SLIST_EMPTY(&sc->sc_ports)) {
268 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
269 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
270 }
271 LAGG_RUNLOCK(sc, &tracker);
272}
273
274static int
275lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
276{
277 struct lagg_softc *sc;
278 struct ifnet *ifp;
279 int i, error = 0;
280 static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
281 struct sysctl_oid *oid;
282 char num[14]; /* sufficient for 32 bits */
283
284 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
285 ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
286 if (ifp == NULL) {
287 free(sc, M_DEVBUF);
288 return (ENOSPC);
289 }
290
291 sc->sc_ipackets = counter_u64_alloc(M_WAITOK);
292 sc->sc_opackets = counter_u64_alloc(M_WAITOK);
293 sc->sc_ibytes = counter_u64_alloc(M_WAITOK);
294 sc->sc_obytes = counter_u64_alloc(M_WAITOK);
295
296 sysctl_ctx_init(&sc->ctx);
297 snprintf(num, sizeof(num), "%u", unit);
298 sc->use_flowid = def_use_flowid;
299 sc->flowid_shift = def_flowid_shift;
300 sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx,
301 &SYSCTL_NODE_CHILDREN(_net_link, lagg),
302 OID_AUTO, num, CTLFLAG_RD, NULL, "");
303 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
304 "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid,
305 sc->use_flowid, "Use flow id for load sharing");
306 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
307 "flowid_shift", CTLTYPE_INT|CTLFLAG_RW, &sc->flowid_shift,
308 sc->flowid_shift,
309 "Shift flowid bits to prevent multiqueue collisions");
310 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
311 "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count,
312 "Total number of ports");
313 SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
314 "active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active,
315 "I", "Total number of active ports");
316 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
317 "flapping", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_flapping,
318 sc->sc_flapping, "Total number of port change events");
319 /* Hash all layers by default */
320 sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
321
322 sc->sc_proto = LAGG_PROTO_NONE;
323 for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
324 if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
325 sc->sc_proto = lagg_protos[i].ti_proto;
326 if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
327 if_free(ifp);
328 free(sc, M_DEVBUF);
329 return (error);
330 }
331 break;
332 }
333 }
334 LAGG_LOCK_INIT(sc);
335 LAGG_CALLOUT_LOCK_INIT(sc);
336 SLIST_INIT(&sc->sc_ports);
337 TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
338
339 /*
340 * This uses the callout lock rather than the rmlock; one can't
341 * hold said rmlock during SWI.
342 */
343 callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
344
345 /* Initialise pseudo media types */
346 ifmedia_init(&sc->sc_media, 0, lagg_media_change,
347 lagg_media_status);
348 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
349 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
350
351 if_initname(ifp, laggname, unit);
352 ifp->if_softc = sc;
353 ifp->if_transmit = lagg_transmit;
354 ifp->if_qflush = lagg_qflush;
355 ifp->if_init = lagg_init;
356 ifp->if_ioctl = lagg_ioctl;
357 ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
358 ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
359
360 /*
361 * Attach as an ordinary ethernet device, children will be attached
362 * as special device IFT_IEEE8023ADLAG.
363 */
364 ether_ifattach(ifp, eaddr);
365
366 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
367 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
368 sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
369 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
370
371 /* Insert into the global list of laggs */
372 mtx_lock(&lagg_list_mtx);
373 SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
374 mtx_unlock(&lagg_list_mtx);
375
376 callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
377
378 return (0);
379}
380
381static void
382lagg_clone_destroy(struct ifnet *ifp)
383{
384 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
385 struct lagg_port *lp;
386
387 LAGG_WLOCK(sc);
388
389 lagg_stop(sc);
390 ifp->if_flags &= ~IFF_UP;
391
392 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
393 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
394
395 /* Shutdown and remove lagg ports */
396 while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
397 lagg_port_destroy(lp, 1);
398 /* Unhook the aggregation protocol */
399 if (sc->sc_detach != NULL)
400 (*sc->sc_detach)(sc);
401
402 LAGG_WUNLOCK(sc);
403
404 sysctl_ctx_free(&sc->ctx);
405 ifmedia_removeall(&sc->sc_media);
406 ether_ifdetach(ifp);
407 if_free(ifp);
408
409 /* This grabs sc_callout_mtx, serialising it correctly */
410 callout_drain(&sc->sc_callout);
411
412 /* At this point it's drained; we can free this */
413 counter_u64_free(sc->sc_ipackets);
414 counter_u64_free(sc->sc_opackets);
415 counter_u64_free(sc->sc_ibytes);
416 counter_u64_free(sc->sc_obytes);
417
418 mtx_lock(&lagg_list_mtx);
419 SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
420 mtx_unlock(&lagg_list_mtx);
421
422 taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
423 LAGG_LOCK_DESTROY(sc);
424 LAGG_CALLOUT_LOCK_DESTROY(sc);
425 free(sc, M_DEVBUF);
426}
427
428static void
429lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
430{
431 struct ifnet *ifp = sc->sc_ifp;
432
433 if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
434 return;
435
436 bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
437 /* Let the protocol know the MAC has changed */
438 if (sc->sc_lladdr != NULL)
439 (*sc->sc_lladdr)(sc);
440 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
441}
442
443static void
444lagg_capabilities(struct lagg_softc *sc)
445{
446 struct lagg_port *lp;
447 int cap = ~0, ena = ~0;
448 u_long hwa = ~0UL;
449#if defined(INET) || defined(INET6)
450 u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */
451#else
452 u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. */
453#endif
454
455 LAGG_WLOCK_ASSERT(sc);
456
457 /* Get capabilities from the lagg ports */
458 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
459 cap &= lp->lp_ifp->if_capabilities;
460 ena &= lp->lp_ifp->if_capenable;
461 hwa &= lp->lp_ifp->if_hwassist;
462 /* Set to the minimum value of the lagg ports. */
463 if (lp->lp_ifp->if_hw_tsomax < hw_tsomax &&
464 lp->lp_ifp->if_hw_tsomax > 0)
465 hw_tsomax = lp->lp_ifp->if_hw_tsomax;
466 }
467 cap = (cap == ~0 ? 0 : cap);
468 ena = (ena == ~0 ? 0 : ena);
469 hwa = (hwa == ~0 ? 0 : hwa);
470
471 if (sc->sc_ifp->if_capabilities != cap ||
472 sc->sc_ifp->if_capenable != ena ||
473 sc->sc_ifp->if_hwassist != hwa ||
474 sc->sc_ifp->if_hw_tsomax != hw_tsomax) {
475 sc->sc_ifp->if_capabilities = cap;
476 sc->sc_ifp->if_capenable = ena;
477 sc->sc_ifp->if_hwassist = hwa;
478 sc->sc_ifp->if_hw_tsomax = hw_tsomax;
479 getmicrotime(&sc->sc_ifp->if_lastchange);
480
481 if (sc->sc_ifflags & IFF_DEBUG)
482 if_printf(sc->sc_ifp,
483 "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
484 }
485}
486
487static void
488lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
489{
490 struct lagg_softc *sc = lp->lp_softc;
491 struct ifnet *ifp = lp->lp_ifp;
492 struct lagg_llq *llq;
493 int pending = 0;
494
495 LAGG_WLOCK_ASSERT(sc);
496
497 if (lp->lp_detaching ||
498 memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
499 return;
500
501 /* Check to make sure its not already queued to be changed */
502 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
503 if (llq->llq_ifp == ifp) {
504 pending = 1;
505 break;
506 }
507 }
508
509 if (!pending) {
510 llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
511 if (llq == NULL) /* XXX what to do */
512 return;
513 }
514
515 /* Update the lladdr even if pending, it may have changed */
516 llq->llq_ifp = ifp;
517 bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
518
519 if (!pending)
520 SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
521
522 taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
523}
524
525/*
526 * Set the interface MAC address from a taskqueue to avoid a LOR.
527 */
528static void
529lagg_port_setlladdr(void *arg, int pending)
530{
531 struct lagg_softc *sc = (struct lagg_softc *)arg;
532 struct lagg_llq *llq, *head;
533 struct ifnet *ifp;
534 int error;
535
536 /* Grab a local reference of the queue and remove it from the softc */
537 LAGG_WLOCK(sc);
538 head = SLIST_FIRST(&sc->sc_llq_head);
539 SLIST_FIRST(&sc->sc_llq_head) = NULL;
540 LAGG_WUNLOCK(sc);
541
542 /*
543 * Traverse the queue and set the lladdr on each ifp. It is safe to do
544 * unlocked as we have the only reference to it.
545 */
546 for (llq = head; llq != NULL; llq = head) {
547 ifp = llq->llq_ifp;
548
549 /* Set the link layer address */
550 CURVNET_SET(ifp->if_vnet);
551 error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
552 CURVNET_RESTORE();
553 if (error)
554 printf("%s: setlladdr failed on %s\n", __func__,
555 ifp->if_xname);
556
557 head = SLIST_NEXT(llq, llq_entries);
558 free(llq, M_DEVBUF);
559 }
560}
561
562static int
563lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
564{
565 struct lagg_softc *sc_ptr;
566 struct lagg_port *lp;
567 int error = 0;
568
569 LAGG_WLOCK_ASSERT(sc);
570
571 /* Limit the maximal number of lagg ports */
572 if (sc->sc_count >= LAGG_MAX_PORTS)
573 return (ENOSPC);
574
575 /* Check if port has already been associated to a lagg */
576 if (ifp->if_lagg != NULL) {
577 /* Port is already in the current lagg? */
578 lp = (struct lagg_port *)ifp->if_lagg;
579 if (lp->lp_softc == sc)
580 return (EEXIST);
581 return (EBUSY);
582 }
583
584 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
585 if (ifp->if_type != IFT_ETHER)
586 return (EPROTONOSUPPORT);
587
588#ifdef INET6
589 /*
590 * The member interface should not have inet6 address because
591 * two interfaces with a valid link-local scope zone must not be
592 * merged in any form. This restriction is needed to
593 * prevent violation of link-local scope zone. Attempts to
594 * add a member interface which has inet6 addresses triggers
595 * removal of all inet6 addresses on the member interface.
596 */
597 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
598 if (in6ifa_llaonifp(lp->lp_ifp)) {
599 in6_ifdetach(lp->lp_ifp);
600 if_printf(sc->sc_ifp,
601 "IPv6 addresses on %s have been removed "
602 "before adding it as a member to prevent "
603 "IPv6 address scope violation.\n",
604 lp->lp_ifp->if_xname);
605 }
606 }
607 if (in6ifa_llaonifp(ifp)) {
608 in6_ifdetach(ifp);
609 if_printf(sc->sc_ifp,
610 "IPv6 addresses on %s have been removed "
611 "before adding it as a member to prevent "
612 "IPv6 address scope violation.\n",
613 ifp->if_xname);
614 }
615#endif
616 /* Allow the first Ethernet member to define the MTU */
617 if (SLIST_EMPTY(&sc->sc_ports))
618 sc->sc_ifp->if_mtu = ifp->if_mtu;
619 else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
620 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
621 ifp->if_xname);
622 return (EINVAL);
623 }
624
625 if ((lp = malloc(sizeof(struct lagg_port),
626 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
627 return (ENOMEM);
628
629 /* Check if port is a stacked lagg */
630 mtx_lock(&lagg_list_mtx);
631 SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
632 if (ifp == sc_ptr->sc_ifp) {
633 mtx_unlock(&lagg_list_mtx);
634 free(lp, M_DEVBUF);
635 return (EINVAL);
636 /* XXX disable stacking for the moment, its untested */
637#ifdef LAGG_PORT_STACKING
638 lp->lp_flags |= LAGG_PORT_STACK;
639 if (lagg_port_checkstacking(sc_ptr) >=
640 LAGG_MAX_STACKING) {
641 mtx_unlock(&lagg_list_mtx);
642 free(lp, M_DEVBUF);
643 return (E2BIG);
644 }
645#endif
646 }
647 }
648 mtx_unlock(&lagg_list_mtx);
649
650 /* Change the interface type */
651 lp->lp_iftype = ifp->if_type;
652 ifp->if_type = IFT_IEEE8023ADLAG;
653 ifp->if_lagg = lp;
654 lp->lp_ioctl = ifp->if_ioctl;
655 ifp->if_ioctl = lagg_port_ioctl;
656 lp->lp_output = ifp->if_output;
657 ifp->if_output = lagg_port_output;
658
659 lp->lp_ifp = ifp;
660 lp->lp_softc = sc;
661
662 /* Save port link layer address */
663 bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
664
665 if (SLIST_EMPTY(&sc->sc_ports)) {
666 sc->sc_primary = lp;
667 lagg_lladdr(sc, IF_LLADDR(ifp));
668 } else {
669 /* Update link layer address for this port */
670 lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
671 }
672
673 /* Insert into the list of ports */
674 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
675 sc->sc_count++;
676
677 /* Update lagg capabilities */
678 lagg_capabilities(sc);
679 lagg_linkstate(sc);
680
681 /* Add multicast addresses and interface flags to this port */
682 lagg_ether_cmdmulti(lp, 1);
683 lagg_setflags(lp, 1);
684
685 if (sc->sc_port_create != NULL)
686 error = (*sc->sc_port_create)(lp);
687 if (error) {
688 /* remove the port again, without calling sc_port_destroy */
689 lagg_port_destroy(lp, 0);
690 return (error);
691 }
692
693 return (error);
694}
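
/*
 * Port creation above, in short: reject non-Ethernet and already
 * bonded interfaces, let the first member define the lagg MTU (later
 * members must match it), hook the port's if_ioctl and if_output,
 * save its MAC so it can be restored on removal, and finally give the
 * active protocol a chance to veto the port via sc_port_create.
 */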
695
696#ifdef LAGG_PORT_STACKING
697static int
698lagg_port_checkstacking(struct lagg_softc *sc)
699{
700 struct lagg_softc *sc_ptr;
701 struct lagg_port *lp;
702 int m = 0;
703
704 LAGG_WLOCK_ASSERT(sc);
705
706 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
707 if (lp->lp_flags & LAGG_PORT_STACK) {
708 sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
709 m = MAX(m, lagg_port_checkstacking(sc_ptr));
710 }
711 }
712
713 return (m + 1);
714}
715#endif
716
717static int
718lagg_port_destroy(struct lagg_port *lp, int runpd)
719{
720 struct lagg_softc *sc = lp->lp_softc;
721 struct lagg_port *lp_ptr;
722 struct lagg_llq *llq;
723 struct ifnet *ifp = lp->lp_ifp;
724
725 LAGG_WLOCK_ASSERT(sc);
726
727 if (runpd && sc->sc_port_destroy != NULL)
728 (*sc->sc_port_destroy)(lp);
729
730 /*
731 * Remove multicast addresses and interface flags from this port and
732 * reset the MAC address, skip if the interface is being detached.
733 */
734 if (!lp->lp_detaching) {
735 lagg_ether_cmdmulti(lp, 0);
736 lagg_setflags(lp, 0);
737 lagg_port_lladdr(lp, lp->lp_lladdr);
738 }
739
740 /* Restore interface */
741 ifp->if_type = lp->lp_iftype;
742 ifp->if_ioctl = lp->lp_ioctl;
743 ifp->if_output = lp->lp_output;
744 ifp->if_lagg = NULL;
745
746 /* Finally, remove the port from the lagg */
747 SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
748 sc->sc_count--;
749
750 /* Update the primary interface */
751 if (lp == sc->sc_primary) {
752 uint8_t lladdr[ETHER_ADDR_LEN];
753
754 if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
755 bzero(&lladdr, ETHER_ADDR_LEN);
756 } else {
757 bcopy(lp_ptr->lp_lladdr,
758 lladdr, ETHER_ADDR_LEN);
759 }
760 lagg_lladdr(sc, lladdr);
761 sc->sc_primary = lp_ptr;
762
763 /* Update link layer address for each port */
764 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
765 lagg_port_lladdr(lp_ptr, lladdr);
766 }
767
768 /* Remove any pending lladdr changes from the queue */
769 if (lp->lp_detaching) {
770 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
771 if (llq->llq_ifp == ifp) {
772 SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
773 llq_entries);
774 free(llq, M_DEVBUF);
775 break; /* Only appears once */
776 }
777 }
778 }
779
780 if (lp->lp_ifflags)
781 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
782
783 free(lp, M_DEVBUF);
784
785 /* Update lagg capabilities */
786 lagg_capabilities(sc);
787 lagg_linkstate(sc);
788
789 return (0);
790}
791
792static int
793lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
794{
795 struct lagg_reqport *rp = (struct lagg_reqport *)data;
796 struct lagg_softc *sc;
797 struct lagg_port *lp = NULL;
798 int error = 0;
799 struct rm_priotracker tracker;
800
801 /* Should be checked by the caller */
802 if (ifp->if_type != IFT_IEEE8023ADLAG ||
803 (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
804 goto fallback;
805
806 switch (cmd) {
807 case SIOCGLAGGPORT:
808 if (rp->rp_portname[0] == '\0' ||
809 ifunit(rp->rp_portname) != ifp) {
810 error = EINVAL;
811 break;
812 }
813
814 LAGG_RLOCK(sc, &tracker);
815 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
816 error = ENOENT;
817 LAGG_RUNLOCK(sc, &tracker);
818 break;
819 }
820
821 lagg_port2req(lp, rp);
822 LAGG_RUNLOCK(sc, &tracker);
823 break;
824
825 case SIOCSIFCAP:
826 if (lp->lp_ioctl == NULL) {
827 error = EINVAL;
828 break;
829 }
830 error = (*lp->lp_ioctl)(ifp, cmd, data);
831 if (error)
832 break;
833
834 /* Update lagg interface capabilities */
835 LAGG_WLOCK(sc);
836 lagg_capabilities(sc);
837 LAGG_WUNLOCK(sc);
838 break;
839
840 case SIOCSIFMTU:
841 /* Do not allow the MTU to be changed once joined */
842 error = EINVAL;
843 break;
844
845 default:
846 goto fallback;
847 }
848
849 return (error);
850
851fallback:
852	 if (lp != NULL && lp->lp_ioctl != NULL)
853 return ((*lp->lp_ioctl)(ifp, cmd, data));
854
855 return (EINVAL);
856}
857
858/*
859 * For direct output to child ports.
860 */
861static int
862lagg_port_output(struct ifnet *ifp, struct mbuf *m,
863 const struct sockaddr *dst, struct route *ro)
864{
865 struct lagg_port *lp = ifp->if_lagg;
866
867 switch (dst->sa_family) {
868 case pseudo_AF_HDRCMPLT:
869 case AF_UNSPEC:
870 return ((*lp->lp_output)(ifp, m, dst, ro));
871 }
872
873 /* drop any other frames */
874 m_freem(m);
875 return (ENETDOWN);
876}
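
/*
 * lagg_port_output() above forwards only AF_UNSPEC and
 * pseudo_AF_HDRCMPLT frames (e.g. raw writes through bpf(4)) to the
 * member port; everything else is dropped so the stack cannot
 * transmit around the lagg interface.
 */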
877
878static void
879lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
880{
881 struct lagg_port *lp;
882 struct lagg_softc *sc;
883
884 if ((lp = ifp->if_lagg) == NULL)
885 return;
886 /* If the ifnet is just being renamed, don't do anything. */
887 if (ifp->if_flags & IFF_RENAMING)
888 return;
889
890 sc = lp->lp_softc;
891
892 LAGG_WLOCK(sc);
893 lp->lp_detaching = 1;
894 lagg_port_destroy(lp, 1);
895 LAGG_WUNLOCK(sc);
896}
897
898static void
899lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
900{
901 struct lagg_softc *sc = lp->lp_softc;
902
903 strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
904 strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
905 rp->rp_prio = lp->lp_prio;
906 rp->rp_flags = lp->lp_flags;
907 if (sc->sc_portreq != NULL)
908 (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
909
910 /* Add protocol specific flags */
911 switch (sc->sc_proto) {
912 case LAGG_PROTO_FAILOVER:
913 if (lp == sc->sc_primary)
914 rp->rp_flags |= LAGG_PORT_MASTER;
915 if (lp == lagg_link_active(sc, sc->sc_primary))
916 rp->rp_flags |= LAGG_PORT_ACTIVE;
917 break;
918
919 case LAGG_PROTO_ROUNDROBIN:
920 case LAGG_PROTO_LOADBALANCE:
921 case LAGG_PROTO_ETHERCHANNEL:
922 if (LAGG_PORTACTIVE(lp))
923 rp->rp_flags |= LAGG_PORT_ACTIVE;
924 break;
925
926 case LAGG_PROTO_LACP:
927 /* LACP has a different definition of active */
928 if (lacp_isactive(lp))
929 rp->rp_flags |= LAGG_PORT_ACTIVE;
930 if (lacp_iscollecting(lp))
931 rp->rp_flags |= LAGG_PORT_COLLECTING;
932 if (lacp_isdistributing(lp))
933 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
934 break;
935 }
936
937}
938
939static void
940lagg_init(void *xsc)
941{
942 struct lagg_softc *sc = (struct lagg_softc *)xsc;
943 struct lagg_port *lp;
944 struct ifnet *ifp = sc->sc_ifp;
945
946 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
947 return;
948
949 LAGG_WLOCK(sc);
950
951 ifp->if_drv_flags |= IFF_DRV_RUNNING;
952 /* Update the port lladdrs */
953 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
954 lagg_port_lladdr(lp, IF_LLADDR(ifp));
955
956 if (sc->sc_init != NULL)
957 (*sc->sc_init)(sc);
958
959 LAGG_WUNLOCK(sc);
960}
961
962static void
963lagg_stop(struct lagg_softc *sc)
964{
965 struct ifnet *ifp = sc->sc_ifp;
966
967 LAGG_WLOCK_ASSERT(sc);
968
969 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
970 return;
971
972 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
973
974 if (sc->sc_stop != NULL)
975 (*sc->sc_stop)(sc);
976}
977
978static int
979lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
980{
981 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
982 struct lagg_reqall *ra = (struct lagg_reqall *)data;
983 struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
984 struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
985 struct ifreq *ifr = (struct ifreq *)data;
986 struct lagg_port *lp;
987 struct ifnet *tpif;
988 struct thread *td = curthread;
989 char *buf, *outbuf;
990 int count, buflen, len, error = 0;
991 struct rm_priotracker tracker;
992
993 bzero(&rpbuf, sizeof(rpbuf));
994
995 switch (cmd) {
996 case SIOCGLAGG:
997 LAGG_RLOCK(sc, &tracker);
998 count = 0;
999 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1000 count++;
1001 buflen = count * sizeof(struct lagg_reqport);
1002 LAGG_RUNLOCK(sc, &tracker);
1003
1004 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
1005
1006 LAGG_RLOCK(sc, &tracker);
1007 ra->ra_proto = sc->sc_proto;
1008 if (sc->sc_req != NULL)
1009 (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
1010
1011 count = 0;
1012 buf = outbuf;
1013 len = min(ra->ra_size, buflen);
1014 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1015 if (len < sizeof(rpbuf))
1016 break;
1017
1018 lagg_port2req(lp, &rpbuf);
1019 memcpy(buf, &rpbuf, sizeof(rpbuf));
1020 count++;
1021 buf += sizeof(rpbuf);
1022 len -= sizeof(rpbuf);
1023 }
1024 LAGG_RUNLOCK(sc, &tracker);
1025 ra->ra_ports = count;
1026 ra->ra_size = count * sizeof(rpbuf);
1027 error = copyout(outbuf, ra->ra_port, ra->ra_size);
1028 free(outbuf, M_TEMP);
1029 break;
1030 case SIOCSLAGG:
1031 error = priv_check(td, PRIV_NET_LAGG);
1032 if (error)
1033 break;
1034 if (ra->ra_proto >= LAGG_PROTO_MAX) {
1035 error = EPROTONOSUPPORT;
1036 break;
1037 }
1038 LAGG_WLOCK(sc);
1039 if (sc->sc_proto != LAGG_PROTO_NONE) {
1040 /* Reset protocol first in case detach unlocks */
1041 sc->sc_proto = LAGG_PROTO_NONE;
1042 error = sc->sc_detach(sc);
1043 sc->sc_detach = NULL;
1044 sc->sc_start = NULL;
1045 sc->sc_input = NULL;
1046 sc->sc_port_create = NULL;
1047 sc->sc_port_destroy = NULL;
1048 sc->sc_linkstate = NULL;
1049 sc->sc_init = NULL;
1050 sc->sc_stop = NULL;
1051 sc->sc_lladdr = NULL;
1052 sc->sc_req = NULL;
1053 sc->sc_portreq = NULL;
1054 } else if (sc->sc_input != NULL) {
1055 /* Still detaching */
1056 error = EBUSY;
1057 }
1058 if (error != 0) {
1059 LAGG_WUNLOCK(sc);
1060 break;
1061 }
1062 for (int i = 0; i < (sizeof(lagg_protos) /
1063 sizeof(lagg_protos[0])); i++) {
1064 if (lagg_protos[i].ti_proto == ra->ra_proto) {
1065 if (sc->sc_ifflags & IFF_DEBUG)
1066 printf("%s: using proto %u\n",
1067 sc->sc_ifname,
1068 lagg_protos[i].ti_proto);
1069 sc->sc_proto = lagg_protos[i].ti_proto;
1070 if (sc->sc_proto != LAGG_PROTO_NONE)
1071 error = lagg_protos[i].ti_attach(sc);
1072 LAGG_WUNLOCK(sc);
1073 return (error);
1074 }
1075 }
1076 LAGG_WUNLOCK(sc);
1077 error = EPROTONOSUPPORT;
1078 break;
1079 case SIOCGLAGGFLAGS:
1080 rf->rf_flags = sc->sc_flags;
1081 break;
1082 case SIOCSLAGGHASH:
1083 error = priv_check(td, PRIV_NET_LAGG);
1084 if (error)
1085 break;
1086 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
1087 error = EINVAL;
1088 break;
1089 }
1090 LAGG_WLOCK(sc);
1091 sc->sc_flags &= ~LAGG_F_HASHMASK;
1092 sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
1093 LAGG_WUNLOCK(sc);
1094 break;
1095 case SIOCGLAGGPORT:
1096 if (rp->rp_portname[0] == '\0' ||
1097 (tpif = ifunit(rp->rp_portname)) == NULL) {
1098 error = EINVAL;
1099 break;
1100 }
1101
1102 LAGG_RLOCK(sc, &tracker);
1103 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1104 lp->lp_softc != sc) {
1105 error = ENOENT;
1106 LAGG_RUNLOCK(sc, &tracker);
1107 break;
1108 }
1109
1110 lagg_port2req(lp, rp);
1111 LAGG_RUNLOCK(sc, &tracker);
1112 break;
1113 case SIOCSLAGGPORT:
1114 error = priv_check(td, PRIV_NET_LAGG);
1115 if (error)
1116 break;
1117 if (rp->rp_portname[0] == '\0' ||
1118 (tpif = ifunit(rp->rp_portname)) == NULL) {
1119 error = EINVAL;
1120 break;
1121 }
1122 LAGG_WLOCK(sc);
1123 error = lagg_port_create(sc, tpif);
1124 LAGG_WUNLOCK(sc);
1125 break;
1126 case SIOCSLAGGDELPORT:
1127 error = priv_check(td, PRIV_NET_LAGG);
1128 if (error)
1129 break;
1130 if (rp->rp_portname[0] == '\0' ||
1131 (tpif = ifunit(rp->rp_portname)) == NULL) {
1132 error = EINVAL;
1133 break;
1134 }
1135
1136 LAGG_WLOCK(sc);
1137 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1138 lp->lp_softc != sc) {
1139 error = ENOENT;
1140 LAGG_WUNLOCK(sc);
1141 break;
1142 }
1143
1144 error = lagg_port_destroy(lp, 1);
1145 LAGG_WUNLOCK(sc);
1146 break;
1147 case SIOCSIFFLAGS:
1148 /* Set flags on ports too */
1149 LAGG_WLOCK(sc);
1150 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1151 lagg_setflags(lp, 1);
1152 }
1153 LAGG_WUNLOCK(sc);
1154
1155 if (!(ifp->if_flags & IFF_UP) &&
1156 (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1157 /*
1158 * If interface is marked down and it is running,
1159 * then stop and disable it.
1160 */
1161 LAGG_WLOCK(sc);
1162 lagg_stop(sc);
1163 LAGG_WUNLOCK(sc);
1164 } else if ((ifp->if_flags & IFF_UP) &&
1165 !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1166 /*
1167 * If interface is marked up and it is stopped, then
1168 * start it.
1169 */
1170 (*ifp->if_init)(sc);
1171 }
1172 break;
1173 case SIOCADDMULTI:
1174 case SIOCDELMULTI:
1175 LAGG_WLOCK(sc);
1176 error = lagg_ether_setmulti(sc);
1177 LAGG_WUNLOCK(sc);
1178 break;
1179 case SIOCSIFMEDIA:
1180 case SIOCGIFMEDIA:
1181 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1182 break;
1183
1184 case SIOCSIFCAP:
1185 case SIOCSIFMTU:
1186 /* Do not allow the MTU or caps to be directly changed */
1187 error = EINVAL;
1188 break;
1189
1190 default:
1191 error = ether_ioctl(ifp, cmd, data);
1192 break;
1193 }
1194 return (error);
1195}
1196
1197static int
1198lagg_ether_setmulti(struct lagg_softc *sc)
1199{
1200 struct lagg_port *lp;
1201
1202 LAGG_WLOCK_ASSERT(sc);
1203
1204 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1205 /* First, remove any existing filter entries. */
1206 lagg_ether_cmdmulti(lp, 0);
1207 /* copy all addresses from the lagg interface to the port */
1208 lagg_ether_cmdmulti(lp, 1);
1209 }
1210 return (0);
1211}
1212
1213static int
1214lagg_ether_cmdmulti(struct lagg_port *lp, int set)
1215{
1216 struct lagg_softc *sc = lp->lp_softc;
1217 struct ifnet *ifp = lp->lp_ifp;
1218 struct ifnet *scifp = sc->sc_ifp;
1219 struct lagg_mc *mc;
1220 struct ifmultiaddr *ifma, *rifma = NULL;
1221 struct sockaddr_dl sdl;
1220 struct ifmultiaddr *ifma;
1222 int error;
1223
1224 LAGG_WLOCK_ASSERT(sc);
1225
1221 int error;
1222
1223 LAGG_WLOCK_ASSERT(sc);
1224
1226 link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER);
1227 sdl.sdl_alen = ETHER_ADDR_LEN;
1228
1229 if (set) {
1225 if (set) {
1226 IF_ADDR_WLOCK(scifp);
1230 TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
1231 if (ifma->ifma_addr->sa_family != AF_LINK)
1232 continue;
1227 TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
1228 if (ifma->ifma_addr->sa_family != AF_LINK)
1229 continue;
1233 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1234 LLADDR(&sdl), ETHER_ADDR_LEN);
1235
1236 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
1237 if (error)
1238 return (error);
1239 mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
1230 mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
1240 if (mc == NULL)
1231 if (mc == NULL) {
1232 IF_ADDR_WUNLOCK(scifp);
1241 return (ENOMEM);
1233 return (ENOMEM);
1242 mc->mc_ifma = rifma;
1234 }
1235 bcopy(ifma->ifma_addr, &mc->mc_addr,
1236 ifma->ifma_addr->sa_len);
1237 mc->mc_addr.sdl_index = ifp->if_index;
1238 mc->mc_ifma = NULL;
1243 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
1244 }
1239 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
1240 }
1241 IF_ADDR_WUNLOCK(scifp);
1242 SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
1243 error = if_addmulti(ifp,
1244 (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
1245 if (error)
1246 return (error);
1247 }
1245 } else {
1246 while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
1247 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
1248 } else {
1249 while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
1250 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
1248 if_delmulti_ifma(mc->mc_ifma);
1251 if (mc->mc_ifma && !lp->lp_detaching)
1252 if_delmulti_ifma(mc->mc_ifma);
1249 free(mc, M_DEVBUF);
1250 }
1251 }
1252 return (0);
1253}
1254
1255/* Handle a ref counted flag that should be set on the lagg port as well */
1256static int
1257lagg_setflag(struct lagg_port *lp, int flag, int status,
1258 int (*func)(struct ifnet *, int))
1259{
1260 struct lagg_softc *sc = lp->lp_softc;
1261 struct ifnet *scifp = sc->sc_ifp;
1262 struct ifnet *ifp = lp->lp_ifp;
1263 int error;
1264
1265 LAGG_WLOCK_ASSERT(sc);
1266
1267 status = status ? (scifp->if_flags & flag) : 0;
1268 /* Now "status" contains the flag value or 0 */
1269
1270 /*
1271	 * See if the recorded port status differs from what we
1272	 * want it to be. If it does, flip it. We record the port
1273	 * status in lp_ifflags so that we won't clear a port flag
1274	 * we haven't set. In fact, we don't clear or set port
1275	 * flags directly, but get or release references to them.
1276	 * That's why we can be sure that the recorded flags still
1277	 * agree with the actual port flags.
1278 */
1279 if (status != (lp->lp_ifflags & flag)) {
1280 error = (*func)(ifp, status);
1281 if (error)
1282 return (error);
1283 lp->lp_ifflags &= ~flag;
1284 lp->lp_ifflags |= status;
1285 }
1286 return (0);
1287}
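
/*
 * Example for lagg_setflag() above: when IFF_PROMISC is set on the
 * lagg interface, lagg_setflags() calls this with func == ifpromisc;
 * the port then gains a promiscuous reference and the bit is recorded
 * in lp_ifflags, so only references this code took are ever released.
 */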
1288
1289/*
1290 * Handle IFF_* flags that require certain changes on the lagg port
1291	 * if "status" is true, update the port's flags to match the lagg;
1292	 * if "status" is false, forcibly clear the flags set on the port.
1293 */
1294static int
1295lagg_setflags(struct lagg_port *lp, int status)
1296{
1297 int error, i;
1298
1299 for (i = 0; lagg_pflags[i].flag; i++) {
1300 error = lagg_setflag(lp, lagg_pflags[i].flag,
1301 status, lagg_pflags[i].func);
1302 if (error)
1303 return (error);
1304 }
1305 return (0);
1306}
1307
1308static int
1309lagg_transmit(struct ifnet *ifp, struct mbuf *m)
1310{
1311 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1312 int error, len, mcast;
1313 struct rm_priotracker tracker;
1314
1315 len = m->m_pkthdr.len;
1316 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1317
1318 LAGG_RLOCK(sc, &tracker);
1319 /* We need a Tx algorithm and at least one port */
1320 if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
1321 LAGG_RUNLOCK(sc, &tracker);
1322 m_freem(m);
1323 ifp->if_oerrors++;
1324 return (ENXIO);
1325 }
1326
1327 ETHER_BPF_MTAP(ifp, m);
1328
1329 error = (*sc->sc_start)(sc, m);
1330 LAGG_RUNLOCK(sc, &tracker);
1331
1332 if (error == 0) {
1333 counter_u64_add(sc->sc_opackets, 1);
1334 counter_u64_add(sc->sc_obytes, len);
1335 ifp->if_omcasts += mcast;
1336 } else
1337 ifp->if_oerrors++;
1338
1339 return (error);
1340}
1341
1342/*
1343	 * The ifp->if_qflush entry point for lagg(4) is a no-op.
1344 */
1345static void
1346lagg_qflush(struct ifnet *ifp __unused)
1347{
1348}
1349
1350static struct mbuf *
1351lagg_input(struct ifnet *ifp, struct mbuf *m)
1352{
1353 struct lagg_port *lp = ifp->if_lagg;
1354 struct lagg_softc *sc = lp->lp_softc;
1355 struct ifnet *scifp = sc->sc_ifp;
1356 struct rm_priotracker tracker;
1357
1358 LAGG_RLOCK(sc, &tracker);
1359 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1360 (lp->lp_flags & LAGG_PORT_DISABLED) ||
1361 sc->sc_proto == LAGG_PROTO_NONE) {
1362 LAGG_RUNLOCK(sc, &tracker);
1363 m_freem(m);
1364 return (NULL);
1365 }
1366
1367 ETHER_BPF_MTAP(scifp, m);
1368
1369 m = (*sc->sc_input)(sc, lp, m);
1370
1371 if (m != NULL) {
1372 counter_u64_add(sc->sc_ipackets, 1);
1373 counter_u64_add(sc->sc_ibytes, m->m_pkthdr.len);
1374
1375 if (scifp->if_flags & IFF_MONITOR) {
1376 m_freem(m);
1377 m = NULL;
1378 }
1379 }
1380
1381 LAGG_RUNLOCK(sc, &tracker);
1382 return (m);
1383}
1384
1385static int
1386lagg_media_change(struct ifnet *ifp)
1387{
1388 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1389
1390 if (sc->sc_ifflags & IFF_DEBUG)
1391 printf("%s\n", __func__);
1392
1393 /* Ignore */
1394 return (0);
1395}
1396
1397static void
1398lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1399{
1400 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1401 struct lagg_port *lp;
1402 struct rm_priotracker tracker;
1403
1404 imr->ifm_status = IFM_AVALID;
1405 imr->ifm_active = IFM_ETHER | IFM_AUTO;
1406
1407 LAGG_RLOCK(sc, &tracker);
1408 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1409 if (LAGG_PORTACTIVE(lp))
1410 imr->ifm_status |= IFM_ACTIVE;
1411 }
1412 LAGG_RUNLOCK(sc, &tracker);
1413}
1414
1415static void
1416lagg_linkstate(struct lagg_softc *sc)
1417{
1418 struct lagg_port *lp;
1419 int new_link = LINK_STATE_DOWN;
1420 uint64_t speed;
1421
1422 /* Our link is considered up if at least one of our ports is active */
1423 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1424 if (lp->lp_link_state == LINK_STATE_UP) {
1425 new_link = LINK_STATE_UP;
1426 break;
1427 }
1428 }
1429 if_link_state_change(sc->sc_ifp, new_link);
1430
1431 /* Update if_baudrate to reflect the max possible speed */
1432 switch (sc->sc_proto) {
1433 case LAGG_PROTO_FAILOVER:
1434 sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
1435 sc->sc_primary->lp_ifp->if_baudrate : 0;
1436 break;
1437 case LAGG_PROTO_ROUNDROBIN:
1438 case LAGG_PROTO_LOADBALANCE:
1439 case LAGG_PROTO_ETHERCHANNEL:
1440 speed = 0;
1441 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1442 speed += lp->lp_ifp->if_baudrate;
1443 sc->sc_ifp->if_baudrate = speed;
1444 break;
1445 case LAGG_PROTO_LACP:
1446 /* LACP updates if_baudrate itself */
1447 break;
1448 }
1449}
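
/*
 * if_baudrate above: failover advertises the primary port's speed,
 * roundrobin/loadbalance/etherchannel advertise the sum of all member
 * speeds, and LACP maintains if_baudrate on its own.
 */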
1450
1451static void
1452lagg_port_state(struct ifnet *ifp, int state)
1453{
1454 struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
1455 struct lagg_softc *sc = NULL;
1456
1457 if (lp != NULL)
1458 sc = lp->lp_softc;
1459 if (sc == NULL)
1460 return;
1461
1462 LAGG_WLOCK(sc);
1463 lagg_linkstate(sc);
1464 if (sc->sc_linkstate != NULL)
1465 (*sc->sc_linkstate)(lp);
1466 LAGG_WUNLOCK(sc);
1467}
1468
1469struct lagg_port *
1470lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
1471{
1472 struct lagg_port *lp_next, *rval = NULL;
1473 // int new_link = LINK_STATE_DOWN;
1474
1475 LAGG_RLOCK_ASSERT(sc);
1476 /*
1477	 * Search for a port which reports an active link state.
1478 */
1479
1480 if (lp == NULL)
1481 goto search;
1482 if (LAGG_PORTACTIVE(lp)) {
1483 rval = lp;
1484 goto found;
1485 }
1486 if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
1487 LAGG_PORTACTIVE(lp_next)) {
1488 rval = lp_next;
1489 goto found;
1490 }
1491
1492search:
1493 SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1494 if (LAGG_PORTACTIVE(lp_next)) {
1495 rval = lp_next;
1496 goto found;
1497 }
1498 }
1499
1500found:
1501 if (rval != NULL) {
1502 /*
1503 * The IEEE 802.1D standard assumes that a lagg with
1504 * multiple ports is always full duplex. This is valid
1505 * for load sharing laggs and if at least two links
1506 * are active. Unfortunately, checking the latter would
1507 * be too expensive at this point.
1508 XXX
1509 if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
1510 (sc->sc_count > 1))
1511 new_link = LINK_STATE_FULL_DUPLEX;
1512 else
1513 new_link = rval->lp_link_state;
1514 */
1515 }
1516
1517 return (rval);
1518}
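
/*
 * Search order above: the suggested port itself, then its successor in
 * the port list, then the first active port found by a full scan; NULL
 * is returned only when no port is active at all.
 */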
1519
1520static const void *
1521lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
1522{
1523 if (m->m_pkthdr.len < (off + len)) {
1524 return (NULL);
1525 } else if (m->m_len < (off + len)) {
1526 m_copydata(m, off, len, buf);
1527 return (buf);
1528 }
1529 return (mtod(m, char *) + off);
1530}
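
/*
 * lagg_gethdr() above returns a pointer into the mbuf when the
 * requested bytes lie within the first mbuf, copies them into the
 * caller-supplied buffer otherwise, and returns NULL when the packet
 * is shorter than off + len.
 */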
1531
1532static int
1533lagg_sysctl_active(SYSCTL_HANDLER_ARGS)
1534{
1535 struct lagg_softc *sc = (struct lagg_softc *)arg1;
1536 struct lagg_port *lp;
1537 int error;
1538
1539 /* LACP tracks active links automatically, the others do not */
1540 if (sc->sc_proto != LAGG_PROTO_LACP) {
1541 sc->sc_active = 0;
1542 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1543 sc->sc_active += LAGG_PORTACTIVE(lp);
1544 }
1545
1546 error = sysctl_handle_int(oidp, &sc->sc_active, 0, req);
1547 if ((error) || (req->newptr == NULL))
1548 return (error);
1549
1550 return (0);
1551}
1552
1553uint32_t
1554lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
1555{
1556 uint16_t etype;
1557 uint32_t p = key;
1558 int off;
1559 struct ether_header *eh;
1560 const struct ether_vlan_header *vlan;
1561#ifdef INET
1562 const struct ip *ip;
1563 const uint32_t *ports;
1564 int iphlen;
1565#endif
1566#ifdef INET6
1567 const struct ip6_hdr *ip6;
1568 uint32_t flow;
1569#endif
1570 union {
1571#ifdef INET
1572 struct ip ip;
1573#endif
1574#ifdef INET6
1575 struct ip6_hdr ip6;
1576#endif
1577 struct ether_vlan_header vlan;
1578 uint32_t port;
1579 } buf;
1580
1581
1582 off = sizeof(*eh);
1583 if (m->m_len < off)
1584 goto out;
1585 eh = mtod(m, struct ether_header *);
1586 etype = ntohs(eh->ether_type);
1587 if (sc->sc_flags & LAGG_F_HASHL2) {
1588 p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
1589 p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
1590 }
1591
1592 /* Special handling for encapsulating VLAN frames */
1593 if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
1594 p = hash32_buf(&m->m_pkthdr.ether_vtag,
1595 sizeof(m->m_pkthdr.ether_vtag), p);
1596 } else if (etype == ETHERTYPE_VLAN) {
1597 vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
1598 if (vlan == NULL)
1599 goto out;
1600
1601 if (sc->sc_flags & LAGG_F_HASHL2)
1602 p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
1603 etype = ntohs(vlan->evl_proto);
1604 off += sizeof(*vlan) - sizeof(*eh);
1605 }
1606
1607 switch (etype) {
1608#ifdef INET
1609 case ETHERTYPE_IP:
1610 ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
1611 if (ip == NULL)
1612 goto out;
1613
1614 if (sc->sc_flags & LAGG_F_HASHL3) {
1615 p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
1616 p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
1617 }
1618 if (!(sc->sc_flags & LAGG_F_HASHL4))
1619 break;
1620 switch (ip->ip_p) {
1621 case IPPROTO_TCP:
1622 case IPPROTO_UDP:
1623 case IPPROTO_SCTP:
1624 iphlen = ip->ip_hl << 2;
1625 if (iphlen < sizeof(*ip))
1626 break;
1627 off += iphlen;
1628 ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
1629 if (ports == NULL)
1630 break;
1631 p = hash32_buf(ports, sizeof(*ports), p);
1632 break;
1633 }
1634 break;
1635#endif
1636#ifdef INET6
1637 case ETHERTYPE_IPV6:
1638 if (!(sc->sc_flags & LAGG_F_HASHL3))
1639 break;
1640 ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
1641 if (ip6 == NULL)
1642 goto out;
1643
1644 p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
1645 p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
1646 flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
1647 p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
1648 break;
1649#endif
1650 }
1651out:
1652 return (p);
1653}
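
/*
 * The hash above is layered: LAGG_F_HASHL2 mixes in the Ethernet
 * addresses and VLAN tag, LAGG_F_HASHL3 the IPv4/IPv6 addresses (plus
 * the IPv6 flow label), and LAGG_F_HASHL4 the TCP/UDP/SCTP port pair.
 * Packets of one flow therefore hash to the same value, which the
 * loadbalance protocol uses below to keep a flow on a single port.
 */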
1654
1655int
1656lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
1657{
1658
1659 return (ifp->if_transmit)(ifp, m);
1660}
1661
1662/*
1663 * Simple round robin aggregation
1664 */
1665
1666static int
1667lagg_rr_attach(struct lagg_softc *sc)
1668{
1669 sc->sc_detach = lagg_rr_detach;
1670 sc->sc_start = lagg_rr_start;
1671 sc->sc_input = lagg_rr_input;
1672 sc->sc_port_create = NULL;
1673 sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1674 sc->sc_seq = 0;
1675
1676 return (0);
1677}
1678
1679static int
1680lagg_rr_detach(struct lagg_softc *sc)
1681{
1682 return (0);
1683}
1684
1685static int
1686lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
1687{
1688 struct lagg_port *lp;
1689 uint32_t p;
1690
1691 p = atomic_fetchadd_32(&sc->sc_seq, 1);
1692 p %= sc->sc_count;
1693 lp = SLIST_FIRST(&sc->sc_ports);
1694 while (p--)
1695 lp = SLIST_NEXT(lp, lp_entries);
1696
1697 /*
1698 * Check the port's link state. This will return the next active
1699 * port if the link is down or the port is NULL.
1700 */
1701 if ((lp = lagg_link_active(sc, lp)) == NULL) {
1702 m_freem(m);
1703 return (ENETDOWN);
1704 }
1705
1706 /* Send mbuf */
1707 return (lagg_enqueue(lp->lp_ifp, m));
1708}
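
/*
 * Round-robin selection above: atomic_fetchadd_32() hands each
 * transmit a unique sequence number, and taking it modulo sc_count
 * walks the port list one packet per port; if the chosen port is
 * down, lagg_link_active() substitutes the next active one.
 */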
1709
1710static struct mbuf *
1711lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1712{
1713 struct ifnet *ifp = sc->sc_ifp;
1714
1715 /* Just pass in the packet to our lagg device */
1716 m->m_pkthdr.rcvif = ifp;
1717
1718 return (m);
1719}
1720
1721/*
1722 * Active failover
1723 */
1724
1725static int
1726lagg_fail_attach(struct lagg_softc *sc)
1727{
1728 sc->sc_detach = lagg_fail_detach;
1729 sc->sc_start = lagg_fail_start;
1730 sc->sc_input = lagg_fail_input;
1731 sc->sc_port_create = NULL;
1732 sc->sc_port_destroy = NULL;
1733
1734 return (0);
1735}
1736
1737static int
1738lagg_fail_detach(struct lagg_softc *sc)
1739{
1740 return (0);
1741}
1742
1743static int
1744lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
1745{
1746 struct lagg_port *lp;
1747
1748 /* Use the master port if active or the next available port */
1749 if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
1750 m_freem(m);
1751 return (ENETDOWN);
1752 }
1753
1754 /* Send mbuf */
1755 return (lagg_enqueue(lp->lp_ifp, m));
1756}
1757
1758static struct mbuf *
1759lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1760{
1761 struct ifnet *ifp = sc->sc_ifp;
1762 struct lagg_port *tmp_tp;
1763
1764 if (lp == sc->sc_primary || lagg_failover_rx_all) {
1765 m->m_pkthdr.rcvif = ifp;
1766 return (m);
1767 }
1768
1769 if (!LAGG_PORTACTIVE(sc->sc_primary)) {
1770 tmp_tp = lagg_link_active(sc, sc->sc_primary);
1771 /*
1772	 * If tmp_tp is null, we've received a packet when all
1773	 * our links are down. Weird, but process it anyway.
1774 */
1775 if ((tmp_tp == NULL || tmp_tp == lp)) {
1776 m->m_pkthdr.rcvif = ifp;
1777 return (m);
1778 }
1779 }
1780
1781 m_freem(m);
1782 return (NULL);
1783}
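
/*
 * Receive policy above (failover): frames are normally accepted only
 * from the primary port, or from the port currently standing in for a
 * down primary, unless the lagg_failover_rx_all knob accepts traffic
 * from every member.
 */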
1784
1785/*
1786 * Loadbalancing
1787 */
1788
1789static int
1790lagg_lb_attach(struct lagg_softc *sc)
1791{
1792 struct lagg_port *lp;
1793 struct lagg_lb *lb;
1794
1795 if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
1796 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
1797 return (ENOMEM);
1798
1799 sc->sc_detach = lagg_lb_detach;
1800 sc->sc_start = lagg_lb_start;
1801 sc->sc_input = lagg_lb_input;
1802 sc->sc_port_create = lagg_lb_port_create;
1803 sc->sc_port_destroy = lagg_lb_port_destroy;
1804 sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1805
1806 lb->lb_key = arc4random();
1807 sc->sc_psc = (caddr_t)lb;
1808
1809 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1810 lagg_lb_port_create(lp);
1811
1812 return (0);
1813}
1814
1815static int
1816lagg_lb_detach(struct lagg_softc *sc)
1817{
1818 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1819 if (lb != NULL)
1820 free(lb, M_DEVBUF);
1821 return (0);
1822}
1823
1824static int
1825lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
1826{
1827 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1828 struct lagg_port *lp_next;
1829 int i = 0;
1830
1831 bzero(&lb->lb_ports, sizeof(lb->lb_ports));
1832 SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1833 if (lp_next == lp)
1834 continue;
1835 if (i >= LAGG_MAX_PORTS)
1836 return (EINVAL);
1837 if (sc->sc_ifflags & IFF_DEBUG)
1838 printf("%s: port %s at index %d\n",
1839 sc->sc_ifname, lp_next->lp_ifname, i);
1840 lb->lb_ports[i++] = lp_next;
1841 }
1842
1843 return (0);
1844}
1845
1846static int
1847lagg_lb_port_create(struct lagg_port *lp)
1848{
1849 struct lagg_softc *sc = lp->lp_softc;
1850 return (lagg_lb_porttable(sc, NULL));
1851}
1852
1853static void
1854lagg_lb_port_destroy(struct lagg_port *lp)
1855{
1856 struct lagg_softc *sc = lp->lp_softc;
1857 lagg_lb_porttable(sc, lp);
1858}
1859
1860static int
1861lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
1862{
1863 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1864 struct lagg_port *lp = NULL;
1865 uint32_t p = 0;
1866
1867 if (sc->use_flowid && (m->m_flags & M_FLOWID))
1868 p = m->m_pkthdr.flowid >> sc->flowid_shift;
1869 else
1870 p = lagg_hashmbuf(sc, m, lb->lb_key);
1871 p %= sc->sc_count;
1872 lp = lb->lb_ports[p];
1873
1874 /*
1875 * Check the port's link state. This will return the next active
1876 * port if the link is down or the port is NULL.
1877 */
1878 if ((lp = lagg_link_active(sc, lp)) == NULL) {
1879 m_freem(m);
1880 return (ENETDOWN);
1881 }
1882
1883 /* Send mbuf */
1884 return (lagg_enqueue(lp->lp_ifp, m));
1885}
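
/*
 * Loadbalance transmit above: a driver-supplied flow ID (M_FLOWID with
 * use_flowid enabled) is used directly after flowid_shift, otherwise
 * lagg_hashmbuf() computes one; the result modulo sc_count indexes the
 * lb_ports table built by lagg_lb_porttable().
 */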
1886
1887static struct mbuf *
1888lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1889{
1890 struct ifnet *ifp = sc->sc_ifp;
1891
1892 /* Just pass in the packet to our lagg device */
1893 m->m_pkthdr.rcvif = ifp;
1894
1895 return (m);
1896}
1897
1898/*
1899 * 802.3ad LACP
1900 */
1901
1902static int
1903lagg_lacp_attach(struct lagg_softc *sc)
1904{
1905 struct lagg_port *lp;
1906 int error;
1907
1908 sc->sc_detach = lagg_lacp_detach;
1909 sc->sc_port_create = lacp_port_create;
1910 sc->sc_port_destroy = lacp_port_destroy;
1911 sc->sc_linkstate = lacp_linkstate;
1912 sc->sc_start = lagg_lacp_start;
1913 sc->sc_input = lagg_lacp_input;
1914 sc->sc_init = lacp_init;
1915 sc->sc_stop = lacp_stop;
1916 sc->sc_lladdr = lagg_lacp_lladdr;
1917 sc->sc_req = lacp_req;
1918 sc->sc_portreq = lacp_portreq;
1919
1920 error = lacp_attach(sc);
1921 if (error)
1922 return (error);
1923
1924 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1925 lacp_port_create(lp);
1926
1927 return (error);
1928}
1929
1930static int
1931lagg_lacp_detach(struct lagg_softc *sc)
1932{
1933 struct lagg_port *lp;
1934 int error;
1935
1936 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1937 lacp_port_destroy(lp);
1938
1939 /* unlocking is safe here */
1940 LAGG_WUNLOCK(sc);
1941 error = lacp_detach(sc);
1942 LAGG_WLOCK(sc);
1943
1944 return (error);
1945}
1946
1947static void
1948lagg_lacp_lladdr(struct lagg_softc *sc)
1949{
1950 struct lagg_port *lp;
1951
1952 /* purge all the lacp ports */
1953 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1954 lacp_port_destroy(lp);
1955
1956 /* add them back in */
1957 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1958 lacp_port_create(lp);
1959}
1960
1961static int
1962lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
1963{
1964 struct lagg_port *lp;
1965
1966 lp = lacp_select_tx_port(sc, m);
1967 if (lp == NULL) {
1968 m_freem(m);
1969 return (ENETDOWN);
1970 }
1971
1972 /* Send mbuf */
1973 return (lagg_enqueue(lp->lp_ifp, m));
1974}
1975
1976static struct mbuf *
1977lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1978{
1979 struct ifnet *ifp = sc->sc_ifp;
1980 struct ether_header *eh;
1981 u_short etype;
1982
1983 eh = mtod(m, struct ether_header *);
1984 etype = ntohs(eh->ether_type);
1985
1986 /* Tap off LACP control messages */
1987 if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
1988 m = lacp_input(lp, m);
1989 if (m == NULL)
1990 return (NULL);
1991 }
1992
1993 /*
1994 * If the port is not collecting or not in the active aggregator then
1995 * free and return.
1996 */
1997 if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
1998 m_freem(m);
1999 return (NULL);
2000 }
2001
2002 m->m_pkthdr.rcvif = ifp;
2003 return (m);
2004}
2005
2006static void
2007lagg_callout(void *arg)
2008{
2009 struct lagg_softc *sc = (struct lagg_softc *)arg;
2010 struct ifnet *ifp = sc->sc_ifp;
2011
2012 ifp->if_ipackets = counter_u64_fetch(sc->sc_ipackets);
2013 ifp->if_opackets = counter_u64_fetch(sc->sc_opackets);
2014 ifp->if_ibytes = counter_u64_fetch(sc->sc_ibytes);
2015 ifp->if_obytes = counter_u64_fetch(sc->sc_obytes);
2016
2017 callout_reset(&sc->sc_callout, hz, lagg_callout, sc);
2018}
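
/*
 * The callout above fires once per second and mirrors the counter(9)
 * based statistics into the legacy ifnet fields, so tools reading
 * if_data continue to see current packet and byte counts.
 */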