ng_fec.c revision 162324
1/*
2 * ng_fec.c
3 */
4
5/*-
6 * Copyright (c) 2001 Berkeley Software Design, Inc.
7 * Copyright (c) 2000, 2001
8 *	Bill Paul <wpaul@osd.bsdi.com>.  All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by Bill Paul.
21 * 4. Neither the name of the author nor the names of any co-contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
35 * THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * $FreeBSD: head/sys/netgraph/ng_fec.c 162324 2006-09-15 16:06:27Z ru $
38 */
39/*-
40 * Copyright (c) 1996-1999 Whistle Communications, Inc.
41 * All rights reserved.
42 *
43 * Subject to the following obligations and disclaimer of warranty, use and
44 * redistribution of this software, in source or object code forms, with or
45 * without modifications are expressly permitted by Whistle Communications;
46 * provided, however, that:
47 * 1. Any and all reproductions of the source or object code must include the
48 *    copyright notice above and the following disclaimer of warranties; and
49 * 2. No rights are granted, in any manner or form, to use Whistle
50 *    Communications, Inc. trademarks, including the mark "WHISTLE
51 *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
52 *    such appears in the above copyright notice or in the software.
53 *
54 * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
55 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
56 * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
57 * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
58 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
59 * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
60 * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
61 * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
62 * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
63 * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
64 * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
65 * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
66 * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
67 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
68 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
69 * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
70 * OF SUCH DAMAGE.
71 *
72 * Author: Archie Cobbs <archie@freebsd.org>
73 *
74 * $Whistle: ng_fec.c,v 1.33 1999/11/01 09:24:51 julian Exp $
75 */
76
77/*
78 * This module implements ethernet channel bonding using the Cisco
79 * Fast EtherChannel mechanism. Two or four ports may be combined
80 * into a single aggregate interface.
81 *
82 * Interfaces are named fec0, fec1, etc.  New nodes take the
83 * first available interface name.
84 *
85 * This node also includes Berkeley packet filter support.
86 *
87 * Note that this node doesn't need to connect to any other
88 * netgraph nodes in order to do its work.
89 */
90
91#include <sys/param.h>
92#include <sys/systm.h>
93#include <sys/errno.h>
94#include <sys/kernel.h>
95#include <sys/malloc.h>
96#include <sys/mbuf.h>
97#include <sys/errno.h>
98#include <sys/sockio.h>
99#include <sys/socket.h>
100#include <sys/syslog.h>
101#include <sys/libkern.h>
102#include <sys/queue.h>
103
104#include <net/if.h>
105#include <net/if_dl.h>
106#include <net/if_types.h>
107#include <net/if_media.h>
108#include <net/bpf.h>
109#include <net/ethernet.h>
110
111#include "opt_inet.h"
112#include "opt_inet6.h"
113
114#include <netinet/in.h>
115#ifdef INET
116#include <netinet/in_systm.h>
117#include <netinet/ip.h>
118#endif
119
120#ifdef INET6
121#include <netinet/ip6.h>
122#endif
123
124#include <netgraph/ng_message.h>
125#include <netgraph/netgraph.h>
126#include <netgraph/ng_parse.h>
127#include <netgraph/ng_fec.h>
128
/*
 * We need a way to stash a pointer to our netgraph node in the
 * ifnet structure so that receive handling works. As far as I can
 * tell, although there is an AF_NETGRAPH address family, it's only
 * used to identify sockaddr_ng structures: there is no netgraph address
 * family domain. This means the AF_NETGRAPH entry in ifp->if_afdata
 * should be unused, so we can use it to hold our node context.
 */
137#define	IFP2NG(ifp)	((ifp)->if_afdata[AF_NETGRAPH])
138
139/*
140 * Current fast etherchannel implementations use either 2 or 4
141 * ports, so for now we limit the maximum bundle size to 4 interfaces.
142 */
143#define FEC_BUNDLESIZ	4
144
145struct ng_fec_portlist {
146	struct ifnet		*fec_if;
147	void			(*fec_if_input) (struct ifnet *,
148						 struct mbuf *);
149	int			fec_idx;
150	int			fec_ifstat;
151	struct ether_addr	fec_mac;
152	TAILQ_ENTRY(ng_fec_portlist) fec_list;
153};
154
155struct ng_fec_bundle {
156	TAILQ_HEAD(,ng_fec_portlist) ng_fec_ports;
157	int			fec_ifcnt;
158	int			fec_btype;
159	int			(*fec_if_output) (struct ifnet *,
160						  struct mbuf *,
161						  struct sockaddr *,
162						  struct rtentry *);
163};
164
165#define FEC_BTYPE_MAC		0x01
166#define FEC_BTYPE_INET		0x02
167#define FEC_BTYPE_INET6		0x03
168
169/* Node private data */
170struct ng_fec_private {
171	struct ifnet *ifp;
172	struct ifmedia ifmedia;
173	int	if_flags;
174	int	if_error;		/* XXX */
175	int	unit;			/* Interface unit number */
176	node_p	node;			/* Our netgraph node */
177	struct ng_fec_bundle fec_bundle;/* Aggregate bundle */
178	struct callout_handle fec_ch;	/* callout handle for ticker */
179};
180typedef struct ng_fec_private *priv_p;
181
182/* Interface methods */
183static void	ng_fec_input(struct ifnet *, struct mbuf *);
184static void	ng_fec_start(struct ifnet *ifp);
185static int	ng_fec_choose_port(struct ng_fec_bundle *b,
186			struct mbuf *m, struct ifnet **ifp);
187static int	ng_fec_setport(struct ifnet *ifp, u_long cmd, caddr_t data);
188static void	ng_fec_init(void *arg);
189static void	ng_fec_stop(struct ifnet *ifp);
190static int	ng_fec_ifmedia_upd(struct ifnet *ifp);
191static void	ng_fec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
192static int	ng_fec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
193static int	ng_fec_output(struct ifnet *ifp, struct mbuf *m0,
194			struct sockaddr *dst, struct rtentry *rt0);
195static void	ng_fec_tick(void *arg);
196static int	ng_fec_addport(struct ng_fec_private *priv, char *iface);
197static int	ng_fec_delport(struct ng_fec_private *priv, char *iface);
198
199#ifdef DEBUG
200static void	ng_fec_print_ioctl(struct ifnet *ifp, int cmd, caddr_t data);
201#endif
202
203/* Netgraph methods */
204static int		ng_fec_mod_event(module_t, int, void *);
205static ng_constructor_t	ng_fec_constructor;
206static ng_rcvmsg_t	ng_fec_rcvmsg;
207static ng_shutdown_t	ng_fec_shutdown;
208
209/* List of commands and how to convert arguments to/from ASCII */
210static const struct ng_cmdlist ng_fec_cmds[] = {
211	{
212	  NGM_FEC_COOKIE,
213	  NGM_FEC_ADD_IFACE,
214	  "add_iface",
215	  &ng_parse_string_type,
216	  NULL,
217	},
218	{
219	  NGM_FEC_COOKIE,
220	  NGM_FEC_DEL_IFACE,
221	  "del_iface",
222	  &ng_parse_string_type,
223	  NULL,
224	},
225	{
226	  NGM_FEC_COOKIE,
227	  NGM_FEC_SET_MODE_MAC,
228	  "set_mode_mac",
229	  NULL,
230	  NULL,
231	},
232	{
233	  NGM_FEC_COOKIE,
234	  NGM_FEC_SET_MODE_INET,
235	  "set_mode_inet",
236	  NULL,
237	  NULL,
238	},
239	{ 0 }
240};
241
242/* Node type descriptor */
243static struct ng_type typestruct = {
244	.version =	NG_ABI_VERSION,
245	.name =		NG_FEC_NODE_TYPE,
246	.mod_event =	ng_fec_mod_event,
247	.constructor =	ng_fec_constructor,
248	.rcvmsg =	ng_fec_rcvmsg,
249	.shutdown =	ng_fec_shutdown,
250	.cmdlist =	ng_fec_cmds,
251};
252NETGRAPH_INIT(fec, &typestruct);
253
254/* We keep a bitmap indicating which unit numbers are free.
255   One means the unit number is free, zero means it's taken. */
256static int	*ng_fec_units = NULL;
257static int	ng_fec_units_len = 0;
258static int	ng_units_in_use = 0;
259
260#define UNITS_BITSPERWORD	(sizeof(*ng_fec_units) * NBBY)
261
262static struct mtx	ng_fec_mtx;
263
264/*
265 * Find the first free unit number for a new interface.
266 * Increase the size of the unit bitmap as necessary.
267 */
268static __inline int
269ng_fec_get_unit(int *unit)
270{
271	int index, bit;
272
273	mtx_lock(&ng_fec_mtx);
274	for (index = 0; index < ng_fec_units_len
275	    && ng_fec_units[index] == 0; index++);
276	if (index == ng_fec_units_len) {		/* extend array */
277		int i, *newarray, newlen;
278
279		newlen = (2 * ng_fec_units_len) + 4;
280		MALLOC(newarray, int *, newlen * sizeof(*ng_fec_units),
281		    M_NETGRAPH, M_NOWAIT);
282		if (newarray == NULL) {
283			mtx_unlock(&ng_fec_mtx);
284			return (ENOMEM);
285		}
286		bcopy(ng_fec_units, newarray,
287		    ng_fec_units_len * sizeof(*ng_fec_units));
288		for (i = ng_fec_units_len; i < newlen; i++)
289			newarray[i] = ~0;
290		if (ng_fec_units != NULL)
291			FREE(ng_fec_units, M_NETGRAPH);
292		ng_fec_units = newarray;
293		ng_fec_units_len = newlen;
294	}
295	bit = ffs(ng_fec_units[index]) - 1;
296	KASSERT(bit >= 0 && bit <= UNITS_BITSPERWORD - 1,
297	    ("%s: word=%d bit=%d", __func__, ng_fec_units[index], bit));
298	ng_fec_units[index] &= ~(1 << bit);
299	*unit = (index * UNITS_BITSPERWORD) + bit;
300	ng_units_in_use++;
301	mtx_unlock(&ng_fec_mtx);
302	return (0);
303}
304
305/*
306 * Free a no longer needed unit number.
307 */
308static __inline void
309ng_fec_free_unit(int unit)
310{
311	int index, bit;
312
313	index = unit / UNITS_BITSPERWORD;
314	bit = unit % UNITS_BITSPERWORD;
315	mtx_lock(&ng_fec_mtx);
316	KASSERT(index < ng_fec_units_len,
317	    ("%s: unit=%d len=%d", __func__, unit, ng_fec_units_len));
318	KASSERT((ng_fec_units[index] & (1 << bit)) == 0,
319	    ("%s: unit=%d is free", __func__, unit));
320	ng_fec_units[index] |= (1 << bit);
321	/*
322	 * XXX We could think about reducing the size of ng_fec_units[]
323	 * XXX here if the last portion is all ones
324	 * XXX At least free it if no more units
325	 * Needed if we are to eventually be able to unload.
326	 */
327	ng_units_in_use--;
328	if (ng_units_in_use == 0) { /* XXX make SMP safe */
329		FREE(ng_fec_units, M_NETGRAPH);
330		ng_fec_units_len = 0;
331		ng_fec_units = NULL;
332	}
333	mtx_unlock(&ng_fec_mtx);
334}
335
336/************************************************************************
337			INTERFACE STUFF
338 ************************************************************************/
339
340static int
341ng_fec_addport(struct ng_fec_private *priv, char *iface)
342{
343	struct ng_fec_bundle	*b;
344	struct ifnet		*ifp, *bifp;
345	struct ng_fec_portlist	*p, *new;
346
347	if (priv == NULL || iface == NULL)
348		return(EINVAL);
349
350	b = &priv->fec_bundle;
351	ifp = priv->ifp;
352
353	/* Only allow reconfiguration if not running. */
354	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
355		printf("fec%d: can't add new iface; bundle is running\n",
356		    priv->unit);
357		return (EINVAL);
358	}
359
360	/* Find the interface */
361	bifp = ifunit(iface);
362	if (bifp == NULL) {
363		printf("fec%d: tried to add iface %s, which "
364		    "doesn't seem to exist\n", priv->unit, iface);
365		return(ENOENT);
366	}
367
368	/* See if we have room in the bundle */
369	if (b->fec_ifcnt == FEC_BUNDLESIZ) {
370		printf("fec%d: can't add new iface; bundle is full\n",
371		    priv->unit);
372		return(ENOSPC);
373	}
374
375	/* See if the interface is already in the bundle */
376	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
377		if (p->fec_if == bifp) {
378			printf("fec%d: iface %s is already in this "
379			    "bundle\n", priv->unit, iface);
380			return(EINVAL);
381		}
382	}
383
384	/*
385	 * All interfaces must use the same output vector. Once the
386	 * user attaches an interface of one type, make all subsequent
387	 * interfaces have the same output vector.
388	 */
389	if (b->fec_if_output != NULL) {
390		if (b->fec_if_output != bifp->if_output) {
391			printf("fec%d: iface %s is not the same type "
392			    "as the other interface(s) already in "
393			    "the bundle\n", priv->unit, iface);
394			return(EINVAL);
395		}
396	}
397
398	/* Allocate new list entry. */
399	MALLOC(new, struct ng_fec_portlist *,
400	    sizeof(struct ng_fec_portlist), M_NETGRAPH, M_NOWAIT);
401	if (new == NULL)
402		return(ENOMEM);
403
404	IF_AFDATA_LOCK(bifp);
405	IFP2NG(bifp) = priv->node;
406	IF_AFDATA_UNLOCK(bifp);
407
408	/*
409	 * If this is the first interface added to the bundle,
410	 * use its MAC address for the virtual interface (and,
411	 * by extension, all the other ports in the bundle).
412	 */
413	if (b->fec_ifcnt == 0)
414		if_setlladdr(ifp, IF_LLADDR(bifp), ETHER_ADDR_LEN);
415
416	b->fec_btype = FEC_BTYPE_MAC;
417	new->fec_idx = b->fec_ifcnt;
418	b->fec_ifcnt++;
419
420	/* Save the real MAC address. */
421	bcopy(IF_LLADDR(bifp),
422	    (char *)&new->fec_mac, ETHER_ADDR_LEN);
423
424	/* Set up phony MAC address. */
425	if_setlladdr(bifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
426
427	/* Save original input vector */
428	new->fec_if_input = bifp->if_input;
429
430	/* Override it with our own */
431	bifp->if_input = ng_fec_input;
432
433	/* Save output vector too. */
434	if (b->fec_if_output == NULL)
435		b->fec_if_output = bifp->if_output;
436
437	/* Add to the queue */
438	new->fec_if = bifp;
439	new->fec_ifstat = -1;
440	TAILQ_INSERT_TAIL(&b->ng_fec_ports, new, fec_list);
441
442	return(0);
443}
444
445static int
446ng_fec_delport(struct ng_fec_private *priv, char *iface)
447{
448	struct ng_fec_bundle	*b;
449	struct ifnet		*ifp, *bifp;
450	struct ng_fec_portlist	*p;
451
452	if (priv == NULL || iface == NULL)
453		return(EINVAL);
454
455	b = &priv->fec_bundle;
456	ifp = priv->ifp;
457
458	/* Only allow reconfiguration if not running. */
459	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
460		printf("fec%d: can't remove iface; bundle is running\n",
461		    priv->unit);
462		return (EINVAL);
463	}
464
465	/* Find the interface */
466	bifp = ifunit(iface);
467	if (bifp == NULL) {
468		printf("fec%d: tried to remove iface %s, which "
469		    "doesn't seem to exist\n", priv->unit, iface);
470		return(ENOENT);
471	}
472
473	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
474		if (p->fec_if == bifp)
475			break;
476	}
477
478	if (p == NULL) {
479		printf("fec%d: tried to remove iface %s which "
480		    "is not in our bundle\n", priv->unit, iface);
481		return(EINVAL);
482	}
483
484	/* Stop interface */
485	bifp->if_flags &= ~IFF_UP;
486	(*bifp->if_ioctl)(bifp, SIOCSIFFLAGS, NULL);
487
488	/* Restore MAC address. */
489	if_setlladdr(bifp, (u_char *)&p->fec_mac, ETHER_ADDR_LEN);
490
491	/* Restore input vector */
492	bifp->if_input = p->fec_if_input;
493
494	/* Remove our node context pointer. */
495	IF_AFDATA_LOCK(bifp);
496	IFP2NG(bifp) = NULL;
497	IF_AFDATA_UNLOCK(bifp);
498
499	/* Delete port */
500	TAILQ_REMOVE(&b->ng_fec_ports, p, fec_list);
501	FREE(p, M_NETGRAPH);
502	b->fec_ifcnt--;
503
504	if (b->fec_ifcnt == 0)
505		b->fec_if_output = NULL;
506
507	return(0);
508}
509
510/*
511 * Pass an ioctl command down to all the underyling interfaces in a
512 * bundle. Used for setting multicast filters and flags.
513 */
514
515static int
516ng_fec_setport(struct ifnet *ifp, u_long command, caddr_t data)
517{
518	struct ng_fec_private	*priv;
519	struct ng_fec_bundle	*b;
520	struct ifnet		*oifp;
521	struct ng_fec_portlist	*p;
522
523	priv = ifp->if_softc;
524	b = &priv->fec_bundle;
525
526	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
527		oifp = p->fec_if;
528		if (oifp != NULL)
529			(*oifp->if_ioctl)(oifp, command, data);
530	}
531
532	return(0);
533}
534
535static void
536ng_fec_init(void *arg)
537{
538	struct ng_fec_private	*priv;
539	struct ng_fec_bundle	*b;
540	struct ifnet		*ifp, *bifp;
541	struct ng_fec_portlist	*p;
542
543	priv = arg;
544	ifp = priv->ifp;
545	b = &priv->fec_bundle;
546
547	if (b->fec_ifcnt != 2 && b->fec_ifcnt != FEC_BUNDLESIZ) {
548		printf("fec%d: invalid bundle "
549		    "size: %d\n", priv->unit,
550		    b->fec_ifcnt);
551		return;
552	}
553
554	ng_fec_stop(ifp);
555
556	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
557		bifp = p->fec_if;
558		bifp->if_flags |= IFF_UP;
559                (*bifp->if_ioctl)(bifp, SIOCSIFFLAGS, NULL);
560		/* mark iface as up and let the monitor check it */
561		p->fec_ifstat = -1;
562	}
563
564	ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE);
565	ifp->if_drv_flags |= IFF_DRV_RUNNING;
566
567	priv->fec_ch = timeout(ng_fec_tick, priv, hz);
568
569	return;
570}
571
572static void
573ng_fec_stop(struct ifnet *ifp)
574{
575	struct ng_fec_private	*priv;
576	struct ng_fec_bundle	*b;
577	struct ifnet		*bifp;
578	struct ng_fec_portlist	*p;
579
580	priv = ifp->if_softc;
581	b = &priv->fec_bundle;
582
583	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
584		bifp = p->fec_if;
585		bifp->if_flags &= ~IFF_UP;
586                (*bifp->if_ioctl)(bifp, SIOCSIFFLAGS, NULL);
587	}
588
589	untimeout(ng_fec_tick, priv, priv->fec_ch);
590
591	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
592
593	return;
594}
595
596static void
597ng_fec_tick(void *arg)
598{
599	struct ng_fec_private	*priv;
600	struct ng_fec_bundle	*b;
601        struct ifmediareq	ifmr;
602	struct ifnet		*ifp;
603	struct ng_fec_portlist	*p;
604	int			error = 0;
605
606	priv = arg;
607	b = &priv->fec_bundle;
608
609	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
610		bzero((char *)&ifmr, sizeof(ifmr));
611		ifp = p->fec_if;
612		error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
613		if (error) {
614			printf("fec%d: failed to check status "
615			    "of link %s\n", priv->unit, ifp->if_xname);
616			continue;
617		}
618
619        	if (ifmr.ifm_status & IFM_AVALID) {
620			if (ifmr.ifm_status & IFM_ACTIVE) {
621				if (p->fec_ifstat == -1 ||
622				    p->fec_ifstat == 0) {
623					p->fec_ifstat = 1;
624					printf("fec%d: port %s in bundle "
625					    "is up\n", priv->unit,
626					    ifp->if_xname);
627				}
628			} else {
629				if (p->fec_ifstat == -1 ||
630				    p->fec_ifstat == 1) {
631					p->fec_ifstat = 0;
632					printf("fec%d: port %s in bundle "
633					    "is down\n", priv->unit,
634					    ifp->if_xname);
635				}
636			}
637		}
638	}
639
640	ifp = priv->ifp;
641	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
642		priv->fec_ch = timeout(ng_fec_tick, priv, hz);
643
644	return;
645}
646
/*
 * Media change callback for the aggregate interface.  There is nothing
 * to change, so the request is accepted and ignored; always returns 0.
 */
static int
ng_fec_ifmedia_upd(struct ifnet *ifp)
{
	(void)ifp;

	return (0);
}
652
653static void ng_fec_ifmedia_sts(struct ifnet *ifp,
654	struct ifmediareq *ifmr)
655{
656	struct ng_fec_private	*priv;
657	struct ng_fec_bundle	*b;
658	struct ng_fec_portlist	*p;
659
660	priv = ifp->if_softc;
661	b = &priv->fec_bundle;
662
663	ifmr->ifm_status = IFM_AVALID;
664	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
665		if (p->fec_ifstat == 1) {
666			ifmr->ifm_status |= IFM_ACTIVE;
667			break;
668		}
669	}
670
671	return;
672}
673
674/*
675 * Process an ioctl for the virtual interface
676 */
677static int
678ng_fec_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
679{
680	struct ifreq *const ifr = (struct ifreq *) data;
681	int s, error = 0;
682	struct ng_fec_private	*priv;
683	struct ng_fec_bundle	*b;
684
685	priv = ifp->if_softc;
686	b = &priv->fec_bundle;
687
688#ifdef DEBUG
689	ng_fec_print_ioctl(ifp, command, data);
690#endif
691	s = splimp();
692	switch (command) {
693
694	/* These two are mostly handled at a higher layer */
695	case SIOCSIFADDR:
696	case SIOCGIFADDR:
697		error = ether_ioctl(ifp, command, data);
698		break;
699
700	case SIOCSIFMTU:
701		if (ifr->ifr_mtu >= NG_FEC_MTU_MIN &&
702		    ifr->ifr_mtu <= NG_FEC_MTU_MAX) {
703			struct ng_fec_portlist *p;
704			struct ifnet *bifp;
705
706			TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
707				bifp = p->fec_if;
708				error = (*bifp->if_ioctl)(bifp, SIOCSIFMTU,
709				    data);
710				if (error != 0)
711					break;
712			}
713			if (error == 0)
714				ifp->if_mtu = ifr->ifr_mtu;
715		} else
716			error = EINVAL;
717		break;
718
719	/* Set flags */
720	case SIOCSIFFLAGS:
721		/*
722		 * If the interface is marked up and stopped, then start it.
723		 * If it is marked down and running, then stop it.
724		 */
725		if (ifr->ifr_flags & IFF_UP) {
726			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
727				/* Sanity. */
728				if (b->fec_ifcnt != 2 &&
729				    b->fec_ifcnt != FEC_BUNDLESIZ) {
730					printf("fec%d: invalid bundle "
731					    "size: %d\n", priv->unit,
732					    b->fec_ifcnt);
733					error = EINVAL;
734					break;
735				}
736				ng_fec_init(priv);
737			}
738			/*
739			 * Bubble down changes in promisc mode to
740			 * underlying interfaces.
741			 */
742			if ((ifp->if_flags & IFF_PROMISC) !=
743			    (priv->if_flags & IFF_PROMISC)) {
744				ng_fec_setport(ifp, command, data);
745				priv->if_flags = ifp->if_flags;
746			}
747		} else {
748			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
749				ng_fec_stop(ifp);
750		}
751		break;
752
753	case SIOCADDMULTI:
754	case SIOCDELMULTI:
755		ng_fec_setport(ifp, command, data);
756		error = 0;
757		break;
758	case SIOCGIFMEDIA:
759	case SIOCSIFMEDIA:
760		error = ifmedia_ioctl(ifp, ifr, &priv->ifmedia, command);
761		break;
762	/* Stuff that's not supported */
763	case SIOCSIFPHYS:
764		error = EOPNOTSUPP;
765		break;
766
767	default:
768		error = EINVAL;
769		break;
770	}
771	(void) splx(s);
772	return (error);
773}
774
775/*
776 * This routine spies on mbufs received by underlying network device
777 * drivers. When we add an interface to our bundle, we override its
778 * if_input routine with a pointer to ng_fec_input(). This means we
779 * get to look at all the device's packets before sending them to the
780 * real ether_input() for processing by the stack. Once we verify the
781 * packet comes from an interface that's been aggregated into
782 * our bundle, we fix up the rcvif pointer and increment our
783 * packet counters so that it looks like the frames are actually
784 * coming from us.
785 */
786static void
787ng_fec_input(struct ifnet *ifp, struct mbuf *m0)
788{
789	struct ng_node		*node;
790	struct ng_fec_private	*priv;
791	struct ng_fec_bundle	*b;
792	struct ifnet		*bifp;
793	struct ng_fec_portlist	*p;
794
795	/* Sanity check */
796	if (ifp == NULL || m0 == NULL)
797		return;
798
799	node = IFP2NG(ifp);
800
801	/* Sanity check part II */
802	if (node == NULL)
803		return;
804
805	priv = NG_NODE_PRIVATE(node);
806	b = &priv->fec_bundle;
807	bifp = priv->ifp;
808
809	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
810		if (p->fec_if == m0->m_pkthdr.rcvif)
811			break;
812	}
813
814	/* Wasn't meant for us; leave this frame alone. */
815	if (p == NULL)
816		return;
817
818        /*
819	 * Check for a BPF tap on the underlying interface. This
820	 * is mainly a debugging aid: it allows tcpdump-ing of an
821	 * individual interface in a bundle to work, which it
822	 * otherwise would not. BPF tapping of our own aggregate
823	 * interface will occur once we call ether_input().
824	 */
825	BPF_MTAP(m0->m_pkthdr.rcvif, m0);
826
827	/* Convince the system that this is our frame. */
828	m0->m_pkthdr.rcvif = bifp;
829	bifp->if_ipackets++;
830	bifp->if_ibytes += m0->m_pkthdr.len + sizeof(struct ether_header);
831
832	(*bifp->if_input)(bifp, m0);
833
834	return;
835}
836
837/*
838 * Take a quick peek at the packet and see if it's ok for us to use
839 * the inet or inet6 hash methods on it, if they're enabled. We do
840 * this by setting flags in the mbuf header. Once we've made up our
841 * mind what to do, we pass the frame to output vector for further
842 * processing.
843 */
844
845static int
846ng_fec_output(struct ifnet *ifp, struct mbuf *m,
847		struct sockaddr *dst, struct rtentry *rt0)
848{
849	const priv_p priv = (priv_p) ifp->if_softc;
850	struct ng_fec_bundle *b;
851	int error;
852
853	/* Check interface flags */
854	if (!((ifp->if_flags & IFF_UP) &&
855	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
856		m_freem(m);
857		return (ENETDOWN);
858	}
859
860	b = &priv->fec_bundle;
861
862	switch (b->fec_btype) {
863	case FEC_BTYPE_MAC:
864		m->m_flags |= M_FEC_MAC;
865		break;
866#ifdef INET
867	case FEC_BTYPE_INET:
868		/*
869		 * We can't use the INET address port selection
870		 * scheme if this isn't an INET packet.
871		 */
872		if (dst->sa_family == AF_INET)
873			m->m_flags |= M_FEC_INET;
874#ifdef INET6
875		else if (dst->sa_family == AF_INET6)
876			m->m_flags |= M_FEC_INET6;
877#endif
878		else {
879#ifdef DEBUG
880			if_printf(ifp, "can't do inet aggregation of non "
881			    "inet packet\n");
882#endif
883			m->m_flags |= M_FEC_MAC;
884		}
885		break;
886#endif
887	default:
888		if_printf(ifp, "bogus hash type: %d\n",
889		    b->fec_btype);
890		m_freem(m);
891		return(EINVAL);
892		break;
893	}
894
895	/*
896	 * Pass the frame to the output vector for all the protocol
897	 * handling. This will put the ethernet header on the packet
898	 * for us.
899	 */
900	priv->if_error = 0;
901	error = (*b->fec_if_output)(ifp, m, dst, rt0);
902	if (priv->if_error && !error)
903		error = priv->if_error;
904
905	return(error);
906}
907
908/*
909 * Apply a hash to the source and destination addresses in the packet
910 * in order to select an interface. Also check link status and handle
911 * dead links accordingly.
912 */
913
914static int
915ng_fec_choose_port(struct ng_fec_bundle *b,
916	struct mbuf *m, struct ifnet **ifp)
917{
918	struct ether_header	*eh;
919	struct mbuf		*m0;
920#ifdef INET
921	struct ip		*ip;
922#ifdef INET6
923	struct ip6_hdr		*ip6;
924#endif
925#endif
926
927	struct ng_fec_portlist	*p;
928	int			port = 0, mask;
929
930	/*
931	 * If there are only two ports, mask off all but the
932	 * last bit for XORing. If there are 4, mask off all
933	 * but the last 2 bits.
934	 */
935	mask = b->fec_ifcnt == 2 ? 0x1 : 0x3;
936	eh = mtod(m, struct ether_header *);
937#ifdef INET
938	ip = (struct ip *)(mtod(m, char *) +
939	    sizeof(struct ether_header));
940#ifdef INET6
941	ip6 = (struct ip6_hdr *)(mtod(m, char *) +
942	    sizeof(struct ether_header));
943#endif
944#endif
945
946	/*
947	 * The fg_fec_output() routine is supposed to leave a
948	 * flag for us in the mbuf that tells us what hash to
949	 * use, but sometimes a new mbuf is prepended to the
950	 * chain, so we have to search every mbuf in the chain
951	 * to find the flags.
952	 */
953	m0 = m;
954	while (m0) {
955		if (m0->m_flags & (M_FEC_MAC|M_FEC_INET|M_FEC_INET6))
956			break;
957		m0 = m0->m_next;
958	}
959	if (m0 == NULL)
960		return(EINVAL);
961
962	switch (m0->m_flags & (M_FEC_MAC|M_FEC_INET|M_FEC_INET6)) {
963	case M_FEC_MAC:
964		port = (eh->ether_dhost[5] ^
965		    eh->ether_shost[5]) & mask;
966		break;
967#ifdef INET
968	case M_FEC_INET:
969		port = (ntohl(ip->ip_dst.s_addr) ^
970		    ntohl(ip->ip_src.s_addr)) & mask;
971		break;
972#ifdef INET6
973	case M_FEC_INET6:
974		port = (ip6->ip6_dst.s6_addr[15] ^
975		    ip6->ip6_dst.s6_addr[15]) & mask;
976		break;
977#endif
978#endif
979	default:
980		return(EINVAL);
981			break;
982	}
983
984	TAILQ_FOREACH(p, &b->ng_fec_ports, fec_list) {
985		if (port == p->fec_idx)
986			break;
987	}
988
989	/*
990	 * Now that we've chosen a port, make sure it's
991	 * alive. If it's not alive, cycle through the bundle
992	 * looking for a port that is alive. If we don't find
993	 * any, return an error.
994	 */
995	if (p->fec_ifstat != 1) {
996		struct ng_fec_portlist	*n = NULL;
997
998		n = TAILQ_NEXT(p, fec_list);
999		if (n == NULL)
1000			n = TAILQ_FIRST(&b->ng_fec_ports);
1001		while (n != p) {
1002			if (n->fec_ifstat == 1)
1003				break;
1004			n = TAILQ_NEXT(n, fec_list);
1005			if (n == NULL)
1006				n = TAILQ_FIRST(&b->ng_fec_ports);
1007		}
1008		if (n == p)
1009			return(EAGAIN);
1010		p = n;
1011	}
1012
1013	*ifp = p->fec_if;
1014
1015	return(0);
1016}
1017
1018/*
1019 * Now that the packet has been run through ether_output(), yank it
1020 * off our own send queue and stick it on the queue for the appropriate
1021 * underlying physical interface. Note that if the interface's send
1022 * queue is full, we save an error status in our private netgraph
1023 * space which will eventually be handed up to ng_fec_output(), which
1024 * will return it to the rest of the IP stack. We need to do this
1025 * in order to duplicate the effect of ether_output() returning ENOBUFS
1026 * when it detects that an interface's send queue is full. There's no
1027 * other way to signal the error status from here since the if_start()
1028 * routine is spec'ed to return void.
1029 *
1030 * Once the frame is queued, we call ether_output_frame() to initiate
1031 * transmission.
1032 */
1033static void
1034ng_fec_start(struct ifnet *ifp)
1035{
1036	struct ng_fec_private	*priv;
1037	struct ng_fec_bundle	*b;
1038	struct ifnet		*oifp = NULL;
1039	struct mbuf		*m0;
1040	int			error;
1041
1042	priv = ifp->if_softc;
1043	b = &priv->fec_bundle;
1044
1045	IF_DEQUEUE(&ifp->if_snd, m0);
1046	if (m0 == NULL)
1047		return;
1048
1049	BPF_MTAP(ifp, m0);
1050
1051	/* Queue up packet on the proper port. */
1052	error = ng_fec_choose_port(b, m0, &oifp);
1053	if (error) {
1054		ifp->if_ierrors++;
1055		m_freem(m0);
1056		priv->if_error = ENOBUFS;
1057		return;
1058	}
1059	ifp->if_opackets++;
1060
1061	priv->if_error = IF_HANDOFF(&oifp->if_snd, m0, oifp) ? 0 : ENOBUFS;
1062
1063	return;
1064}
1065
1066#ifdef DEBUG
1067/*
1068 * Display an ioctl to the virtual interface
1069 */
1070
1071static void
1072ng_fec_print_ioctl(struct ifnet *ifp, int command, caddr_t data)
1073{
1074	char   *str;
1075
1076	switch (command & IOC_DIRMASK) {
1077	case IOC_VOID:
1078		str = "IO";
1079		break;
1080	case IOC_OUT:
1081		str = "IOR";
1082		break;
1083	case IOC_IN:
1084		str = "IOW";
1085		break;
1086	case IOC_INOUT:
1087		str = "IORW";
1088		break;
1089	default:
1090		str = "IO??";
1091	}
1092	log(LOG_DEBUG, "%s: %s('%c', %d, char[%d])\n",
1093	       ifp->if_xname,
1094	       str,
1095	       IOCGROUP(command),
1096	       command & 0xff,
1097	       IOCPARM_LEN(command));
1098}
1099#endif /* DEBUG */
1100
1101/************************************************************************
1102			NETGRAPH NODE STUFF
1103 ************************************************************************/
1104
1105/*
1106 * Constructor for a node
1107 */
1108static int
1109ng_fec_constructor(node_p node)
1110{
1111	char ifname[NG_FEC_FEC_NAME_MAX + 1];
1112	struct ifnet *ifp;
1113	priv_p priv;
1114	const uint8_t eaddr[ETHER_ADDR_LEN] = {0, 0, 0, 0, 0, 0};
1115	struct ng_fec_bundle *b;
1116	int error = 0;
1117
1118	/* Allocate node and interface private structures */
1119	MALLOC(priv, priv_p, sizeof(*priv), M_NETGRAPH, M_NOWAIT | M_ZERO);
1120	if (priv == NULL)
1121		return (ENOMEM);
1122
1123	ifp = priv->ifp = if_alloc(IFT_ETHER);
1124	if (ifp == NULL) {
1125		FREE(priv, M_NETGRAPH);
1126		return (ENOSPC);
1127	}
1128	b = &priv->fec_bundle;
1129
1130	/* Link them together */
1131	ifp->if_softc = priv;
1132
1133	/* Get an interface unit number */
1134	if ((error = ng_fec_get_unit(&priv->unit)) != 0) {
1135		if_free(ifp);
1136		FREE(priv, M_NETGRAPH);
1137		return (error);
1138	}
1139
1140	/* Link together node and private info */
1141	NG_NODE_SET_PRIVATE(node, priv);
1142	priv->node = node;
1143
1144	/* Initialize interface structure */
1145	if_initname(ifp, NG_FEC_FEC_NAME, priv->unit);
1146	ifp->if_start = ng_fec_start;
1147	ifp->if_ioctl = ng_fec_ioctl;
1148	ifp->if_init = ng_fec_init;
1149	ifp->if_watchdog = NULL;
1150	ifp->if_snd.ifq_maxlen = IFQ_MAXLEN;
1151	ifp->if_mtu = NG_FEC_MTU_DEFAULT;
1152	ifp->if_flags = (IFF_SIMPLEX|IFF_BROADCAST|IFF_MULTICAST);
1153	ifp->if_addrlen = 0;			/* XXX */
1154	ifp->if_hdrlen = 0;			/* XXX */
1155	ifp->if_baudrate = 100000000;		/* XXX */
1156	TAILQ_INIT(&ifp->if_addrhead); /* XXX useless - done in if_attach */
1157
1158	/* Give this node the same name as the interface (if possible) */
1159	bzero(ifname, sizeof(ifname));
1160	strlcpy(ifname, ifp->if_xname, sizeof(ifname));
1161	if (ng_name_node(node, ifname) != 0)
1162		log(LOG_WARNING, "%s: can't acquire netgraph name\n", ifname);
1163
1164	/* Attach the interface */
1165	ether_ifattach(ifp, eaddr);
1166	callout_handle_init(&priv->fec_ch);
1167
1168	/* Override output method with our own */
1169	ifp->if_output = ng_fec_output;
1170
1171	TAILQ_INIT(&b->ng_fec_ports);
1172	b->fec_ifcnt = 0;
1173
1174	ifmedia_init(&priv->ifmedia, 0,
1175	    ng_fec_ifmedia_upd, ng_fec_ifmedia_sts);
1176	ifmedia_add(&priv->ifmedia, IFM_ETHER|IFM_NONE, 0, NULL);
1177	ifmedia_set(&priv->ifmedia, IFM_ETHER|IFM_NONE);
1178
1179	/* Done */
1180	return (0);
1181}
1182
1183/*
1184 * Receive a control message
1185 */
1186static int
1187ng_fec_rcvmsg(node_p node, item_p item, hook_p lasthook)
1188{
1189	const priv_p priv = NG_NODE_PRIVATE(node);
1190	struct ng_fec_bundle	*b;
1191	struct ng_mesg *resp = NULL;
1192	struct ng_mesg *msg;
1193	char *ifname;
1194	int error = 0;
1195
1196	NGI_GET_MSG(item, msg);
1197	b = &priv->fec_bundle;
1198
1199	switch (msg->header.typecookie) {
1200	case NGM_FEC_COOKIE:
1201		switch (msg->header.cmd) {
1202		case NGM_FEC_ADD_IFACE:
1203			ifname = msg->data;
1204			error = ng_fec_addport(priv, ifname);
1205			break;
1206		case NGM_FEC_DEL_IFACE:
1207			ifname = msg->data;
1208			error = ng_fec_delport(priv, ifname);
1209			break;
1210		case NGM_FEC_SET_MODE_MAC:
1211			b->fec_btype = FEC_BTYPE_MAC;
1212			break;
1213#ifdef INET
1214		case NGM_FEC_SET_MODE_INET:
1215			b->fec_btype = FEC_BTYPE_INET;
1216			break;
1217#ifdef INET6
1218		case NGM_FEC_SET_MODE_INET6:
1219			b->fec_btype = FEC_BTYPE_INET6;
1220			break;
1221#endif
1222#endif
1223		default:
1224			error = EINVAL;
1225			break;
1226		}
1227		break;
1228	default:
1229		error = EINVAL;
1230		break;
1231	}
1232	NG_RESPOND_MSG(error, node, item, resp);
1233	NG_FREE_MSG(msg);
1234	return (error);
1235}
1236
1237/*
1238 * Shutdown and remove the node and its associated interface.
1239 */
1240static int
1241ng_fec_shutdown(node_p node)
1242{
1243	const priv_p priv = NG_NODE_PRIVATE(node);
1244	struct ng_fec_bundle *b;
1245	struct ng_fec_portlist	*p;
1246
1247	b = &priv->fec_bundle;
1248	ng_fec_stop(priv->ifp);
1249
1250	while (!TAILQ_EMPTY(&b->ng_fec_ports)) {
1251		p = TAILQ_FIRST(&b->ng_fec_ports);
1252		ng_fec_delport(priv, p->fec_if->if_xname);
1253	}
1254
1255	ether_ifdetach(priv->ifp);
1256	if_free_type(priv->ifp, IFT_ETHER);
1257	ifmedia_removeall(&priv->ifmedia);
1258	ng_fec_free_unit(priv->unit);
1259	FREE(priv, M_NETGRAPH);
1260	NG_NODE_SET_PRIVATE(node, NULL);
1261	NG_NODE_UNREF(node);
1262	return (0);
1263}
1264
1265/*
1266 * Handle loading and unloading for this node type.
1267 */
1268static int
1269ng_fec_mod_event(module_t mod, int event, void *data)
1270{
1271	int error = 0;
1272
1273	switch (event) {
1274	case MOD_LOAD:
1275		mtx_init(&ng_fec_mtx, "ng_fec", NULL, MTX_DEF);
1276		break;
1277	case MOD_UNLOAD:
1278		mtx_destroy(&ng_fec_mtx);
1279		break;
1280	default:
1281		error = EOPNOTSUPP;
1282		break;
1283	}
1284	return (error);
1285}
1286