aggr_impl.h revision 11878:ac93462db6d7
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef	_SYS_AGGR_IMPL_H
27#define	_SYS_AGGR_IMPL_H
28
29#include <sys/types.h>
30#include <sys/cred.h>
31#include <sys/mac_ether.h>
32#include <sys/mac_provider.h>
33#include <sys/mac_client.h>
34#include <sys/mac_client_priv.h>
35#include <sys/aggr_lacp.h>
36
37#ifdef	__cplusplus
38extern "C" {
39#endif
40
41#ifdef _KERNEL
42
43#define	AGGR_MINOR_CTL	1		/* control interface minor */
44
45/* flags for aggr_grp_modify() */
/*
 * Each flag below is a distinct bit, so several can be OR-ed into one mask.
 * NOTE(review): presumably the mask is the uint8_t argument of
 * aggr_grp_modify() and each bit selects which of the remaining arguments
 * is applied -- confirm against the implementation.
 */
46#define	AGGR_MODIFY_POLICY		0x01
47#define	AGGR_MODIFY_MAC			0x02
48#define	AGGR_MODIFY_LACP_MODE		0x04
49#define	AGGR_MODIFY_LACP_TIMER		0x08
50
51/*
52 * Possible value of aggr_pseudo_rx_ring_t.arr_flags. Set when the ring entry
53 * in the pseudo RX group is used.
54 */
55#define	MAC_PSEUDO_RING_INUSE	0x01
56
/*
 * One node of a singly linked list of MAC addresses: aua_addr holds an
 * ETHERADDRL-byte address and aua_next points to the following node.
 * Lists of this type hang off aggr_pseudo_rx_group_t.arg_macaddr and
 * aggr_port_t.lp_prom_addr.
 */
57typedef struct aggr_unicst_addr_s {
58	uint8_t				aua_addr[ETHERADDRL];
59	struct aggr_unicst_addr_s	*aua_next;
60} aggr_unicst_addr_t;
61
/*
 * One entry of the pseudo RX group's ring array
 * (aggr_pseudo_rx_group_t.arg_rings[]).
 */
62typedef struct aggr_pseudo_rx_ring_s {
63	mac_ring_handle_t	arr_rh;	/* filled in by aggr_fill_ring() */
	/* presumably the port that backs this entry -- TODO confirm */
64	struct aggr_port_s	*arr_port;
	/* presumably the underlying hardware ring -- TODO confirm */
65	mac_ring_handle_t	arr_hw_rh;
	/* MAC_PSEUDO_RING_INUSE is set while the entry is used */
66	uint_t			arr_flags;
	/* NOTE(review): looks like a generation count; its use is not visible here */
67	uint64_t		arr_gen;
68} aggr_pseudo_rx_ring_t;
69
/*
 * The pseudo RX group of an aggregation; embedded in aggr_grp_t as
 * lg_rx_group. Holds a fixed array of MAX_RINGS_PER_GROUP ring entries.
 */
70typedef struct aggr_pseudo_rx_group_s {
71	struct aggr_grp_s	*arg_grp; /* filled in by aggr_fill_group() */
72	mac_group_handle_t	arg_gh;   /* filled in by aggr_fill_group() */
	/* head of a linked list of aggr_unicst_addr_t nodes */
73	aggr_unicst_addr_t	*arg_macaddr;
74	aggr_pseudo_rx_ring_t	arg_rings[MAX_RINGS_PER_GROUP];
	/* presumably the number of arg_rings[] entries in use -- TODO confirm */
75	uint_t			arg_ring_cnt;
76} aggr_pseudo_rx_group_t;
77
/*
 * One entry of the pseudo TX group's ring array
 * (aggr_pseudo_tx_group_t.atg_rings[]).
 */
78typedef struct aggr_pseudo_tx_ring_s {
79	mac_ring_handle_t	atr_rh;	/* filled in by aggr_fill_ring() */
	/* presumably the port that backs this entry -- TODO confirm */
80	struct aggr_port_s	*atr_port;
	/* presumably the underlying hardware ring -- TODO confirm */
81	mac_ring_handle_t	atr_hw_rh;
	/* NOTE(review): flag values for this field are not defined in this header */
82	uint_t			atr_flags;
83} aggr_pseudo_tx_ring_t;
84
/*
 * The pseudo TX group of an aggregation; embedded in aggr_grp_t as
 * lg_tx_group. Holds a fixed array of MAX_RINGS_PER_GROUP ring entries.
 */
85typedef struct aggr_pseudo_tx_group_s {
86	mac_group_handle_t	atg_gh;	/* filled in by aggr_fill_group() */
	/* presumably the number of atg_rings[] entries in use -- TODO confirm */
87	uint_t			atg_ring_cnt;
88	aggr_pseudo_tx_ring_t	atg_rings[MAX_RINGS_PER_GROUP];
89} aggr_pseudo_tx_group_t;
90
91/*
92 * A link aggregation MAC port.
93 * Note that lp_next is protected by the lg_lock of the group the
94 * port is part of.
95 */
96typedef struct aggr_port_s {
	/*
	 * Link to the next port structure.
	 * NOTE(review): the comment preceding this struct names the group's
	 * lg_lock as the protecting lock, but aggr_grp_s as declared in this
	 * header has no field of that name -- confirm which lock applies.
	 */
97	struct aggr_port_s *lp_next;
98	struct aggr_grp_s *lp_grp;		/* back ptr to group */
99	datalink_id_t	lp_linkid;
100	uint16_t	lp_portid;
101	uint8_t		lp_addr[ETHERADDRL];	/* port MAC address */
	/*
	 * Adjusted by AGGR_PORT_REFHOLD()/AGGR_PORT_REFRELE(); the latter
	 * calls aggr_port_free() when the count reaches zero.
	 */
102	uint32_t	lp_refs;		/* refcount */
103	aggr_port_state_t lp_state;
	/* Eight 1-bit flags plus 24 pad bits fill one uint32_t. */
104	uint32_t	lp_started : 1,
105			lp_tx_enabled : 1,
106			lp_collector_enabled : 1,
107			lp_promisc_on : 1,
108			lp_no_link_update : 1,
109			lp_rx_grp_added : 1,
110			lp_tx_grp_added : 1,
111			lp_closing : 1,
112			lp_pad_bits : 24;
113	mac_handle_t	lp_mh;
114	mac_client_handle_t lp_mch;
115	const mac_info_t *lp_mip;
116	mac_notify_handle_t lp_mnh;
117	uint_t		lp_tx_idx;		/* idx in group's tx array */
118	uint64_t	lp_ifspeed;
119	link_state_t	lp_link_state;
120	link_duplex_t	lp_link_duplex;
121	uint64_t	lp_stat[MAC_NSTAT];
122	uint64_t	lp_ether_stat[ETHER_NSTAT];
123	aggr_lacp_port_t lp_lacp;		/* LACP state */
124	lacp_stats_t	lp_lacp_stats;
125	uint32_t	lp_margin;
126	mac_promisc_handle_t lp_mphp;
127	mac_unicast_handle_t lp_mah;
128
129	/* List of non-primary addresses that require promiscuous mode set */
130	aggr_unicst_addr_t	*lp_prom_addr;
131	/* handle of the underlying HW RX group */
132	mac_group_handle_t	lp_hwgh;
	/* presumably the number of entries in lp_tx_rings[] -- TODO confirm */
133	int			lp_tx_ring_cnt;
134	/* handles of the underlying HW TX rings */
135	mac_ring_handle_t	*lp_tx_rings;
136	/*
137	 * Handles of the pseudo TX rings. Each of them maps to
138	 * corresponding hardware TX ring in lp_tx_rings[]. A
139	 * pseudo TX ring is presented to aggr primary mac
140	 * client even when underlying NIC has no TX ring.
141	 */
142	mac_ring_handle_t	*lp_pseudo_tx_rings;
143	void			*lp_tx_notify_mh;
144} aggr_port_t;
145
146/*
147 * A link aggregation group.
148 *
149 * The following per-group flags are defined:
150 *
151 * - lg_addr_fixed: set when the MAC address has been explicitly set
152 *   when the group was created, or by a m_unicst_set() request.
153 *   If this flag is not set, the MAC address of the group will be
154 *   set to the first port that is added to the group.
155 *
156 * - lg_add_set: used only when lg_addr_fixed is not set. Captures whether
157 *   the MAC address was initialized according to the members of the group.
158 *   When set, the lg_port field points to the port from which the
159 *   MAC address was initialized.
160 *
161 */
162typedef struct aggr_grp_s {
163	datalink_id_t	lg_linkid;
164	uint16_t	lg_key;			/* key (group port number) */
	/*
	 * Adjusted by AGGR_GRP_REFHOLD()/AGGR_GRP_REFRELE(); the latter
	 * calls aggr_grp_free() when the count reaches zero.
	 */
165	uint32_t	lg_refs;		/* refcount */
166	uint16_t	lg_nports;		/* number of MAC ports */
167	uint8_t		lg_addr[ETHERADDRL];	/* group MAC address */
	/* Eight 1-bit flags plus 8 pad bits fill one uint16_t. */
168	uint16_t
169			lg_closing : 1,
170			lg_addr_fixed : 1,	/* fixed MAC address? */
171			lg_started : 1,		/* group started? */
172			lg_promisc : 1,		/* in promiscuous mode? */
173			lg_zcopy : 1,
174			lg_vlan : 1,
175			lg_force : 1,
176			lg_lso : 1,
177			lg_pad_bits : 8;
178	aggr_port_t	*lg_ports;		/* list of configured ports */
	/*
	 * NOTE(review): presumably the port from which the group MAC address
	 * was taken (the comment preceding this struct refers to an
	 * "lg_port" field, which does not exist here) -- confirm.
	 */
179	aggr_port_t	*lg_mac_addr_port;
180	mac_handle_t	lg_mh;
181	zoneid_t	lg_zoneid;
182	uint_t		lg_nattached_ports;
183	krwlock_t	lg_tx_lock;
184	uint_t		lg_ntx_ports;
185	aggr_port_t	**lg_tx_ports;		/* array of tx ports */
186	uint_t		lg_tx_ports_size;	/* size of lg_tx_ports */
187	uint32_t	lg_tx_policy;		/* outbound policy */
188	uint8_t		lg_mac_tx_policy;
189	uint64_t	lg_ifspeed;
190	link_state_t	lg_link_state;
191	link_duplex_t	lg_link_duplex;
192	uint64_t	lg_stat[MAC_NSTAT];
193	uint64_t	lg_ether_stat[ETHER_NSTAT];
194	aggr_lacp_mode_t lg_lacp_mode;		/* off, active, or passive */
195	Agg_t		aggr;			/* 802.3ad data */
196	uint32_t	lg_hcksum_txflags;
197	uint_t		lg_max_sdu;
198	uint32_t	lg_margin;
199	mac_capab_lso_t lg_cap_lso;
200
201	/*
202	 * The following fields are used for LACP packet processing.
203	 * Because LACP packet processing is not performance critical,
204	 * all LACP packets are handled by a dedicated thread instead of
205	 * in the mac_rx() call. This avoids a deadlock with
206	 * mac_unicast_remove(), which holds the mac perimeter of the
207	 * aggr and waits for the mr_refcnt of the RX ring to drop to zero.
208	 */
209	kmutex_t	lg_lacp_lock;
210	kcondvar_t	lg_lacp_cv;
211	mblk_t		*lg_lacp_head;
212	mblk_t		*lg_lacp_tail;
213	kthread_t	*lg_lacp_rx_thread;
214	boolean_t	lg_lacp_done;
215
216	aggr_pseudo_rx_group_t	lg_rx_group;
217	aggr_pseudo_tx_group_t	lg_tx_group;
218
	/*
	 * NOTE(review): the lg_tx_flowctl_* and lg_tx_* fields below look
	 * like the state of a TX notification thread (compare
	 * aggr_tx_notify_thread()); their exact protocol is not visible in
	 * this header -- confirm against the implementation.
	 */
219	kmutex_t	lg_tx_flowctl_lock;
220	kcondvar_t	lg_tx_flowctl_cv;
221	uint_t		lg_tx_blocked_cnt;
222	mac_ring_handle_t	*lg_tx_blocked_rings;
223	kthread_t	*lg_tx_notify_thread;
224	boolean_t	lg_tx_notify_done;
225
226	/*
227	 * The following fields are used by aggr to wait for all the
228	 * aggr_port_notify_cb() and aggr_port_timer_thread() to finish
229	 * before it calls mac_unregister() when the aggr is deleted.
230	 */
231	kmutex_t	lg_port_lock;
232	kcondvar_t	lg_port_cv;
233	int		lg_port_ref;
234} aggr_grp_t;
235
/*
 * Take a reference on an aggregation group: atomically add one to
 * grp->lg_refs, then assert that the counter did not wrap to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the unbraced body of an
 * if/else. Callers must terminate the invocation with a semicolon.
 * Note that grp is evaluated more than once.
 */
#define	AGGR_GRP_REFHOLD(grp) do {			\
	atomic_add_32(&(grp)->lg_refs, 1);		\
	ASSERT((grp)->lg_refs != 0);			\
} while (0)
240
/*
 * Drop a reference on an aggregation group. Asserts that a reference is
 * held, issues membar_exit(), atomically subtracts one from grp->lg_refs
 * and calls aggr_grp_free(grp) when the new value is zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; the inner "if" can then never capture a caller's
 * "else", and the macro can be the unbraced body of an if/else. Callers
 * must terminate the invocation with a semicolon. Note that grp is
 * evaluated more than once.
 */
#define	AGGR_GRP_REFRELE(grp) do {				\
	ASSERT((grp)->lg_refs != 0);				\
	membar_exit();						\
	if (atomic_add_32_nv(&(grp)->lg_refs, -1) == 0)		\
		aggr_grp_free(grp);				\
} while (0)
247
/*
 * Take a reference on an aggregation port: atomically add one to
 * port->lp_refs, then assert that the counter did not wrap to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the unbraced body of an
 * if/else. Callers must terminate the invocation with a semicolon.
 * Note that port is evaluated more than once.
 */
#define	AGGR_PORT_REFHOLD(port) do {			\
	atomic_add_32(&(port)->lp_refs, 1);		\
	ASSERT((port)->lp_refs != 0);			\
} while (0)
252
/*
 * Drop a reference on an aggregation port. Asserts that a reference is
 * held, issues membar_exit(), atomically subtracts one from port->lp_refs
 * and calls aggr_port_free(port) when the new value is zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; the inner "if" can then never capture a caller's
 * "else", and the macro can be the unbraced body of an if/else. Callers
 * must terminate the invocation with a semicolon. Note that port is
 * evaluated more than once.
 */
#define	AGGR_PORT_REFRELE(port) do {				\
	ASSERT((port)->lp_refs != 0);				\
	membar_exit();						\
	if (atomic_add_32_nv(&(port)->lp_refs, -1) == 0)	\
		aggr_port_free(port);				\
} while (0)
259
/*
 * Driver-global declarations.
 * NOTE(review): aggr_dip is presumably the driver's dev_info node and
 * aggr_ioc_init()/aggr_ioc_fini() the setup/teardown of its ioctl
 * handling -- definitions are not visible in this header.
 */
260extern dev_info_t *aggr_dip;
261extern int aggr_ioc_init(void);
262extern void aggr_ioc_fini(void);
263
/*
 * Callback types accepted by aggr_grp_info(): one for a group and one for
 * a port. Both return int and take an opaque void * as first argument.
 * NOTE(review): the meaning of the remaining arguments is not documented
 * here; judging by their types they carry the link id, MAC address and
 * LACP settings/state -- confirm against the callers.
 */
264typedef int (*aggr_grp_info_new_grp_fn_t)(void *, datalink_id_t, uint32_t,
265    uchar_t *, boolean_t, boolean_t, uint32_t, uint32_t, aggr_lacp_mode_t,
266    aggr_lacp_timer_t);
267typedef int (*aggr_grp_info_new_port_fn_t)(void *, datalink_id_t, uchar_t *,
268    aggr_port_state_t, aggr_lacp_state_t *);
269
/*
 * Group subsystem init/fini and group creation/deletion entry points.
 * aggr_grp_free() is the function AGGR_GRP_REFRELE() calls when the last
 * reference on a group is dropped.
 */
270extern void aggr_grp_init(void);
271extern void aggr_grp_fini(void);
272extern int aggr_grp_create(datalink_id_t, uint32_t, uint_t, laioc_port_t *,
273    uint32_t, boolean_t, boolean_t, uchar_t *, aggr_lacp_mode_t,
274    aggr_lacp_timer_t, cred_t *);
275extern int aggr_grp_delete(datalink_id_t, cred_t *);
276extern void aggr_grp_free(aggr_grp_t *);
277
/*
 * Operations on an existing group. aggr_grp_info() takes one callback of
 * each of the aggr_grp_info_new_*_fn_t types plus an opaque void *.
 * NOTE(review): the uint8_t argument of aggr_grp_modify() is presumably
 * a mask of AGGR_MODIFY_* flags -- confirm against the implementation.
 */
278extern int aggr_grp_info(datalink_id_t, void *, aggr_grp_info_new_grp_fn_t,
279    aggr_grp_info_new_port_fn_t, cred_t *);
280extern void aggr_grp_notify(aggr_grp_t *, uint32_t);
281extern boolean_t aggr_grp_attach_port(aggr_grp_t *, aggr_port_t *);
282extern boolean_t aggr_grp_detach_port(aggr_grp_t *, aggr_port_t *);
283extern void aggr_grp_port_mac_changed(aggr_grp_t *, aggr_port_t *,
284    boolean_t *, boolean_t *);
285extern int aggr_grp_add_ports(datalink_id_t, uint_t, boolean_t,
286    laioc_port_t *);
287extern int aggr_grp_rem_ports(datalink_id_t, uint_t, laioc_port_t *);
288extern boolean_t aggr_grp_update_ports_mac(aggr_grp_t *);
289extern int aggr_grp_modify(datalink_id_t, uint8_t, uint32_t, boolean_t,
290    const uchar_t *, aggr_lacp_mode_t, aggr_lacp_timer_t);
291extern void aggr_grp_multicst_port(aggr_port_t *, boolean_t);
292extern uint_t aggr_grp_count(void);
293
/*
 * Port subsystem init/fini and per-port operations. aggr_port_create()
 * hands the new port back through its aggr_port_t ** argument.
 * aggr_port_free() is the function AGGR_PORT_REFRELE() calls when the last
 * reference on a port is dropped.
 */
294extern void aggr_port_init(void);
295extern void aggr_port_fini(void);
296extern int aggr_port_create(aggr_grp_t *, const datalink_id_t, boolean_t,
297    aggr_port_t **);
298extern void aggr_port_delete(aggr_port_t *);
299extern void aggr_port_free(aggr_port_t *);
300extern int aggr_port_start(aggr_port_t *);
301extern void aggr_port_stop(aggr_port_t *);
302extern int aggr_port_promisc(aggr_port_t *, boolean_t);
303extern int aggr_port_unicst(aggr_port_t *);
304extern int aggr_port_multicst(void *, boolean_t, const uint8_t *);
305extern uint64_t aggr_port_stat(aggr_port_t *, uint_t);
306extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *);
307extern void aggr_port_init_callbacks(aggr_port_t *);
308
/*
 * Receive callback.
 * NOTE(review): the void * argument is presumably the aggr_port_t the
 * packet chain arrived on -- confirm against the registration site.
 */
309extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t);
310
/*
 * Transmit-side entry points.
 * NOTE(review): aggr_tx_notify_thread() is presumably the body of the
 * thread recorded in aggr_grp_t.lg_tx_notify_thread -- confirm.
 */
311extern void aggr_tx_ring_update(void *, uintptr_t);
312extern void aggr_tx_notify_thread(void *);
313extern void aggr_send_port_enable(aggr_port_t *);
314extern void aggr_send_port_disable(aggr_port_t *);
315extern void aggr_send_update_policy(aggr_grp_t *, uint32_t);
316
/*
 * LACP subsystem init/fini, per-port and per-group LACP initialization,
 * and LACP mode/timer updates. aggr_lacp_rx_enqueue() takes a port and an
 * mblk_t chain.
 * NOTE(review): presumably it appends to the group's
 * lg_lacp_head/lg_lacp_tail queue for the dedicated LACP thread -- confirm.
 */
317extern void aggr_lacp_init(void);
318extern void aggr_lacp_fini(void);
319extern void aggr_lacp_init_port(aggr_port_t *);
320extern void aggr_lacp_init_grp(aggr_grp_t *);
321extern void aggr_lacp_set_mode(aggr_grp_t *, aggr_lacp_mode_t,
322    aggr_lacp_timer_t);
323extern void aggr_lacp_update_mode(aggr_grp_t *, aggr_lacp_mode_t);
324extern void aggr_lacp_update_timer(aggr_grp_t *, aggr_lacp_timer_t);
325extern void aggr_lacp_rx_enqueue(aggr_port_t *, mblk_t *);
326extern void aggr_lacp_port_attached(aggr_port_t *);
327extern void aggr_lacp_port_detached(aggr_port_t *);
328extern void aggr_port_lacp_set_mode(aggr_grp_t *, aggr_port_t *);
329
/*
 * LACP receive processing.
 * NOTE(review): aggr_lacp_rx_thread() is presumably the body of the
 * dedicated thread recorded in aggr_grp_t.lg_lacp_rx_thread -- confirm.
 */
330extern void aggr_lacp_rx_thread(void *);
331extern void aggr_recv_lacp(aggr_port_t *, mac_resource_handle_t, mblk_t *);
332
/*
 * Hold/release/wait helpers.
 * NOTE(review): presumably these operate on aggr_grp_t.lg_port_ref under
 * lg_port_lock/lg_port_cv, which the struct comment says are used to wait
 * for port callbacks to finish before mac_unregister() -- confirm.
 */
333extern void aggr_grp_port_hold(aggr_port_t *);
334extern void aggr_grp_port_rele(aggr_port_t *);
335extern void aggr_grp_port_wait(aggr_grp_t *);
336
/*
 * Add or remove a MAC address (passed as a const uint8_t *) on a port.
 * Only the add operation reports a status (int return).
 */
337extern int aggr_port_addmac(aggr_port_t *, const uint8_t *);
338extern void aggr_port_remmac(aggr_port_t *, const uint8_t *);
339
/*
 * TX ring entry points. Both take an opaque void * and an mblk_t chain and
 * return an mblk_t *; aggr_find_tx_ring() additionally writes a ring
 * handle through its mac_ring_handle_t * argument.
 * NOTE(review): the returned mblk_t * is presumably the untransmitted
 * remainder of the chain -- confirm against the implementation.
 */
340extern mblk_t *aggr_ring_tx(void *, mblk_t *);
341extern mblk_t *aggr_find_tx_ring(void *, mblk_t *,
342    uintptr_t, mac_ring_handle_t *);
343
344#endif	/* _KERNEL */
345
346#ifdef	__cplusplus
347}
348#endif
349
350#endif	/* _SYS_AGGR_IMPL_H */
351