/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * Starcat Management Network Driver
 *
 * ****** NOTICE **** This file also resides in the SSC gate as
 * ****** NOTICE **** usr/src/uts/sun4u/scman/scman.c. Any changes
 * ****** NOTICE **** made here must be propagated there as well.
 *
 */

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/ksynch.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/debug.h>
#include <sys/conf.h>
#include <sys/kstr.h>
#include <sys/errno.h>
#include <sys/ethernet.h>
#include <sys/byteorder.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/modctl.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/pci.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <sys/socket.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/file.h>
#include <sys/dman.h>
#include <sys/autoconf.h>
#include <sys/zone.h>

extern int ddi_create_internal_pathname(dev_info_t *, char *, int, minor_t);

#define	MAN_IDNAME	"dman"
#define	DMAN_INT_PATH	"/devices/pseudo/dman@0:dman"
#define	DMAN_PATH	"/devices/pseudo/clone@0:dman"
#define	ERI_IDNAME	"eri"
#define	ERI_PATH	"/devices/pseudo/clone@0:eri"

#if defined(DEBUG)

static void man_print_msp(manstr_t *);
static void man_print_man(man_t *);
static void man_print_mdp(man_dest_t *);
static void man_print_dev(man_dev_t *);
static void man_print_mip(mi_path_t *);
static void man_print_mtp(mi_time_t *);
static void man_print_mpg(man_pg_t *);
static void man_print_path(man_path_t *);
static void man_print_work(man_work_t *);

/*
 * Set manstr_t dlpistate (upper half of multiplexor)
 */
#define	SETSTATE(msp, state) \
	MAN_DBG(MAN_DLPI, ("msp=0x%p @ %d state %s=>%s\n",		\
		    (void *)msp, __LINE__, dss[msp->ms_dlpistate],	\
		    dss[(state)]));					\
		    msp->ms_dlpistate = (state);
/*
 * Set man_dest_t dlpistate (lower half of multiplexor)
 */
#define	D_SETSTATE(mdp, state) \
	MAN_DBG(MAN_DLPI, ("dst=0x%p @ %d state %s=>%s\n",	   \
		    (void *)mdp, __LINE__, dss[mdp->md_dlpistate], \
		    dss[(state)]));				   \
		    mdp->md_dlpistate = (state);
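
/*
 * Usage sketch: man_open() below brings a new upper stream into its
 * initial DLPI state with
 *
 *	SETSTATE(msp, DL_UNATTACHED);
 *
 * which, when DEBUG is defined, also logs the old and new states via
 * the dss[] strings below before updating ms_dlpistate.
 */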

static char *promisc[] = {	/* DLPI promisc Strings */
	"not used",		/* 0x00 */
	"DL_PROMISC_PHYS",	/* 0x01 */
	"DL_PROMISC_SAP",	/* 0x02 */
	"DL_PROMISC_MULTI"	/* 0x03 */
};

static char *dps[] = {			/* DLPI Primitive Strings */
	"DL_INFO_REQ",			/* 0x00 */
	"DL_BIND_REQ",			/* 0x01 */
	"DL_UNBIND_REQ",		/* 0x02 */
	"DL_INFO_ACK",			/* 0x03 */
	"DL_BIND_ACK",			/* 0x04 */
	"DL_ERROR_ACK",			/* 0x05 */
	"DL_OK_ACK",			/* 0x06 */
	"DL_UNITDATA_REQ",		/* 0x07 */
	"DL_UNITDATA_IND",		/* 0x08 */
	"DL_UDERROR_IND",		/* 0x09 */
	"DL_UDQOS_REQ",			/* 0x0a */
	"DL_ATTACH_REQ",		/* 0x0b */
	"DL_DETACH_REQ",		/* 0x0c */
	"DL_CONNECT_REQ",		/* 0x0d */
	"DL_CONNECT_IND",		/* 0x0e */
	"DL_CONNECT_RES",		/* 0x0f */
	"DL_CONNECT_CON",		/* 0x10 */
	"DL_TOKEN_REQ",			/* 0x11 */
	"DL_TOKEN_ACK",			/* 0x12 */
	"DL_DISCONNECT_REQ",		/* 0x13 */
	"DL_DISCONNECT_IND",		/* 0x14 */
	"DL_SUBS_UNBIND_REQ",		/* 0x15 */
	"DL_LIARLIARPANTSONFIRE",	/* 0x16 */
	"DL_RESET_REQ",			/* 0x17 */
	"DL_RESET_IND",			/* 0x18 */
	"DL_RESET_RES",			/* 0x19 */
	"DL_RESET_CON",			/* 0x1a */
	"DL_SUBS_BIND_REQ",		/* 0x1b */
	"DL_SUBS_BIND_ACK",		/* 0x1c */
	"DL_ENABMULTI_REQ",		/* 0x1d */
	"DL_DISABMULTI_REQ",		/* 0x1e */
	"DL_PROMISCON_REQ",		/* 0x1f */
	"DL_PROMISCOFF_REQ",		/* 0x20 */
	"DL_DATA_ACK_REQ",		/* 0x21 */
	"DL_DATA_ACK_IND",		/* 0x22 */
	"DL_DATA_ACK_STATUS_IND",	/* 0x23 */
	"DL_REPLY_REQ",			/* 0x24 */
	"DL_REPLY_IND",			/* 0x25 */
	"DL_REPLY_STATUS_IND",		/* 0x26 */
	"DL_REPLY_UPDATE_REQ",		/* 0x27 */
	"DL_REPLY_UPDATE_STATUS_IND",	/* 0x28 */
	"DL_XID_REQ",			/* 0x29 */
	"DL_XID_IND",			/* 0x2a */
	"DL_XID_RES",			/* 0x2b */
	"DL_XID_CON",			/* 0x2c */
	"DL_TEST_REQ",			/* 0x2d */
	"DL_TEST_IND",			/* 0x2e */
	"DL_TEST_RES",			/* 0x2f */
	"DL_TEST_CON",			/* 0x30 */
	"DL_PHYS_ADDR_REQ",		/* 0x31 */
	"DL_PHYS_ADDR_ACK",		/* 0x32 */
	"DL_SET_PHYS_ADDR_REQ",		/* 0x33 */
	"DL_GET_STATISTICS_REQ",	/* 0x34 */
	"DL_GET_STATISTICS_ACK",	/* 0x35 */
};

#define	MAN_DLPI_MAX_PRIM	0x35
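
/*
 * dps[] is indexed by DLPI primitive value, so a lookup should be
 * bounded by MAN_DLPI_MAX_PRIM. A minimal (hypothetical) helper:
 *
 *	static const char *
 *	man_dps_string(t_uscalar_t prim)
 *	{
 *		return (prim <= MAN_DLPI_MAX_PRIM ? dps[prim] : "UNKNOWN");
 *	}
 */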

static char *dss[] = {			/* DLPI State Strings */
	"DL_UNBOUND",			/* 0x00	*/
	"DL_BIND_PENDING",		/* 0x01	*/
	"DL_UNBIND_PENDING",		/* 0x02	*/
	"DL_IDLE",			/* 0x03	*/
	"DL_UNATTACHED",		/* 0x04	*/
	"DL_ATTACH_PENDING",		/* 0x05	*/
	"DL_DETACH_PENDING",		/* 0x06	*/
	"DL_UDQOS_PENDING",		/* 0x07	*/
	"DL_OUTCON_PENDING",		/* 0x08	*/
	"DL_INCON_PENDING",		/* 0x09	*/
	"DL_CONN_RES_PENDING",		/* 0x0a	*/
	"DL_DATAXFER",			/* 0x0b	*/
	"DL_USER_RESET_PENDING",	/* 0x0c	*/
	"DL_PROV_RESET_PENDING",	/* 0x0d	*/
	"DL_RESET_RES_PENDING",		/* 0x0e	*/
	"DL_DISCON8_PENDING",		/* 0x0f	*/
	"DL_DISCON9_PENDING",		/* 0x10	*/
	"DL_DISCON11_PENDING",		/* 0x11	*/
	"DL_DISCON12_PENDING",		/* 0x12	*/
	"DL_DISCON13_PENDING",		/* 0x13	*/
	"DL_SUBS_BIND_PND",		/* 0x14	*/
	"DL_SUBS_UNBIND_PND",		/* 0x15	*/
};

static const char *lss[] = {
	"UNKNOWN",	/* 0x0 */
	"INIT",		/* 0x1 */
	"GOOD",		/* 0x2 */
	"STALE",	/* 0x3 */
	"FAIL",		/* 0x4 */
};

static char *_mw_type[] = {
	"OPEN_CTL",		/* 0x0 */
	"CLOSE_CTL",		/* 0x1 */
	"SWITCH",		/* 0x2 */
	"PATH_UPDATE",		/* 0x3 */
	"CLOSE",		/* 0x4 */
	"CLOSE_STREAM",		/* 0x5 */
	"DRATTACH",		/* 0x6 */
	"DRDETACH",		/* 0x7 */
	"STOP",			/* 0x8 */
	"DRSWITCH",		/* 0x9 */
	"KSTAT_UPDATE"		/* 0xA */
};

uint32_t		man_debug = MAN_WARN;

#define	man_kzalloc(a, b)	man_dbg_kzalloc(__LINE__, a, b)
#define	man_kfree(a, b)		man_dbg_kfree(__LINE__, a, b)
void	*man_dbg_kzalloc(int line, size_t size, int kmflags);
void	man_dbg_kfree(int line, void *buf, size_t size);

#else	/* DEBUG */

uint32_t		man_debug = 0;
/*
 * Set manstr_t dlpistate (upper half of multiplexor)
 */
#define	SETSTATE(msp, state) msp->ms_dlpistate = (state);
/*
 * Set man_dest_t dlpistate (lower half of multiplexor)
 */
#define	D_SETSTATE(mdp, state) mdp->md_dlpistate = (state);

#define	man_kzalloc(a, b)	kmem_zalloc(a, b)
#define	man_kfree(a, b)		kmem_free(a, b)

#endif	/* DEBUG */

#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
#define	DL_PROMISCON_TYPE(mp)	\
		(((union DL_primitives *)(mp)->b_rptr)->promiscon_req.dl_level)
#define	IOC_CMD(mp)	(((struct iocblk *)(mp)->b_rptr)->ioc_cmd)
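
/*
 * Example: man_uwput() below uses these accessors to peek at messages
 * without pulling them apart, e.g.
 *
 *	if ((DL_PRIM(mp) == DL_UNITDATA_REQ) && !wq->q_first)
 *		(void) man_udreq(wq, mp);
 */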

/*
 * Start of kstat-related declarations
 */
#define	MK_NOT_COUNTER		(1<<0)	/* is it a counter? */
#define	MK_ERROR		(1<<2)	/* for error statistics */
#define	MK_NOT_PHYSICAL		(1<<3)	/* no matching physical stat */

typedef struct man_kstat_info_s {
	char		*mk_name;	/* e.g. align_errors */
	char		*mk_physname;	/* e.g. framing (NULL for same) */
	char		*mk_physalias;	/* e.g. framing (NULL for same) */
	uchar_t		mk_type;	/* e.g. KSTAT_DATA_UINT32 */
	int		mk_flags;
} man_kstat_info_t;

/*
 * Master declaration macro, note that it uses token pasting
 */
#define	MK_DECLARE(name, pname, palias, bits, flags) \
	{ name,		pname,	palias,	KSTAT_DATA_UINT ## bits, flags }
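
/*
 * For example, MK_DECLARE32("ipackets", NULL) (defined below) pastes
 * the bits argument onto KSTAT_DATA_UINT and expands to
 *
 *	{ "ipackets", "ipackets", NULL, KSTAT_DATA_UINT32, 0 }
 */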

/*
 * Obsolete forms don't have the _sinceswitch forms, they are all errors
 */
#define	MK_OBSOLETE32(name, alias) MK_DECLARE(alias, name, alias, 32, MK_ERROR)
#define	MK_OBSOLETE64(name, alias) MK_DECLARE(alias, name, alias, 64, MK_ERROR)

/*
 * The only non-counters don't have any other aliases
 */
#define	MK_NOTCOUNTER32(name) MK_DECLARE(name, name, NULL, 32, MK_NOT_COUNTER)
#define	MK_NOTCOUNTER64(name) MK_DECLARE(name, name, NULL, 64, MK_NOT_COUNTER)

/*
 * Normal counter forms
 */
#define	MK_DECLARE32(name, alias) \
	MK_DECLARE(name, name, alias, 32, 0)
#define	MK_DECLARE64(name, alias) \
	MK_DECLARE(name, name, alias, 64, 0)

/*
 * Error counters need special MK_ERROR flag only for the non-AP form
 */
#define	MK_ERROR32(name, alias) \
	MK_DECLARE(name, name, alias, 32, MK_ERROR)
#define	MK_ERROR64(name, alias) \
	MK_DECLARE(name, name, alias, 64, MK_ERROR)

/*
 * These AP-specific stats are not backed by physical statistics
 */
#define	MK_NOTPHYS32(name) MK_DECLARE(name, NULL, NULL, 32, MK_NOT_PHYSICAL)
#define	MK_NOTPHYS64(name) MK_DECLARE(name, NULL, NULL, 64, MK_NOT_PHYSICAL)

/*
 * START of the actual man_kstat_info declaration using above macros
 */
static man_kstat_info_t man_kstat_info[] = {
	/*
	 * Link Input/Output stats
	 */
	MK_DECLARE32("ipackets", NULL),
	MK_ERROR32("ierrors", NULL),
	MK_DECLARE32("opackets", NULL),
	MK_ERROR32("oerrors", NULL),
	MK_ERROR32("collisions", NULL),
	MK_NOTCOUNTER64("ifspeed"),
	/*
	 * These are new MIB-II stats, per PSARC 1997/198
	 */
	MK_DECLARE32("rbytes", NULL),
	MK_DECLARE32("obytes", NULL),
	MK_DECLARE32("multircv", NULL),
	MK_DECLARE32("multixmt", NULL),
	MK_DECLARE32("brdcstrcv", NULL),
	MK_DECLARE32("brdcstxmt", NULL),
	/*
	 * Error values
	 */
	MK_ERROR32("norcvbuf", NULL),
	MK_ERROR32("noxmtbuf", NULL),
	MK_ERROR32("unknowns", NULL),
	/*
	 * These are the 64-bit values, they fallback to 32-bit values
	 */
	MK_DECLARE64("ipackets64", "ipackets"),
	MK_DECLARE64("opackets64", "opackets"),
	MK_DECLARE64("rbytes64", "rbytes"),
	MK_DECLARE64("obytes64", "obytes"),

	/* New AP switching statistics */
	MK_NOTPHYS64("man_switches"),
	MK_NOTPHYS64("man_link_fails"),
	MK_NOTPHYS64("man_link_stales"),
	MK_NOTPHYS64("man_icmpv4_probes"),
	MK_NOTPHYS64("man_icmpv6_probes"),

	MK_ERROR32("align_errors", "framing"),
	MK_ERROR32("fcs_errors", "crc"),
	MK_ERROR32("first_collisions", NULL),
	MK_ERROR32("multi_collisions", NULL),
	MK_ERROR32("sqe_errors", "sqe"),

	MK_ERROR32("tx_late_collisions", NULL),
	MK_ERROR32("ex_collisions", "excollisions"),
	MK_ERROR32("macxmt_errors", NULL),
	MK_ERROR32("carrier_errors", "nocarrier"),
	MK_ERROR32("toolong_errors", "buff"),
	MK_ERROR32("macrcv_errors", NULL),

	MK_OBSOLETE32("framing", "align_errors"),
	MK_OBSOLETE32("crc", "fcs_errors"),
	MK_OBSOLETE32("sqe", "sqe_errors"),
	MK_OBSOLETE32("excollisions", "ex_collisions"),
	MK_OBSOLETE32("nocarrier", "carrier_errors"),
	MK_OBSOLETE32("buff", "toolong_errors"),
};

#define	MAN_NUMSTATS (sizeof (man_kstat_info) / sizeof (man_kstat_info_t))

/*
 * Miscellaneous ethernet stuff.
 *
 * MAN's DL_INFO_ACK template.
 */
static	dl_info_ack_t man_infoack = {
	DL_INFO_ACK,				/* dl_primitive */
	ETHERMTU,				/* dl_max_sdu */
	0,					/* dl_min_sdu */
	MAN_ADDRL,				/* dl_addr_length */
	DL_ETHER,				/* dl_mac_type */
	0,					/* dl_reserved */
	0,					/* dl_current_state */
	-2,					/* dl_sap_length */
	DL_CLDLS,				/* dl_service_mode */
	0,					/* dl_qos_length */
	0,					/* dl_qos_offset */
	0,					/* dl_range_length */
	0,					/* dl_range_offset */
	DL_STYLE2,				/* dl_provider_style */
	sizeof (dl_info_ack_t),			/* dl_addr_offset */
	DL_VERSION_2,				/* dl_version */
	ETHERADDRL,				/* dl_brdcst_addr_length */
	sizeof (dl_info_ack_t) + MAN_ADDRL,	/* dl_brdcst_addr_offset */
	0					/* dl_growth */
};
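
/*
 * The offsets above assume the M_PCPROTO ack is laid out as the fixed
 * dl_info_ack_t, immediately followed by the MAN_ADDRL-byte local
 * address (dl_addr_offset) and then the ETHERADDRL-byte broadcast
 * address (dl_brdcst_addr_offset).
 */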

/*
 * Ethernet broadcast address definition.
 */
static	struct ether_addr	etherbroadcast = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

static struct ether_addr zero_ether_addr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Set via MAN_SET_SC_IPADDRS ioctl.
 */
man_sc_ipaddrs_t	man_sc_ipaddrs = { 0xffffffffU, 0xffffffffU };

/*
 * Set via MAN_SET_SC_IP6ADDRS ioctl.
 */
man_sc_ip6addrs_t	man_sc_ip6addrs = { 0, 0, 0, 0, 0, 0, 0, 0 };

/*
 * IP & ICMP constants
 */
#ifndef	ETHERTYPE_IPV6
#define	ETHERTYPE_IPV6 0x86DD
#endif

/*
 * Function prototypes.
 *
 * Upper multiplexor functions.
 */
static int	man_attach(dev_info_t *, ddi_attach_cmd_t);
static int	man_detach(dev_info_t *, ddi_detach_cmd_t);
static int	man_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int	man_open(register queue_t *, dev_t *, int, int, cred_t *);
static int	man_configure(queue_t *);
static int	man_deconfigure(void);
static int	man_init_dests(man_t *, manstr_t *);
static void	man_start_dest(man_dest_t *, manstr_t *, man_pg_t *);
static void	man_set_optimized_dest(manstr_t *);
static int	man_close(queue_t *);
static void	man_cancel_timers(man_adest_t *);
static int	man_uwput(queue_t *, mblk_t *);
static int	man_start(queue_t *, mblk_t *, eaddr_t *);
static void	man_ioctl(queue_t *, mblk_t *);
static void	man_set_linkcheck_time(queue_t *, mblk_t *);
static void	man_setpath(queue_t *, mblk_t *);
static void	man_geteaddr(queue_t *, mblk_t *);
static void	man_set_sc_ipaddrs(queue_t *, mblk_t *);
static void	man_set_sc_ip6addrs(queue_t *, mblk_t *);
static int	man_get_our_etheraddr(eaddr_t *eap);
static void	man_nd_getset(queue_t *, mblk_t *);
static void	man_dl_ioc_hdr_info(queue_t *, mblk_t *);
static int	man_uwsrv(queue_t *);
static int	man_proto(queue_t *, mblk_t *);
static int	man_udreq(queue_t *, mblk_t *);
static void	man_areq(queue_t *, mblk_t *);
static mblk_t	*man_alloc_physreq_mp(eaddr_t *);
static void	man_dreq(queue_t *, mblk_t *);
static void	man_dodetach(manstr_t *, man_work_t *);
static void	man_dl_clean(mblk_t **);
static void	man_breq(queue_t *, mblk_t *);
static void	man_ubreq(queue_t *, mblk_t *);
static void	man_ireq(queue_t *, mblk_t *);
static void	man_ponreq(queue_t *, mblk_t *);
static void	man_poffreq(queue_t *, mblk_t *);
static void	man_emreq(queue_t *, mblk_t *);
static void	man_dmreq(queue_t *, mblk_t *);
static void	man_pareq(queue_t *, mblk_t *);
static void	man_spareq(queue_t *, mblk_t *);
static int	man_dlpi(manstr_t *, mblk_t *);
static int	man_dlioc(manstr_t *, mblk_t *);
static int	man_dl_catch(mblk_t **, mblk_t *);
static void	man_dl_release(mblk_t **, mblk_t *);
static int	man_match_proto(mblk_t *, mblk_t *);
static int	man_open_ctl();
static void	man_close_ctl();
/*
 * upper/lower multiplexor functions.
 */
static int	man_dlpi_senddown(manstr_t *, mblk_t *);
static int	man_start_lower(man_dest_t *, mblk_t *, queue_t *, int caller);
static int	man_lrput(queue_t *, mblk_t *);
/*
 * Lower multiplexor functions.
 */
static int	man_lwsrv(queue_t *);
static int	man_lrsrv(queue_t *);
static void	man_dlpi_replay(man_dest_t *, mblk_t *);
static int	man_dlioc_replay(man_dest_t *);
/*
 * Link failover routines.
 */
static int	man_gettimer(int, man_dest_t *);
static void	man_linkcheck_timer(void *);
static int	man_needs_linkcheck(man_dest_t *);
static int	man_do_autoswitch(man_dest_t *);
static int	man_autoswitch(man_pg_t *, man_dev_t *, man_work_t *);
static int	man_prep_dests_for_switch(man_pg_t *, man_dest_t **, int *);
static int	man_str_uses_pg(manstr_t *, man_pg_t *);
static void	man_do_icmp_bcast(man_dest_t *, t_uscalar_t);
static mblk_t	*man_alloc_udreq(int, man_dladdr_t *);
static mblk_t	*man_pinger(t_uscalar_t);
/*
 * Functions normally executing outside of the STREAMs perimeter.
 */
/*
 * Functions supporting/processing work requests.
 */
static void	man_bwork(void);
static void	man_iwork(void);		/* inside perimeter */
void		man_work_add(man_workq_t *, man_work_t *);
man_work_t	*man_work_alloc(int, int);
void		man_work_free(man_work_t *);
/*
 * Functions implementing/supporting failover.
 *
 * Executed inside perimeter.
 */
static int	man_do_dr_attach(man_work_t *);
static int	man_do_dr_switch(man_work_t *);
static void	man_do_dr_detach(man_work_t *);
static int	man_iswitch(man_work_t *);
static void	man_ifail_dest(man_dest_t *);
static man_dest_t *man_switch_match(man_dest_t *, int, void *);
static void	man_add_dests(man_pg_t *);
static void	man_reset_dlpi(void *);
static mblk_t	*man_dup_mplist(mblk_t *);
static mblk_t	*man_alloc_ubreq_dreq();
/*
 * Executed outside perimeter (use man_lock for synchronization).
 */
static void	man_bclose(man_adest_t *);
static void	man_bswitch(man_adest_t *, man_work_t *);
static int	man_plumb(man_dest_t *);
static void	man_unplumb(man_dest_t *);
static void	man_plink(queue_t *, mblk_t *);
static void	man_unplink(queue_t *, mblk_t *);
static void	man_linkrec_insert(man_linkrec_t *);
static queue_t	*man_linkrec_find(int);
/*
 * Functions supporting pathgroups
 */
int	man_pg_cmd(mi_path_t *, man_work_t *);
static int	man_pg_assign(man_pg_t **, mi_path_t *, int);
static int	man_pg_create(man_pg_t **, man_pg_t **, mi_path_t *);
static int	man_pg_unassign(man_pg_t **, mi_path_t *);
static int	man_pg_activate(man_t *, mi_path_t *, man_work_t *);
static int	man_pg_read(man_pg_t *, mi_path_t *);
static man_pg_t	*man_find_path_by_dev(man_pg_t *, man_dev_t *, man_path_t **);
static man_pg_t	*man_find_pg_by_id(man_pg_t *, int);
static man_path_t	*man_find_path_by_ppa(man_path_t *, int);
static man_path_t	*man_find_active_path(man_path_t *);
static man_path_t	*man_find_alternate_path(man_path_t *);
static void	man_path_remove(man_path_t **, man_path_t *);
static void	man_path_insert(man_path_t **, man_path_t *);
static void	man_path_merge(man_path_t **, man_path_t *);
static int	man_path_kstat_init(man_path_t *);
static void	man_path_kstat_uninit(man_path_t *);
/*
 * Functions supporting kstat reporting.
 */
static int	man_kstat_update(kstat_t *, int);
static void	man_do_kstats(man_work_t *);
static void	man_update_path_kstats(man_t *);
static void	man_update_dev_kstats(kstat_named_t *, man_path_t *);
static void	man_sum_dests_kstats(kstat_named_t *, man_pg_t *);
static void	man_kstat_named_init(kstat_named_t *, int);
static int	man_kstat_byname(kstat_t *, char *, kstat_named_t *);
static void	man_sum_kstats(kstat_named_t *, kstat_t *, kstat_named_t *);
/*
 * Functions supporting ndd.
 */
static int	man_param_register(param_t *, int);
static int	man_pathgroups_report(queue_t *, mblk_t *, caddr_t, cred_t *);
static void	man_preport(man_path_t *, mblk_t *);
static int	man_set_active_path(queue_t *, mblk_t *, char *, caddr_t,
			cred_t *);
static int	man_get_hostinfo(queue_t *, mblk_t *, caddr_t, cred_t *);
static char	*man_inet_ntoa(in_addr_t);
static int	man_param_get(queue_t *, mblk_t *, caddr_t, cred_t *);
static int	man_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
static  void    man_param_cleanup(void);
static  void    man_nd_free(caddr_t *nd_pparam);
/*
 * MAN SSC/Domain specific externs.
 */
extern int	man_get_iosram(manc_t *);
extern int	man_domain_configure(void);
extern int	man_domain_deconfigure(void);
extern int	man_dossc_switch(uint32_t);
extern int	man_is_on_domain;

/*
 * Driver Globals protected by inner perimeter.
 */
static manstr_t	*man_strup = NULL;	/* list of MAN STREAMS */
static caddr_t	man_ndlist = NULL;	/* head of ndd var list */
void		*man_softstate = NULL;

/*
 * Driver globals protected by man_lock.
 */
kmutex_t		man_lock;		/* lock protecting vars below */
static kthread_id_t	man_bwork_id = NULL;	/* background thread ID */
man_workq_t		*man_bwork_q;		/* bgthread work q */
man_workq_t		*man_iwork_q;		/* inner perim (uwsrv) work q */
static man_linkrec_t	*man_linkrec_head = NULL;	/* list of linkblks */
ldi_handle_t		man_ctl_lh = NULL;	/* MAN control handle */
queue_t			*man_ctl_wq = NULL;	/* MAN control rq */
static int		man_config_state = MAN_UNCONFIGURED;
static int		man_config_error = ENODEV;

/*
 * These parameters are accessed via ndd to report the link configuration
 * for the MAN driver. They can also be used to force configuration changes.
 */
#define	MAN_NOTUSR	0x0f000000

/* ------------------------------------------------------------------------- */

static  param_t	man_param_arr[] = {
	/* min		max		value		name */
	{  0,		0xFFFF,		0,		"man_debug_level"},
};

#define	MAN_NDD_GETABLE	1
#define	MAN_NDD_SETABLE	2

static  uint32_t	man_param_display[] = {
/* DISPLAY */
MAN_NDD_SETABLE,	/* man_debug_level */
};

/*
 * STREAMs information.
 */
static struct module_info man_m_info = {
	MAN_IDNUM,			/* mi_idnum */
	MAN_IDNAME,			/* mi_idname */
	MAN_MINPSZ,			/* mi_minpsz */
	MAN_MAXPSZ,			/* mi_maxpsz */
	MAN_HIWAT,			/* mi_hiwat */
	MAN_LOWAT			/* mi_lowat */
};

/*
 * Upper read queue does not do anything.
 */
static struct qinit man_urinit = {
	NULL,				/* qi_putp */
	NULL,				/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct qinit man_lrinit = {
	man_lrput,			/* qi_putp */
	man_lrsrv,			/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct qinit man_uwinit = {
	man_uwput,			/* qi_putp */
	man_uwsrv,			/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct qinit man_lwinit = {
	NULL,				/* qi_putp */
	man_lwsrv,			/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct streamtab man_maninfo = {
	&man_urinit,			/* st_rdinit */
	&man_uwinit,			/* st_wrinit */
	&man_lrinit,			/* st_muxrinit */
	&man_lwinit			/* st_muxwrinit */
};


/*
 * Module linkage information for the kernel.
 *
 * Locking Theory:
 * 	D_MTPERMOD -	Only an inner perimeter: All routines single
 * 			threaded (except put, see below).
 *	D_MTPUTSHARED -	Put routines enter inner perimeter shared (not
 *			exclusive) for concurrency/performance reasons.
 *
 *	Anyone who needs exclusive inner perimeter permission (changing
 *	global data structures) does so via qwriter() calls. The
 *	background thread does all its work outside of the perimeter and
 *	submits work via qtimeout() when data structures need to be
 *	modified.
 */
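
/*
 * Usage sketch: man_uwput() runs shared, so it promotes M_IOCTL
 * handling to exclusive inner perimeter access with
 *
 *	qwriter(wq, mp, man_ioctl, PERIM_INNER);
 *
 * deferring man_ioctl() until the perimeter can be entered exclusively.
 */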

#define	MAN_MDEV_FLAGS	(D_MP|D_MTPERMOD|D_MTPUTSHARED)

DDI_DEFINE_STREAM_OPS(man_ops, nulldev, nulldev, man_attach,
    man_detach, nodev, man_info, MAN_MDEV_FLAGS, &man_maninfo,
    ddi_quiesce_not_supported);

extern int nodev(), nulldev();

static struct modldrv modldrv = {
	&mod_driverops,		/* Module type.  This one is a pseudo driver */
	"MAN MetaDriver",
	&man_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *) &modldrv,
	NULL
};


/* Virtual Driver loader entry points */

int
_init(void)
{
	int		status = DDI_FAILURE;

	MAN_DBG(MAN_INIT, ("_init:"));

	status = mod_install(&modlinkage);
	if (status != 0) {
		cmn_err(CE_WARN, "man_init: mod_install failed"
		    " error = %d", status);
		return (status);
	}

	status = ddi_soft_state_init(&man_softstate, sizeof (man_t), 4);
	if (status != 0) {
		cmn_err(CE_WARN, "man_init: ddi_soft_state_init failed"
		    " error = %d", status);
		mod_remove(&modlinkage);
		return (status);
	}

	man_bwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
	man_iwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);

	mutex_init(&man_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&man_bwork_q->q_cv, NULL, CV_DRIVER, NULL);
	cv_init(&man_iwork_q->q_cv, NULL, CV_DRIVER, NULL);

	return (0);
}

/*
 * _info is called by modinfo().
 */
int
_info(struct modinfo *modinfop)
{
	int	status;

	MAN_DBG(MAN_INIT, ("_info:"));

	status = mod_info(&modlinkage, modinfop);

	MAN_DBG(MAN_INIT, ("_info: returns %d", status));

	return (status);
}

/*
 * _fini called by modunload() just before driver is unloaded from memory.
 */
int
_fini(void)
{
	int status = 0;

	MAN_DBG(MAN_INIT, ("_fini:"));


	/*
	 * The only upper stream left should be man_ctl_lh. Note that
	 * man_close (upper stream) is synchronous (i.e. it waits for
	 * all STREAMS framework associated with the upper stream to be
	 * torn down). This guarantees that man_ctl_lh will never become
	 * NULL until no one is around to notice. This assumption is made
	 * in a few places like man_plumb, man_unplumb, etc.
	 */
	if (man_strup && (man_strup->ms_next != NULL))
		return (EBUSY);

	/*
	 * Deconfigure the driver.
	 */
	status = man_deconfigure();
	if (status)
		goto exit;

	/*
	 * need to detach every instance of the driver
	 */
	status = mod_remove(&modlinkage);
	if (status != 0)
		goto exit;

	ddi_soft_state_fini(&man_softstate);

	/*
	 * Free up locks.
	 */
	mutex_destroy(&man_lock);
	cv_destroy(&man_bwork_q->q_cv);
	cv_destroy(&man_iwork_q->q_cv);

	man_kfree(man_bwork_q, sizeof (man_workq_t));
	man_kfree(man_iwork_q, sizeof (man_workq_t));

exit:

	MAN_DBG(MAN_INIT, ("_fini: returns %d", status));

	return (status);
}

/*
 * Deconfigure the MAN driver.
 */
static int
man_deconfigure()
{
	man_work_t	*wp;
	int		status = 0;

	MAN_DBG(MAN_CONFIG, ("man_deconfigure:\n"));

	mutex_enter(&man_lock);

	if (man_is_on_domain) {
		status = man_domain_deconfigure();
		if (status != 0)
			goto exit;
	}

	man_param_cleanup();	/* Free up NDD resources */

	/*
	 * I may have to handle straggling work requests. Just qwait?
	 * or cvwait? Called from _fini - TBD
	 */
	ASSERT(man_bwork_q->q_work == NULL);
	ASSERT(man_iwork_q->q_work == NULL);

	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting CLOSE_CTL\n"));

	if (man_ctl_lh != NULL) {
		wp = man_work_alloc(MAN_WORK_CLOSE_CTL, KM_SLEEP);
		wp->mw_flags = MAN_WFLAGS_CVWAITER;
		man_work_add(man_bwork_q, wp);

		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
			cv_wait(&wp->mw_cv, &man_lock);
		}
		man_work_free(wp);
	}

	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting STOP\n"));
	if (man_bwork_id != NULL) {

		wp = man_work_alloc(MAN_WORK_STOP, KM_SLEEP);
		wp->mw_flags = MAN_WFLAGS_CVWAITER;
		man_work_add(man_bwork_q, wp);

		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
			cv_wait(&wp->mw_cv, &man_lock);
		}
		man_work_free(wp);
	}
	man_config_state = MAN_UNCONFIGURED;

exit:
	mutex_exit(&man_lock);

	MAN_DBG(MAN_CONFIG, ("man_deconfigure: returns %d\n", status));

	return (status);
}
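
/*
 * The hand-off above is the general pattern for synchronous requests
 * to the background thread: allocate a work request, flag it as a
 * cv waiter, queue it, and sleep on mw_cv until the thread marks it
 * done. A minimal sketch (man_lock held, type one of the MAN_WORK_*
 * requests):
 *
 *	wp = man_work_alloc(type, KM_SLEEP);
 *	wp->mw_flags = MAN_WFLAGS_CVWAITER;
 *	man_work_add(man_bwork_q, wp);
 *	while (!(wp->mw_flags & MAN_WFLAGS_DONE))
 *		cv_wait(&wp->mw_cv, &man_lock);
 *	man_work_free(wp);
 */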

/*
 * man_attach - allocate resources and attach an instance of the MAN driver
 * The <man>.conf file controls how many instances of the MAN driver are
 * available.
 *
 *	dip - devinfo of node
 * 	cmd - one of DDI_ATTACH | DDI_RESUME
 *
 *	returns	- success - DDI_SUCCESS
 *		- failure - DDI_FAILURE
 */
static int
man_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	man_t		*manp;		/* per instance data */
	uchar_t		flag = KSTAT_FLAG_WRITABLE; /* support netstat -kc */
	kstat_t		*ksp;
	int		minor_node_created = 0;
	int		instance;
	eaddr_t		man_eaddr;

	MAN_DBG(MAN_INIT, ("man_attach: \n"));

	if (cmd != DDI_ATTACH) {
		MAN_DBG(MAN_INIT, ("man_attach: bad command %d\n", cmd));
		return (DDI_FAILURE);
	}

	if (man_get_our_etheraddr(&man_eaddr))
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);

	/*
	 * we assume that instance is always equal to zero.
	 * and there will always only be one instance.
	 * this is done because when dman opens itself via DMAN_INT_PATH,
	 * the path assumes that the instance number is zero.
	 * if we ever need to support multiple instances of the dman
	 * driver or non-zero instances, this will have to change.
	 */
	ASSERT(instance == 0);

	/*
	 * Allocate per device info pointer and link in to global list of
	 * MAN devices.
	 */
	if ((ddi_soft_state_zalloc(man_softstate, instance) != DDI_SUCCESS) ||
	    ((manp = ddi_get_soft_state(man_softstate, instance)) == NULL)) {
		cmn_err(CE_WARN, "man_attach: cannot zalloc soft state!");
		return (DDI_FAILURE);
	}

	ddi_set_driver_private(dip, manp);
	manp->man_dip = dip;
	manp->man_meta_major = ddi_name_to_major(ddi_get_name(dip));
	manp->man_meta_ppa = instance;

	/*
	 * Set ethernet address. Note that this address is duplicated
	 * at md_src_eaddr.
	 */
	ether_copy(&man_eaddr, &manp->man_eaddr);
	manp->man_eaddr_v = 1;

	MAN_DBG(MAN_INIT, ("man_attach: set ether to %s",
	    ether_sprintf(&manp->man_eaddr)));

	/*
	 * Initialize failover-related fields (timers and such),
	 * taking values from properties if present.
	 */
	manp->man_init_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "init_time", MAN_INIT_TIME);

	manp->man_linkcheck_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "linkcheck_time", MAN_LINKCHECK_TIME);

	manp->man_linkstale_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_linkstale_time", MAN_LINKSTALE_TIME);

	manp->man_linkstale_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_linkstale_retries", MAN_LINKSTALE_RETRIES);

	manp->man_dr_delay = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_dr_delay", MAN_DR_DELAY);

	manp->man_dr_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_dr_retries", MAN_DR_RETRIES);

	manp->man_kstat_waittime = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_kstat_waittime", MAN_KSTAT_WAITTIME);

	manp->man_dlpireset_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_dlpireset_time", MAN_DLPIRESET_TIME);

	if (ddi_create_internal_pathname(dip, MAN_IDNAME, S_IFCHR,
	    ddi_get_instance(dip)) == DDI_SUCCESS) {
		minor_node_created = 1;
	} else {
		cmn_err(CE_WARN, "man_attach: failed for instance %d",
		    ddi_get_instance(dip));
		goto exit;
	}

	if (ddi_create_minor_node(dip, MAN_IDNAME, S_IFCHR,
	    ddi_get_instance(dip), DDI_NT_NET, CLONE_DEV) == DDI_SUCCESS) {
		minor_node_created = 1;
	} else {
		cmn_err(CE_WARN, "man_attach: failed for instance %d",
		    ddi_get_instance(dip));
		goto exit;
	}

	/*
	 * Allocate meta kstat_t for this instance of the driver.
	 * Note that each of man_path_t keeps track of the kstats
	 * for the real devices via mp_last_knp.
	 */
#ifdef	kstat
	flag |= KSTAT_FLAG_PERSISTENT;
#endif
	ksp = kstat_create(MAN_IDNAME, ddi_get_instance(dip), NULL, "net",
	    KSTAT_TYPE_NAMED, MAN_NUMSTATS, flag);

	if (ksp == NULL) {
		cmn_err(CE_WARN, "man_attach(%d): kstat_create failed"
		    " - manp(0x%p)", manp->man_meta_ppa,
		    (void *)manp);
		goto exit;
	}

	man_kstat_named_init(ksp->ks_data, MAN_NUMSTATS);
	ksp->ks_update = man_kstat_update;
	ksp->ks_private = (void *) manp;
	manp->man_ksp = ksp;
	kstat_install(manp->man_ksp);

	ddi_report_dev(dip);

	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_SUCCESS",
	    ddi_get_instance(dip)));

	return (DDI_SUCCESS);

exit:
	if (minor_node_created)
		ddi_remove_minor_node(dip, NULL);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(man_softstate, instance);
	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_FAILURE",
	    ddi_get_instance(dip)));

	return (DDI_FAILURE);

}

static int
man_get_our_etheraddr(eaddr_t *eap)
{
	manc_t	manc;
	int	status = 0;

	if (man_is_on_domain) {
		if (status = man_get_iosram(&manc))
			return (status);
		ether_copy(&manc.manc_dom_eaddr, eap);
	} else {
		(void) localetheraddr((struct ether_addr *)NULL, eap);
	}

	return (status);
}

/*
 * man_detach - detach an instance of a driver
 *
 *	dip - devinfo of node
 * 	cmd - one of DDI_DETACH | DDI_SUSPEND
 *
 *	returns	- success - DDI_SUCCESS
 *		- failure - DDI_FAILURE
 */
static int
man_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	register man_t	*manp;		/* per instance data */
	int		instance;

	MAN_DBG(MAN_INIT, ("man_detach(%d):\n", ddi_get_instance(dip)));

	if (cmd != DDI_DETACH) {
		MAN_DBG(MAN_INIT, ("man_detach: bad command %d\n", cmd));
		return (DDI_FAILURE);
	}

	if (dip == NULL) {
		MAN_DBG(MAN_INIT, ("man_detach: dip == NULL\n"));
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);

	mutex_enter(&man_lock);

	manp = (man_t *)ddi_get_soft_state(man_softstate, instance);
	if (manp == NULL) {
		mutex_exit(&man_lock);

		cmn_err(CE_WARN, "man_detach: unable to get softstate"
		    " for instance = %d, dip = 0x%p!\n", instance,
		    (void *)dip);
		return (DDI_FAILURE);
	}

	if (manp->man_refcnt != 0) {
		mutex_exit(&man_lock);

		cmn_err(CE_WARN, "man_detach: %s%d refcnt %d", MAN_IDNAME,
		    instance, manp->man_refcnt);
		MAN_DBGCALL(MAN_INIT, man_print_man(manp));

		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(dip, NULL);

	mutex_exit(&man_lock);

	kstat_delete(manp->man_ksp);
	ddi_soft_state_free(man_softstate, instance);
	ddi_set_driver_private(dip, NULL);

	MAN_DBG(MAN_INIT, ("man_detach returns DDI_SUCCESS"));

	return (DDI_SUCCESS);
}

/*
 * man_info:
 *	As a standard DLPI style-2 driver, man_info() should always return
 *	DDI_FAILURE.
 *
 *	However, man_open() has special treatment for a direct open
 *	via kstr_open() without going through the CLONE driver.
 *	To make this special kstr_open() work, we need to map
 *	minor of 0 to instance 0.
 */
/*ARGSUSED*/
static int
man_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	minor_t minor;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		break;

	case DDI_INFO_DEVT2INSTANCE:
		minor = getminor((dev_t)arg);
		if (minor == 0) {
			*result = (void *)(uintptr_t)minor;
			return (DDI_SUCCESS);
		}
		break;
	default:
		break;
	}
	return (DDI_FAILURE);
}

/* Standard Device Driver entry points */

/*
 * man_open - open the device
 *
 *	rq - upper read queue of the stream
 *	devp - pointer to a device number
 *	flag - information passed from the user program open(2) system call
 *	sflag - stream flags
 *	credp - pointer to the cred(9S) user credential structure
 *
 *	returns	- success - 0
 *		- failure - errno value for failure
 */
/*ARGSUSED*/
static int
man_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int			minordev = -1;
	manstr_t		*msp;
	manstr_t		*tsp;
	manstr_t		**prevmsp;
	int			status = 0;

	MAN_DBG(MAN_OCLOSE, ("man_open: rq(0x%p) sflag(0x%x)\n",
	    (void *)rq, sflag));

	ASSERT(rq);
	ASSERT(sflag != MODOPEN);

	/*
	 * reopen; q_ptr set to msp at open completion.
	 */
	if (rq->q_ptr) {
		return (0);
	}

	/*
	 * Allocate and initialize manstr_t for this device.
	 */
	msp = man_kzalloc(sizeof (manstr_t), KM_SLEEP);
	SETSTATE(msp, DL_UNATTACHED);
	msp->ms_meta_ppa = -1;
	msp->ms_rq = rq;
	rq->q_ptr = WR(rq)->q_ptr = msp;

	/*
	 * Get the MAN driver configured on 1st open.  Note that the only way
	 * we get sflag != CLONEOPEN is via the call in man_plumbctl().  All
	 * CLONEOPEN calls to man_open will be via the file system
	 * device node /dev/man, a pseudo clone device.
	 */

	qprocson(rq);

	if (sflag == CLONEOPEN && man_config_state != MAN_CONFIGURED) {
		/*
		 * First open calls man_configure. Everyone qwaits until
		 * we get it open. See man_open_ctl() comments for mutex
		 * lock/synchronization info.
		 */

		mutex_enter(&man_lock);

		if (man_config_state == MAN_UNCONFIGURED) {
			man_config_state = MAN_CONFIGURING;
			mutex_exit(&man_lock);
			status = man_configure(rq);
			if (status != 0)
				goto exit;
		} else {
			while (man_config_state == MAN_CONFIGURING) {

				mutex_exit(&man_lock);
				status = qwait_sig(rq);

				if (status == 0) {
					status = EINTR;
					goto exit;
				}

				mutex_enter(&man_lock);
			}
			mutex_exit(&man_lock);

			if (man_config_error) {
				status = man_config_error;
				goto exit;
			}
		}
	}

	/*
	 * Determine minor device number. man_open serialized by
	 * D_MTPERMOD.
	 */
	prevmsp = &man_strup;
	if (sflag == CLONEOPEN) {

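		/*
		 * man_strup is kept sorted by minor number; walk it and
		 * claim the first unused minor.
		 */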
		minordev = 0;
		for (; (tsp = *prevmsp) != NULL; prevmsp = &tsp->ms_next) {
			if (minordev < tsp->ms_minor)
				break;
			minordev++;
		}
		*devp = makedevice(getmajor(*devp), minordev);

	} else {
		/*
		 * Should only get here from man_plumbctl().
		 */
		/*LINTED E_ASSIGN_UINT_TO_SIGNED_INT*/
		minordev = getminor(*devp);

		/*
		 * No need to protect this here as all opens are
		 * qwaiting, and the bgthread (who is doing this open)
		 * is the only one who mucks with this variable.
		 */
		man_ctl_wq = WR(rq);

		ASSERT(minordev == 0);	/* TBD delete this */
	}

	msp->ms_meta_maj = getmajor(*devp);
	msp->ms_minor = minordev;
	if (minordev == 0)
		msp->ms_flags = MAN_SFLAG_CONTROL;

	/*
	 * Link new entry into global list of active entries.
	 */
	msp->ms_next = *prevmsp;
	*prevmsp = msp;


	/*
	 * Disable automatic enabling of our write service procedure.
	 * We control this explicitly.
	 */
	noenable(WR(rq));

exit:
	MAN_DBG(MAN_OCLOSE, ("man_open: exit rq(0x%p) minor %d errno %d\n",
	    (void *)rq, minordev, status));

	/*
	 * Clean up on error.
	 */
	if (status) {
		qprocsoff(rq);
		rq->q_ptr = WR(rq)->q_ptr = NULL;
		man_kfree((char *)msp, sizeof (manstr_t));
	} else
		(void) qassociate(rq, -1);

	return (status);
}

/*
 * Get the driver configured.  Called from first man_open with exclusive
 * inner perimeter.
 */
static int
man_configure(queue_t *rq)
{
	man_work_t	*wp;
	int		status = 0;

	MAN_DBG(MAN_CONFIG, ("man_configure:"));

	/*
	 * Initialize NDD parameters.
	 */
	if (!man_ndlist &&
	    !man_param_register(man_param_arr, A_CNT(man_param_arr))) {
		cmn_err(CE_WARN, "man_configure: man_param_register failed!");
		man_config_error = ENOMEM;
		goto exit;
	}

	mutex_enter(&man_lock);

	/*
	 * Start up background thread.
	 */
	if (man_bwork_id == NULL)
		man_bwork_id = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    man_bwork, NULL, 0, &p0, TS_RUN, minclsyspri);

	/*
	 * Submit work to get control stream opened. Qwait until it's
	 * done. See man_open_ctl for mutex lock/synchronization info.
	 */

	if (man_ctl_lh == NULL) {
		wp = man_work_alloc(MAN_WORK_OPEN_CTL, KM_SLEEP);
		wp->mw_flags |= MAN_WFLAGS_QWAITER;
		wp->mw_q = WR(rq);

		/*
		 * Submit work and wait. When man_open_ctl exits
		 * man_open, it will cause qwait below to return.
		 */
		man_work_add(man_bwork_q, wp);
		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
			mutex_exit(&man_lock);
			qwait(rq);
			mutex_enter(&man_lock);
		}
		status = wp->mw_status;
		man_work_free(wp);

	}
	mutex_exit(&man_lock);

	/*
	 * If on domain, setup IOSRAM and build the pathgroups
	 * automatically.
	 */
	if ((status == 0) && man_is_on_domain)
		status = man_domain_configure();

exit:
	mutex_enter(&man_lock);

	man_config_error = status;
	if (status != 0)
		man_config_state = MAN_UNCONFIGURED;
	else
		man_config_state = MAN_CONFIGURED;

	mutex_exit(&man_lock);

	MAN_DBG(MAN_CONFIG, ("man_configure: returns %d\n", status));

	return (status);
}

/*
 * man_close - close the device
 *
 *	rq - upper read queue of the stream
 *
 *	returns	- success - 0
 *		- failure - errno value for failure
 */
static int
man_close(queue_t *rq)
{
	manstr_t		*close_msp;
	manstr_t		*msp;

	MAN_DBG(MAN_OCLOSE, ("man_close: rq(0x%p)\n", (void *)rq));

	qprocsoff(rq);
	close_msp = (manstr_t *)rq->q_ptr;

	/*
	 * Unlink the per-Stream entry from the active list and free it.
	 */
	if (close_msp == man_strup)
		man_strup = close_msp->ms_next;
	else {
		for (msp = man_strup; msp && msp->ms_next != close_msp; )
			msp = msp->ms_next;

		if (msp == NULL) {
			cmn_err(CE_WARN, "man_close: no stream!");
			return (ENODEV);
		}

		msp->ms_next = close_msp->ms_next;
	}

	if (close_msp->ms_dests != NULL) {
		/*
		 * Still DL_ATTACHED
		 */
		man_work_t *wp;

		wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_SLEEP);
		man_dodetach(close_msp, wp);
	}

	if (close_msp->ms_flags & MAN_SFLAG_CONTROL) {
		/*
		 * Driver about to unload.
		 */
		man_ctl_wq = NULL;
	}

	rq->q_ptr = WR(rq)->q_ptr = NULL;
	man_kfree((char *)close_msp, sizeof (manstr_t));
	(void) qassociate(rq, -1);

	MAN_DBG(MAN_OCLOSE, ("man_close: exit\n"));

	return (0);
}

/*
 * Ask bgthread to tear down lower stream and qwait
 * until it's done.
 */
static void
man_dodetach(manstr_t *msp, man_work_t *wp)
{
	man_dest_t	*mdp;
	int		i;
	mblk_t		*mp;

	mdp = msp->ms_dests;
	msp->ms_dests = NULL;
	msp->ms_destp = NULL;

	/*
	 * Excise lower dests array, set it closing and hand it to
	 * background thread to dispose of.
	 */
	for (i = 0; i < MAN_MAX_DESTS; i++) {

		mdp[i].md_state |= MAN_DSTATE_CLOSING;
		mdp[i].md_msp = NULL;
		mdp[i].md_rq = NULL;

		if (mdp[i].md_lc_timer_id != 0) {
			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
			mdp[i].md_lc_timer_id = 0;
		}
		if (mdp[i].md_bc_id != 0) {
			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
			mdp[i].md_bc_id = 0;
		}

		mutex_enter(&mdp[i].md_lock);
		while ((mp = mdp[i].md_dmp_head) != NULL) {
			mdp[i].md_dmp_head = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
		}
		mdp[i].md_dmp_count = 0;
		mdp[i].md_dmp_tail = NULL;
		mutex_exit(&mdp[i].md_lock);
	}

	/*
	 * Dump any DL type messages previously caught.
	 */
	man_dl_clean(&msp->ms_dl_mp);
	man_dl_clean(&msp->ms_dlioc_mp);

	/*
	 * We need to clear fast path flag when dlioc messages are cleaned.
	 */
	msp->ms_flags &= ~MAN_SFLAG_FAST;

	/*
	 * MAN_WORK_CLOSE_STREAM work request preallocated by caller.
	 */
	ASSERT(wp->mw_type == MAN_WORK_CLOSE_STREAM);
	ASSERT(mdp != NULL);
	wp->mw_arg.a_mdp = mdp;
	wp->mw_arg.a_ndests = MAN_MAX_DESTS;
	wp->mw_arg.a_pg_id = -1;	/* Don't care */

	mutex_enter(&man_lock);
	man_work_add(man_bwork_q, wp);
	msp->ms_manp->man_refcnt--;
	mutex_exit(&man_lock);

	msp->ms_manp = NULL;

}


/*
 * man_uwput - handle DLPI messages issued from upstream, the write
 * side of the upper half of multiplexor. Called with shared access to
 * the inner perimeter.
 *
 *	wq - upper write queue of mxx
 *	mp - mblk ptr to DLPI request
 */
static int
man_uwput(register queue_t *wq, register mblk_t *mp)
{
	register manstr_t	*msp;		/* per stream data */
	register man_t		*manp;		/* per instance data */

	msp = (manstr_t *)wq->q_ptr;

	MAN_DBG(MAN_UWPUT, ("man_uwput: wq(0x%p) mp(0x%p) db_type(0x%x)"
	    " msp(0x%p)\n",
	    (void *)wq, (void *)mp, DB_TYPE(mp), (void *)msp));
#if DEBUG
	if (man_debug & MAN_UWPUT) {
		if (DB_TYPE(mp) == M_IOCTL) {
			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
			MAN_DBG(MAN_UWPUT,
			    ("man_uwput: M_IOCTL ioc_cmd(0x%x)\n",
			    iocp->ioc_cmd));
		} else if (DB_TYPE(mp) == M_CTL) {
			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
			MAN_DBG(MAN_UWPUT,
			    ("man_uwput: M_CTL ioc_cmd(0x%x)\n",
			    iocp->ioc_cmd));
		}
	}
#endif	/* DEBUG */


	switch (DB_TYPE(mp)) {
	case M_DATA:
		manp = msp->ms_manp;

		if (((msp->ms_flags & (MAN_SFLAG_FAST | MAN_SFLAG_RAW)) == 0) ||
		    (msp->ms_dlpistate != DL_IDLE) ||
		    (manp == NULL)) {

			merror(wq, mp, EPROTO);
			break;
		}

		if (wq->q_first) {
			(void) putq(wq, mp);
			qenable(wq);
		} else {
			ehdr_t	*ep = (ehdr_t *)mp->b_rptr;

			(void) man_start(wq, mp, &ep->ether_dhost);
		}
		break;

	case M_PROTO:
	case M_PCPROTO:
		if ((DL_PRIM(mp) == DL_UNITDATA_REQ) && !wq->q_first) {
			(void) man_udreq(wq, mp);
		} else {
			(void) putq(wq, mp);
			qenable(wq);
		}
		break;

	case M_IOCTL:
	case M_IOCDATA:
		qwriter(wq, mp, man_ioctl, PERIM_INNER);
		break;

	case M_CTL:
		freemsg(mp);
		break;

	case M_FLUSH:
		MAN_DBG(MAN_UWPUT, ("man_uwput: M_FLUSH\n"));
		if (*mp->b_rptr & FLUSHW)
			flushq(wq, FLUSHDATA);
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(wq), FLUSHDATA);
			*mp->b_rptr &= ~FLUSHW;
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		break;

	default:
		MAN_DBG(MAN_WARN,
		    ("man_uwput: illegal mblk(0x%p) type(0x%x)\n",
		    (void *)mp, DB_TYPE(mp)));
		freemsg(mp);
		break;
	} /* End switch */

	MAN_DBG(MAN_UWPUT, ("man_uwput: exit wq(0x%p) mp(0x%p)\n",
	    (void *)wq, (void *)mp));

	return (0);
}

/*
 * man_start - handle data messages issued from upstream.  Send down
 * to particular man_dest based on ether_addr, otherwise send out to all
 * valid man_dests.
 *
 *	wq - upper write queue of mxx
 *	mp - mblk ptr to DLPI request
 * 	caller - Caller ID for decision making on canput failure
 *
 * Returns:
 *	0	- Data xmitted or No flow control situation detected.
 *	1	- Flow control situation detected.
 *
 * STREAMS Flow Control: can be used if there is only one destination
 * for a stream (1 to 1 multiplexor). In this case, we will use the upper
 * write queue to store mblks when in flow control. If there are multiple
 * destinations, we cannot use the STREAMs based flow control (1 to many
 * multiplexor). In this case, we will use the lower write queue to store
 * mblks when in flow control. Since destinations come and go, we may
 * transition between 1-to-1 and 1-to-m. So it may be the case that we have
 * some mblks stored on the upper queue, and some on the lower queue. However,
 * we will never send mblks out of order. See man_uwput and man_start_lower().
 *
 * A simple flow control mechanism is implemented for the deferred mblk list,
 * as this list is expected to be used temporarily for a very short
 * period required for switching paths. This flow control mechanism is
 * used only as a defensive approach to avoid infinite growth of this list.
 */
static int
man_start(register queue_t *wq, register mblk_t *mp, eaddr_t *eap)
{
	register manstr_t	*msp;		/* per stream data */
	register man_dest_t	*mdp = NULL;	/* destination */
	mblk_t			*tmp;
	int			i;
	int			status = 0;

	msp = (manstr_t *)wq->q_ptr;

	MAN_DBG(MAN_DATA, ("man_start: msp(0x%p) ether_addr(%s)\n",
	    (void *)msp, ether_sprintf(eap)));

	if (msp->ms_dests == NULL) {
		cmn_err(CE_WARN, "man_start: no destinations");
		freemsg(mp);
		return (0);
	}

	/*
	 * Optimization if only one valid destination.
	 */
	mdp = msp->ms_destp;

	if (IS_UNICAST(eap)) {
		queue_t			*flow_wq = NULL;

		if (mdp == NULL) {
			/*
			 * TBD - This needs to be optimized (some bits in
			 * ehp->dhost will act as an index).
			 */
			for (i = 0; i < MAN_MAX_DESTS; i++) {

				mdp = &msp->ms_dests[i];

				if ((mdp->md_state == MAN_DSTATE_READY) &&
				    (ether_cmp(eap, &mdp->md_dst_eaddr) == 0))
					break;
				mdp = NULL;
			}
		} else {
			/*
			 * 1 to 1 multiplexing, use upper wq for flow control.
			 */
			flow_wq = wq;
		}

		if (mdp != NULL) {
			/*
			 * It's going somewhere specific
			 */
			status = man_start_lower(mdp, mp, flow_wq, MAN_UPPER);

		} else {
			MAN_DBG(MAN_DATA, ("man_start: no destination"
			    " for eaddr %s\n", ether_sprintf(eap)));
			freemsg(mp);
		}
	} else {
		/*
		 * Broadcast or multicast - send everyone a copy.
		 */
		if (mdp == NULL) {
			for (i = 0; i < MAN_MAX_DESTS; i++) {
				mdp = &msp->ms_dests[i];

				if (mdp->md_state != MAN_DSTATE_READY)
					continue;

				if ((tmp = copymsg(mp)) != NULL) {
					(void) man_start_lower(mdp, tmp,
					    NULL, MAN_UPPER);
				} else {
					MAN_DBG(MAN_DATA, ("man_start: copymsg"
					    " failed!"));
				}
			}
			freemsg(mp);
		} else {
			if (mdp->md_state == MAN_DSTATE_READY)
				status = man_start_lower(mdp, mp, wq,
				    MAN_UPPER);
			else
				freemsg(mp);
		}
	}
	return (status);
}

/*
 * Send a DL_UNITDATA or M_DATA fastpath data mblk to a particular
 * destination. Other mblk types are sent down via man_dlpi_senddown().
 *
 * Returns:
 *	0	- Data xmitted
 *	1	- Data not xmitted due to flow control.
 */
static int
man_start_lower(man_dest_t *mdp, mblk_t *mp, queue_t *flow_wq, int caller)
{
	queue_t		*wq = mdp->md_wq;
	int		status = 0;

	/*
	 * Lower stream ready for data transmit.
	 */
	if (mdp->md_state == MAN_DSTATE_READY &&
	    mdp->md_dlpistate == DL_IDLE) {

		ASSERT(mdp->md_wq != NULL);

		if (caller == MAN_UPPER) {
			/*
			 * Check for flow control conditions for lower
			 * stream.
			 */
			if (mdp->md_dmp_head == NULL &&
			    wq->q_first == NULL && canputnext(wq)) {

				(void) putnext(wq, mp);

			} else {
				mutex_enter(&mdp->md_lock);
				if (mdp->md_dmp_head != NULL) {
					/*
					 * A simple flow control mechanism.
					 */
					if (mdp->md_dmp_count >= MAN_HIWAT) {
						freemsg(mp);
					} else {
						/*
						 * Add 'mp' to the deferred
						 * msg list.
						 */
						mdp->md_dmp_tail->b_next = mp;
						mdp->md_dmp_tail = mp;
						mdp->md_dmp_count +=
						    msgsize(mp);
					}
					mutex_exit(&mdp->md_lock);
					/*
					 * Inform flow control situation
					 * to the caller.
					 */
					status = 1;
					qenable(wq);
					goto exit;
				}
				mutex_exit(&mdp->md_lock);
				/*
				 * If 1 to 1 mux, use upper write queue for
				 * flow control.
				 */
				if (flow_wq != NULL) {
					/*
					 * putbq() message and indicate
					 * flow control situation to the
					 * caller.
					 */
					putbq(flow_wq, mp);
					qenable(flow_wq);
					status = 1;
					goto exit;
				}
				/*
				 * 1 to many mux, use lower write queue for
				 * flow control. Be mindful not to overflow
				 * the lower MAN STREAM q.
				 */
				if (canput(wq)) {
					(void) putq(wq, mp);
					qenable(wq);
				} else {
					MAN_DBG(MAN_DATA, ("man_start_lower:"
					    " lower q flow controlled -"
					    " discarding packet"));
					freemsg(mp);
					goto exit;
				}
			}

		} else {
			/*
			 * man_lwsrv is draining flow-controlled mblks.
			 */
			if (canputnext(wq))
				(void) putnext(wq, mp);
			else
				status = 1;
		}
		goto exit;
	}

	/*
	 * Lower stream in transition, do flow control.
	 */
	status = 1;

	if (mdp->md_state == MAN_DSTATE_NOTPRESENT) {
nodest:
		cmn_err(CE_WARN,
		    "man_start_lower: no dest for mdp(0x%p), caller(%d)!",
		    (void *)mdp, caller);
		if (caller == MAN_UPPER)
			freemsg(mp);
		goto exit;
	}

	if (mdp->md_state & MAN_DSTATE_CLOSING) {
		MAN_DBG(MAN_DATA, ("man_start_lower: mdp(0x%p) closing",
		    (void *)mdp));
		if (caller == MAN_UPPER)
			freemsg(mp);
		goto exit;
	}

	if ((mdp->md_state & MAN_DSTATE_PLUMBING) ||
	    (mdp->md_state == MAN_DSTATE_INITIALIZING) ||
	    (mdp->md_dlpistate != DL_IDLE)) {
		/*
		 * Defer until PLUMBED and DL_IDLE. See man_lwsrv().
		 */
		if (caller == MAN_UPPER) {
			/*
			 * Upper stream sending data down, add to deferred mblk
			 * list for stream.
			 */
			mutex_enter(&mdp->md_lock);
			if (mdp->md_dmp_count >= MAN_HIWAT) {
				freemsg(mp);
			} else {
				if (mdp->md_dmp_head == NULL) {
					ASSERT(mdp->md_dmp_tail == NULL);
					mdp->md_dmp_head = mp;
					mdp->md_dmp_tail = mp;
				} else {
					mdp->md_dmp_tail->b_next = mp;
					mdp->md_dmp_tail = mp;
				}
				mdp->md_dmp_count += msgsize(mp);
			}
			mutex_exit(&mdp->md_lock);
		}

		goto exit;
	}

exit:
	return (status);
}
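
/*
 * Summary of the transmit cases above when called from the upper
 * stream (MAN_UPPER): if the deferred list is active or the lower
 * stream is in transition, mp is appended to md_dmp_head/md_dmp_tail
 * (bounded by MAN_HIWAT, else dropped); in a 1-to-1 configuration
 * (flow_wq != NULL) mp is put back on the upper write queue; in a
 * 1-to-many configuration mp is queued on the lower write queue if
 * canput() succeeds, else dropped.
 */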
1940
1941/*
1942 * man_ioctl - handle ioctl requests for this driver (I_PLINK/I_PUNLINK)
1943 * or pass thru to the physical driver below.  Note that most M_IOCTLs we
1944 * care about come down the control msp, but the IOC ones come down the IP.
1945 * Called with exclusive inner perimeter.
1946 *
1947 *	wq - upper write queue of mxx
1948 *	mp - mblk ptr to DLPI ioctl request
1949 */
1950static void
1951man_ioctl(register queue_t *wq, register mblk_t *mp)
1952{
1953	manstr_t		*msp;
1954	struct iocblk		*iocp;
1955
1956	iocp = (struct iocblk *)mp->b_rptr;
1957	msp = (manstr_t *)wq->q_ptr;
1958
1959#ifdef DEBUG
1960	{
1961		char			ioc_cmd[30];
1962
1963		sprintf(ioc_cmd, "unhandled IOCTL 0x%x", iocp->ioc_cmd);
1964		MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI),
1965		    ("man_ioctl: wq(0x%p) mp(0x%p) cmd(%s)\n",
1966		    (void *)wq, (void *)mp,
1967		    (iocp->ioc_cmd == I_PLINK) ? "I_PLINK" :
1968		    (iocp->ioc_cmd == I_PUNLINK) ? "I_PUNLINK" :
1969		    (iocp->ioc_cmd == MAN_SETPATH) ? "MAN_SETPATH" :
1970		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
1971		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
1972	}
1973#endif /* DEBUG */
1974
1975
1976	/*
1977	 *  Handle the requests...
1978	 */
1979	switch ((unsigned int)iocp->ioc_cmd) {
1980
1981	case I_PLINK:
1982		man_plink(wq, mp);
1983		break;
1984
1985	case I_PUNLINK:
1986		man_unplink(wq, mp);
1987		break;
1988
1989	case MAN_SETPATH:
1990		man_setpath(wq, mp);
1991		break;
1992
1993	case MAN_GETEADDR:
1994		man_geteaddr(wq, mp);
1995		break;
1996
1997	case MAN_SET_LINKCHECK_TIME:
1998		man_set_linkcheck_time(wq, mp);
1999		break;
2000
2001	case MAN_SET_SC_IPADDRS:
2002		man_set_sc_ipaddrs(wq, mp);
2003		break;
2004
2005	case MAN_SET_SC_IP6ADDRS:
2006		man_set_sc_ip6addrs(wq, mp);
2007		break;
2008
2009	case DLIOCRAW:
2010		if (man_dlioc(msp, mp))
2011			miocnak(wq, mp, 0, ENOMEM);
2012		else {
2013			msp->ms_flags |= MAN_SFLAG_RAW;
2014			miocack(wq, mp, 0, 0);
2015		}
2016		break;
2017
2018	case DL_IOC_HDR_INFO:
2019		man_dl_ioc_hdr_info(wq, mp);
2020		break;
2021
2022	case MAN_ND_GET:
2023	case MAN_ND_SET:
2024		man_nd_getset(wq, mp);
2025		break;
2026
2027	default:
2028		MAN_DBG(MAN_DDI, ("man_ioctl: unknown ioc_cmd %d\n",
2029		    (unsigned int)iocp->ioc_cmd));
2030		miocnak(wq, mp, 0, EINVAL);
2031		break;
2032	}
2033exit:
2034	MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI), ("man_ioctl: exit\n"));
2035
2036}
2037
2038/*
2039 * man_plink: handle I_PLINK requests on the control stream
2040 */
2041void
2042man_plink(queue_t *wq, mblk_t *mp)
2043{
2044	struct linkblk	*linkp;
2045	man_linkrec_t	*lrp;
2046	int		status = 0;
2047
2048	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2049
2050	/*
2051	 * Create a record to hold lower stream info. man_plumb will
2052	 * retrieve it after calling ldi_ioctl(I_PLINK).
2053	 */
2054	lrp = man_kzalloc(sizeof (man_linkrec_t), KM_NOSLEEP);
2055	if (lrp == NULL) {
2056		status = ENOMEM;
2057		goto exit;
2058	}
2059
2060	lrp->l_muxid = linkp->l_index;
2061	lrp->l_wq = linkp->l_qbot;
2062	lrp->l_rq = RD(linkp->l_qbot);
2063
2064	man_linkrec_insert(lrp);
2065
2066exit:
2067	if (status)
2068		miocnak(wq, mp, 0, status);
2069	else
2070		miocack(wq, mp, 0, 0);
2071
2072}
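
/*
 * For illustration, a minimal sketch of the plumbing sequence that drives
 * the I_PLINK handled above. This models what man_plumb does (man_plumb
 * itself is outside this section); locals, li/ctl_lh setup and error
 * handling are elided, so treat the exact calls as an assumption rather
 * than a copy of man_plumb:
 *
 *	ldi_handle_t	lower_lh;
 *	int		muxid;
 *
 *	(void) ldi_open_by_name(ERI_PATH, FREAD | FWRITE, kcred,
 *	    &lower_lh, li);
 *	(void) ldi_ioctl(ctl_lh, I_PLINK, (intptr_t)lower_lh, FKIOCTL,
 *	    kcred, &muxid);
 *
 * man_ioctl() sees the I_PLINK on the control stream and man_plink()
 * records the muxid-to-lower-queue mapping that man_linkrec_find()
 * retrieves afterwards.
 */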
2073
2074/*
2075 * man_unplink - handle I_PUNLINK requests on the control stream
2076 */
2077void
2078man_unplink(queue_t *wq, mblk_t *mp)
2079{
2080	struct linkblk	*linkp;
2081
2082	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2083	RD(linkp->l_qbot)->q_ptr = NULL;
2084	WR(linkp->l_qbot)->q_ptr = NULL;
2085	miocack(wq, mp, 0, 0);
2086}
2087
2088void
2089man_linkrec_insert(man_linkrec_t *lrp)
2090{
2091	mutex_enter(&man_lock);
2092
2093	lrp->l_next = man_linkrec_head;
2094	man_linkrec_head = lrp;
2095
2096	mutex_exit(&man_lock);
2097
2098}
2099
2100static queue_t *
2101man_linkrec_find(int muxid)
2102{
2103	man_linkrec_t	*lpp;
2104	man_linkrec_t	*lp;
2105	queue_t		*wq = NULL;
2106
2107	mutex_enter(&man_lock);
2108
2109	if (man_linkrec_head == NULL)
2110		goto exit;
2111
2112	lp = lpp = man_linkrec_head;
2113	if (lpp->l_muxid == muxid) {
2114		man_linkrec_head = lpp->l_next;
2115	} else {
2116		for (lp = lpp->l_next; lp; lp = lp->l_next) {
2117			if (lp->l_muxid == muxid)
2118				break;
2119			lpp = lp;
2120		}
2121	}
2122
2123	if (lp == NULL)
2124		goto exit;
2125
2126	wq = lp->l_wq;
2127	ASSERT(wq != NULL);
2128
2129	lpp->l_next = lp->l_next;
2130	man_kfree(lp, sizeof (man_linkrec_t));
2131
2132exit:
2133	mutex_exit(&man_lock);
2134
2135	return (wq);
2136}
2137
2138/*
2139 * Set instance linkcheck timer value.
2140 */
2141static void
2142man_set_linkcheck_time(queue_t *wq, mblk_t *mp)
2143{
2144	mi_time_t	*mtp;
2145	int		error;
2146	man_t		*manp;
2147
2148	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: enter"));
2149
2150	error = miocpullup(mp, sizeof (mi_time_t));
2151	if (error != 0)
2152		goto exit;
2153
2154	mtp = (mi_time_t *)mp->b_cont->b_rptr;
2155
2156	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: mtp"));
2157	MAN_DBGCALL(MAN_LINK, man_print_mtp(mtp));
2158
2159	manp = ddi_get_soft_state(man_softstate, mtp->mtp_man_ppa);
2160	if (manp == NULL) {
2161		error = ENODEV;
2162		goto exit;
2163	}
2164
2165	manp->man_linkcheck_time = mtp->mtp_time;
2166exit:
2167	if (error)
2168		miocnak(wq, mp, 0, error);
2169	else
2170		miocack(wq, mp, sizeof (mi_time_t), 0);
2171}
2172
2173/*
2174 * Man path ioctl processing. Should only happen on the SSC. Called
2175 * with exclusive inner perimeter.
2176 */
2177static void
2178man_setpath(queue_t *wq, mblk_t *mp)
2179{
2180	mi_path_t		*mip;
2181	int			error;
2182
2183	error = miocpullup(mp, sizeof (mi_path_t));
2184	if (error != 0)
2185		goto exit;
2186
2187	mip = (mi_path_t *)mp->b_cont->b_rptr;
2188	mutex_enter(&man_lock);
2189	error = man_pg_cmd(mip, NULL);
2190	mutex_exit(&man_lock);
2191
2192exit:
2193	if (error)
2194		miocnak(wq, mp, 0, error);
2195	else
2196		miocack(wq, mp, sizeof (mi_path_t), 0);
2197}
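
/*
 * For illustration, the shape of the mi_path_t that a MAN_SETPATH caller
 * (or an in-kernel man_pg_cmd() caller) builds - compare man_do_dr_attach
 * below; pg_id, dev and sc_eaddr are example values, not fixed names:
 *
 *	mi_path_t	mpath;
 *
 *	bzero((char *)&mpath, sizeof (mi_path_t));
 *	mpath.mip_cmd = MI_PATH_ADD;
 *	mpath.mip_man_ppa = 0;
 *	mpath.mip_pg_id = pg_id;
 *	mpath.mip_devs[0] = dev;
 *	mpath.mip_ndevs = 1;
 *	ether_copy(&sc_eaddr, &mpath.mip_eaddr);
 */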
2198
2199/*
2200 * Get the local ethernet address of this machine.
2201 */
2202static void
2203man_geteaddr(queue_t *wq, mblk_t *mp)
2204{
2205	eaddr_t			*eap;
2206	int			error;
2207
2208	error = miocpullup(mp, sizeof (eaddr_t));
2209	if (error != 0) {
2210		miocnak(wq, mp, 0, error);
2211		return;
2212	}
2213
2214	eap = (eaddr_t *)mp->b_cont->b_rptr;
2215	(void) localetheraddr(NULL, eap);
2216	miocack(wq, mp, sizeof (eaddr_t), 0);
2217}
2218
2219/*
2220 * Set my SC and other SC IPv4 addresses for use in man_pinger().
2221 */
2222static void
2223man_set_sc_ipaddrs(queue_t *wq, mblk_t *mp)
2224{
2225	int			error;
2226
2227	error = miocpullup(mp, sizeof (man_sc_ipaddrs_t));
2228	if (error != 0)
2229		goto exit;
2230
2231	man_sc_ipaddrs = *(man_sc_ipaddrs_t *)mp->b_cont->b_rptr;
2232
2233#ifdef DEBUG
2234	{
2235		char	buf[INET_ADDRSTRLEN];
2236
2237		(void) inet_ntop(AF_INET,
2238		    (void *) &man_sc_ipaddrs.ip_other_sc_ipaddr,
2239		    buf, INET_ADDRSTRLEN);
2240		MAN_DBG(MAN_CONFIG, ("ip_other_sc_ipaddr = %s", buf));
2241		(void) inet_ntop(AF_INET,
2242		    (void *) &man_sc_ipaddrs.ip_my_sc_ipaddr,
2243		    buf, INET_ADDRSTRLEN);
2244		MAN_DBG(MAN_CONFIG, ("ip_my_sc_ipaddr = %s", buf));
2245	}
2246#endif /* DEBUG */
2247exit:
2248	if (error)
2249		miocnak(wq, mp, 0, error);
2250	else
2251		miocack(wq, mp, sizeof (man_sc_ipaddrs_t), 0);
2252}
2253
2254/*
2255 * Set my SC and other SC IPv6 addresses for use in man_pinger().
2256 */
2257static void
2258man_set_sc_ip6addrs(queue_t *wq, mblk_t *mp)
2259{
2260	int			error;
2261
2262	error = miocpullup(mp, sizeof (man_sc_ip6addrs_t));
2263	if (error != 0)
2264		goto exit;
2265
2266	man_sc_ip6addrs = *(man_sc_ip6addrs_t *)mp->b_cont->b_rptr;
2267
2268#ifdef DEBUG
2269	{
2270		char	buf[INET6_ADDRSTRLEN];
2271
2272		(void) inet_ntop(AF_INET6,
2273		    (void *) &man_sc_ip6addrs.ip6_other_sc_ipaddr,
2274		    buf, INET6_ADDRSTRLEN);
2275		MAN_DBG(MAN_CONFIG, ("ip6_other_sc_ipaddr = %s", buf));
2276		(void) inet_ntop(AF_INET6,
2277		    (void *) &man_sc_ip6addrs.ip6_my_sc_ipaddr,
2278		    buf, INET6_ADDRSTRLEN);
2279		MAN_DBG(MAN_CONFIG, ("ip6_my_sc_ipaddr = %s", buf));
2280	}
2281#endif /* DEBUG */
2282exit:
2283	if (error)
2284		miocnak(wq, mp, 0, error);
2285	else
2286		miocack(wq, mp, sizeof (man_sc_ip6addrs_t), 0);
2287}
2288
2289/*
2290 * M_DATA fastpath info request.
2291 */
2292static void
2293man_dl_ioc_hdr_info(queue_t *wq, mblk_t *mp)
2294{
2295	manstr_t		*msp;
2296	man_t			*manp;
2297	mblk_t			*nmp;
2298	man_dladdr_t		*dlap;
2299	dl_unitdata_req_t	*dludp;
2300	struct	ether_header	*headerp;
2301	t_uscalar_t		off, len;
2302	int			status = 0;
2303
2304	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: enter"));
2305
2306	msp = (manstr_t *)wq->q_ptr;
2307	manp = msp->ms_manp;
2308	if (manp == NULL) {
2309		status = EINVAL;
2310		goto exit;
2311	}
2312
2313	status = miocpullup(mp, sizeof (dl_unitdata_req_t) + MAN_ADDRL);
2314	if (status != 0)
2315		goto exit;
2316
2317	/*
2318	 * Sanity check the DL_UNITDATA_REQ destination address
2319	 * offset and length values.
2320	 */
2321	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
2322	off = dludp->dl_dest_addr_offset;
2323	len = dludp->dl_dest_addr_length;
2324	if (dludp->dl_primitive != DL_UNITDATA_REQ ||
2325	    !MBLKIN(mp->b_cont, off, len) || len != MAN_ADDRL) {
2326		status = EINVAL;
2327		goto exit;
2328	}
2329
2330	dlap = (man_dladdr_t  *)(mp->b_cont->b_rptr + off);
2331
2332	/*
2333	 * Allocate a new mblk to hold the ether header.
2334	 */
2335	if ((nmp = allocb(ETHERHEADER_SIZE, BPRI_MED)) == NULL) {
2336		status = ENOMEM;
2337		goto exit;
2338	}
2339
2340	/* We only need one dl_ioc_hdr mblk for replay */
2341	if (!(msp->ms_flags & MAN_SFLAG_FAST))
2342		status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2343
2344	/* Forward the packet to all lower destinations. */
2345	if ((status != 0) || ((status = man_dlpi_senddown(msp, mp)) != 0)) {
2346		freemsg(nmp);
2347		goto exit;
2348	}
2349
2350	nmp->b_wptr += ETHERHEADER_SIZE;
2351
2352	/*
2353	 * Fill in the ether header.
2354	 */
2355	headerp = (struct ether_header *)nmp->b_rptr;
2356	ether_copy(&dlap->dl_phys, &headerp->ether_dhost);
2357	ether_copy(&manp->man_eaddr, &headerp->ether_shost);
2358	put_ether_type(headerp, dlap->dl_sap);
2359
2360	/*
2361	 * Link new mblk in after the "request" mblks.
2362	 */
2363	linkb(mp, nmp);
2364
2365exit:
2366	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: returns, status = %d",
2367	    status));
2368
2369	if (status) {
2370		miocnak(wq, mp, 0, status);
2371	} else {
2372		msp = (manstr_t *)wq->q_ptr;
2373		msp->ms_flags |= MAN_SFLAG_FAST;
2374		miocack(wq, mp, msgsize(mp->b_cont), 0);
2375	}
2376
2377}
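
/*
 * Note on the fastpath reply built above: the M_IOCACK sent upstream
 * carries the caller's DL_UNITDATA_REQ mblk with the new
 * ETHERHEADER_SIZE M_DATA mblk linked behind it via linkb(). The upper
 * module may cache that ether header template and subsequently send
 * fully formed M_DATA messages down, bypassing DL_UNITDATA_REQ
 * processing - hence MAN_SFLAG_FAST is set on the stream.
 */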
2378
2379/*
2380 * man_uwsrv - Upper write queue service routine to handle deferred
2381 * DLPI messages issued from upstream, the write side of the upper half
2382 * of the multiplexor. It is also used by man_bwork to switch the lower
2383 * multiplexor.
2384 *
2385 *	wq - upper write queue of mxx
2386 */
2387static int
2388man_uwsrv(queue_t *wq)
2389{
2390	register mblk_t		*mp;
2391	manstr_t		*msp;		/* per stream data */
2392	man_t			*manp;		/* per instance data */
2393	ehdr_t			*ep;
2394	int			status;
2395
2396	msp = (manstr_t *)wq->q_ptr;
2397
2398	MAN_DBG(MAN_UWSRV, ("man_uwsrv: wq(0x%p) msp", (void *)wq));
2399	MAN_DBGCALL(MAN_UWSRV, man_print_msp(msp));
2400
2401	if (msp == NULL)
2402		goto done;
2403
2404	manp = msp->ms_manp;
2405
2406	while (mp = getq(wq)) {
2407
2408		switch (DB_TYPE(mp)) {
2409		/*
2410		 * Can probably remove this as I never put data messages
2411		 * here.
2412		 */
2413		case M_DATA:
2414			if (manp) {
2415				ep = (ehdr_t *)mp->b_rptr;
2416				status = man_start(wq, mp, &ep->ether_dhost);
2417				if (status) {
2418					/*
2419					 * man_start() indicated flow control
2420					 * situation, stop processing now.
2421					 */
2422					goto break_loop;
2423				}
2424			} else
2425				freemsg(mp);
2426			break;
2427
2428		case M_PROTO:
2429		case M_PCPROTO:
2430			status = man_proto(wq, mp);
2431			if (status) {
2432				/*
2433				 * man_proto() indicated flow control
2434				 * situation detected by man_start(),
2435				 * stop processing now.
2436				 */
2437				goto break_loop;
2438			}
2439			break;
2440
2441		default:
2442			MAN_DBG(MAN_UWSRV, ("man_uwsrv: discarding mp(0x%p)",
2443			    (void *)mp));
2444			freemsg(mp);
2445			break;
2446		}
2447	}
2448
2449break_loop:
2450	/*
2451	 * Check to see if bgthread wants us to do something inside the
2452	 * perimeter.
2453	 */
2454	if ((msp->ms_flags & MAN_SFLAG_CONTROL) &&
2455	    man_iwork_q->q_work != NULL) {
2456
2457		man_iwork();
2458	}
2459
2460done:
2461
2462	MAN_DBG(MAN_UWSRV, ("man_uwsrv: returns"));
2463
2464	return (0);
2465}
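
/*
 * Note on the flow-control handshake above: man_start() (ultimately
 * man_start_lower()) returns nonzero when it has deferred or putbq()'d a
 * message, so man_uwsrv() stops draining its queue. When the congestion
 * clears, the queue is re-enabled (qenable/back-enabling) and man_uwsrv()
 * resumes where it left off.
 */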
2466
2467
2468/*
2469 * man_proto - handle DLPI protocol requests issued from upstream.
2470 * Called by man_uwsrv().  We disassociate upper and lower multiplexor
2471 * DLPI state transitions. The upper stream here (manstr_t) transitions
2472 * appropriately, saves the DLPI requests via man_dlpi(), and then
2473 * arranges for the DLPI request to be sent down via man_dlpi_senddown() if
2474 * appropriate.
2475 *
2476 *	wq - upper write queue of mxx
2477 *	mp - mblk ptr to protocol request
2478 */
2479static int
2480man_proto(queue_t *wq, mblk_t *mp)
2481{
2482	union DL_primitives	*dlp;
2483	int			flow_status = 0;
2484
2485	dlp = (union DL_primitives *)mp->b_rptr;
2486
2487	MAN_DBG((MAN_UWSRV | MAN_DLPI),
2488	    ("man_proto: mp(0x%p) prim(%s)\n", (void *)mp,
2489	    dps[dlp->dl_primitive]));
2490
2491	switch (dlp->dl_primitive) {
2492	case DL_UNITDATA_REQ:
2493		flow_status = man_udreq(wq, mp);
2494		break;
2495
2496	case DL_ATTACH_REQ:
2497		man_areq(wq, mp);
2498		break;
2499
2500	case DL_DETACH_REQ:
2501		man_dreq(wq, mp);
2502		break;
2503
2504	case DL_BIND_REQ:
2505		man_breq(wq, mp);
2506		break;
2507
2508	case DL_UNBIND_REQ:
2509		man_ubreq(wq, mp);
2510		break;
2511
2512	case DL_INFO_REQ:
2513		man_ireq(wq, mp);
2514		break;
2515
2516	case DL_PROMISCON_REQ:
2517		man_ponreq(wq, mp);
2518		break;
2519
2520	case DL_PROMISCOFF_REQ:
2521		man_poffreq(wq, mp);
2522		break;
2523
2524	case DL_ENABMULTI_REQ:
2525		man_emreq(wq, mp);
2526		break;
2527
2528	case DL_DISABMULTI_REQ:
2529		man_dmreq(wq, mp);
2530		break;
2531
2532	case DL_PHYS_ADDR_REQ:
2533		man_pareq(wq, mp);
2534		break;
2535
2536	case DL_SET_PHYS_ADDR_REQ:
2537		man_spareq(wq, mp);
2538		break;
2539
2540	default:
2541		MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: prim(%d)\n",
2542		    dlp->dl_primitive));
2543		dlerrorack(wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
2544		break;
2545
2546	} /* End switch */
2547
2548	MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: exit\n"));
2549	return (flow_status);
2550
2551}
2552
2553static int
2554man_udreq(queue_t *wq, mblk_t *mp)
2555{
2556	manstr_t		*msp;
2557	dl_unitdata_req_t	*dludp;
2558	mblk_t	*nmp;
2559	man_dladdr_t		*dlap;
2560	t_uscalar_t 		off, len;
2561	int 			flow_status = 0;
2562
2563	msp = (manstr_t *)wq->q_ptr;
2564
2565
2566	if (msp->ms_dlpistate != DL_IDLE) {
2567		dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
2568		return (flow_status);
2569	}
2570	dludp = (dl_unitdata_req_t *)mp->b_rptr;
2571	off = dludp->dl_dest_addr_offset;
2572	len = dludp->dl_dest_addr_length;
2573
2574	/*
2575	 * Validate destination address format.
2576	 */
2577	if (!MBLKIN(mp, off, len) || (len != MAN_ADDRL)) {
2578		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADADDR, 0);
2579		return (flow_status);
2580	}
2581
2582	/*
2583	 * Error if no M_DATA follows.
2584	 */
2585	nmp = mp->b_cont;
2586	if (nmp == NULL) {
2587		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADDATA, 0);
2588		return (flow_status);
2589	}
2590
2591	dlap = (man_dladdr_t *)(mp->b_rptr + off);
2592
2593	flow_status = man_start(wq, mp, &dlap->dl_phys);
2594	return (flow_status);
2595}
2596
2597/*
2598 * Handle DL_ATTACH_REQ.
2599 */
2600static void
2601man_areq(queue_t *wq, mblk_t *mp)
2602{
2603	man_t			*manp;	/* per instance data */
2604	manstr_t		*msp;	/* per stream data */
2605	short			ppa;
2606	union DL_primitives	*dlp;
2607	mblk_t			*preq = NULL;
2608	int			did_refcnt = FALSE;
2609	int			dlerror = 0;
2610	int			status = 0;
2611
2612	msp = (manstr_t *)wq->q_ptr;
2613	dlp = (union DL_primitives *)mp->b_rptr;
2614
2615	/*
2616	 * Attach us to MAN PPA (device instance).
2617	 */
2618	if (MBLKL(mp) < DL_ATTACH_REQ_SIZE) {
2619		dlerror = DL_BADPRIM;
2620		goto exit;
2621	}
2622
2623	if (msp->ms_dlpistate != DL_UNATTACHED) {
2624		dlerror = DL_OUTSTATE;
2625		goto exit;
2626	}
2627
2628	ppa = dlp->attach_req.dl_ppa;
2629	if (ppa == -1 || qassociate(wq, ppa) != 0) {
2630		dlerror = DL_BADPPA;
2631		MAN_DBG(MAN_WARN, ("man_areq: bad PPA %d", ppa));
2632		goto exit;
2633	}
2634
2635	mutex_enter(&man_lock);
2636	manp = ddi_get_soft_state(man_softstate, ppa);
2637	ASSERT(manp != NULL);	/* qassociate() succeeded */
2638
2639	manp->man_refcnt++;
2640	did_refcnt = TRUE;
2641	mutex_exit(&man_lock);
2642
2643	/*
2644	 * Create a DL replay list for the lower stream. These won't
2645	 * actually be sent down until the lower streams are made active
2646	 * (sometime after the call to man_init_dests below).
2647	 */
2648	preq = man_alloc_physreq_mp(&manp->man_eaddr);
2649	if (preq == NULL) {
2650		dlerror = DL_SYSERR;
2651		status = ENOMEM;
2652		goto exit;
2653	}
2654
2655	/*
2656	 * Make a copy for DLPI resync of the upper and lower streams.
2657	 */
2658	if (man_dlpi(msp, mp)) {
2659		dlerror = DL_SYSERR;
2660		status = ENOMEM;
2661		goto exit;
2662	}
2663
2664	/* TBD - need to clean off ATTACH req on failure here. */
2665	if (man_dlpi(msp, preq)) {
2666		dlerror = DL_SYSERR;
2667		status = ENOMEM;
2668		goto exit;
2669	}
2670
2671	/*
2672	 * man_init_dests/man_start_dest needs these set before call.
2673	 */
2674	msp->ms_manp = manp;
2675	msp->ms_meta_ppa = ppa;
2676
2677	/*
2678	 *  Allocate and init lower destination structures.
2679	 */
2680	ASSERT(msp->ms_dests == NULL);
2681	if (man_init_dests(manp, msp)) {
2682		mblk_t	 *tmp;
2683
2684		/*
2685		 * If we can't get the lower streams ready, then
2686		 * remove the messages from the DL replay list and
2687		 * fail the attach.
2688		 */
2689		while ((tmp = msp->ms_dl_mp) != NULL) {
2690			msp->ms_dl_mp = msp->ms_dl_mp->b_next;
2691			tmp->b_next = tmp->b_prev = NULL;
2692			freemsg(tmp);
2693		}
2694
2695		msp->ms_manp = NULL;
2696		msp->ms_meta_ppa = -1;
2697
2698		dlerror = DL_SYSERR;
2699		status = ENOMEM;
2700		goto exit;
2701	}
2702
2703	MAN_DBG(MAN_DLPI, ("man_areq: ppa 0x%x man_refcnt: %d\n",
2704	    ppa, manp->man_refcnt));
2705
2706	SETSTATE(msp, DL_UNBOUND);
2707
2708exit:
2709	if (dlerror == 0) {
2710		dlokack(wq, mp, DL_ATTACH_REQ);
2711	} else {
2712		if (did_refcnt) {
2713			mutex_enter(&man_lock);
2714			manp->man_refcnt--;
2715			mutex_exit(&man_lock);
2716		}
2717		dlerrorack(wq, mp, DL_ATTACH_REQ, dlerror, status);
2718		(void) qassociate(wq, -1);
2719	}
2720	if (preq != NULL)
2721		freemsg(preq);
2722
2723}
2724
2725/*
2726 * Called at DL_ATTACH time.
2727 * Man_lock is held to protect the pathgroup list (man_pg).
2728 */
2729static int
2730man_init_dests(man_t *manp, manstr_t *msp)
2731{
2732	man_dest_t	*mdp;
2733	man_pg_t	*mpg;
2734	int		i;
2735
2736	mdp = man_kzalloc(MAN_DEST_ARRAY_SIZE, KM_NOSLEEP);
2737	if (mdp == NULL)
2738		return (ENOMEM);
2739
2740	msp->ms_dests = mdp;
2741
2742	mutex_enter(&man_lock);
2743	for (i = 0; i < MAN_MAX_DESTS; i++) {
2744
2745		mdp[i].md_muxid = -1;	/* muxid 0 is valid */
2746		mutex_init(&mdp[i].md_lock, NULL, MUTEX_DRIVER, NULL);
2747
2748		mpg = man_find_pg_by_id(manp->man_pg, i);
2749
2750		if (mpg && man_find_active_path(mpg->mpg_pathp))
2751			man_start_dest(&mdp[i], msp, mpg);
2752	}
2753	mutex_exit(&man_lock);
2754
2755	return (0);
2756}
2757
2758/*
2759 * Get a destination ready for use.
2760 */
2761static void
2762man_start_dest(man_dest_t *mdp, manstr_t *msp, man_pg_t *mpg)
2763{
2764	man_path_t	*ap;
2765
2766	mdp->md_muxid = -1;
2767	mdp->md_dlpistate = DL_UNATTACHED;
2768	mdp->md_msp = msp;
2769	mdp->md_rq = msp->ms_rq;
2770	mdp->md_pg_id = mpg->mpg_pg_id;
2771
2772	ASSERT(msp->ms_manp);
2773
2774	ether_copy(&msp->ms_manp->man_eaddr, &mdp->md_src_eaddr);
2775	ether_copy(&mpg->mpg_dst_eaddr, &mdp->md_dst_eaddr);
2776
2777	ap = man_find_active_path(mpg->mpg_pathp);
2778	ASSERT(ap);
2779	mdp->md_device = ap->mp_device;
2780
2781	/*
2782	 * Set up linktimers so that the first time through, we will do
2783	 * a failover.
2784	 */
2785	mdp->md_linkstate = MAN_LINKFAIL;
2786	mdp->md_state = MAN_DSTATE_INITIALIZING;
2787	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
2788	    (void *)mdp, man_gettimer(MAN_TIMER_INIT, mdp));
2789
2790	/*
2791	 * As an optimization, if there is only one destination,
2792	 * remember the destination pointer. Used by man_start().
2793	 */
2794	man_set_optimized_dest(msp);
2795
2796	MAN_DBG(MAN_DEST, ("man_start_dest: mdp"));
2797	MAN_DBGCALL(MAN_DEST, man_print_mdp(mdp));
2798}
2799
2800static void
2801man_set_optimized_dest(manstr_t *msp)
2802{
2803	int		count = 0;
2804	int		i;
2805	man_dest_t	*mdp = NULL;
2806
2807	for (i = 0; i < MAN_MAX_DESTS; i++) {
2808		if (msp->ms_dests[i].md_msp != NULL) {
2809			count++;
2810			mdp = &msp->ms_dests[i];
2811		}
2812	}
2813
2814	if (count == 1)
2815		msp->ms_destp = mdp;
2816	else
2817		msp->ms_destp = NULL;
2818
2819}
2820
2821/*
2822 * Catch a DLPI message for replay, and arrange to send it down
2823 * to any destinations not PLUMBING. See man_dlpi_replay().
2824 */
2825static int
2826man_dlpi(manstr_t *msp, mblk_t *mp)
2827{
2828	int	status;
2829
2830	status = man_dl_catch(&msp->ms_dl_mp, mp);
2831	if (status == 0)
2832		status = man_dlpi_senddown(msp, mp);
2833
2834	return (status);
2835}
2836
2837/*
2838 * Catch IOCTL type DL_ messages.
2839 */
2840static int
2841man_dlioc(manstr_t *msp, mblk_t *mp)
2842{
2843	int status;
2844
2845	status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2846	if (status == 0)
2847		status = man_dlpi_senddown(msp, mp);
2848
2849	return (status);
2850}
2851
2852/*
2853 * We catch all DLPI messages that we have to resend to a new AP'ed
2854 * device to put it in the right state, linking them together w/ their
2855 * b_next fields onto the caller's catch list (e.g. msp->ms_dl_mp).
2856 * We must be careful to restore b_next fields before dupmsg/freemsg!
2857 *
2858 *	mplist - address of the catch list head to append to
2859 *	mp - pointer to DLPI request to catch
2860 */
2861static int
2862man_dl_catch(mblk_t **mplist, mblk_t *mp)
2863{
2864	mblk_t			*dupmp;
2865	mblk_t			*tmp;
2866	unsigned		prim;
2867	int			status = 0;
2868
2869	dupmp = copymsg(mp);
2870	if (dupmp == NULL) {
2871		status = ENOMEM;
2872		goto exit;
2873	}
2874
2875
2876	if (*mplist == NULL)
2877		*mplist = dupmp;
2878	else {
2879		for (tmp = *mplist; tmp->b_next; )
2880			tmp = tmp->b_next;
2881
2882		tmp->b_next = dupmp;
2883	}
2884
2885	prim = DL_PRIM(mp);
2886	MAN_DBG(MAN_DLPI,
2887	    ("man_dl_catch: adding %s\n",
2888	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
2889	    (prim == DLIOCRAW) ? "DLIOCRAW" :
2890	    (prim == DL_PROMISCON_REQ) ? promisc[DL_PROMISCON_TYPE(mp)] :
2891	    dps[prim]));
2892
2893exit:
2894
2895	return (status);
2896}
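
/*
 * For example, after a typical attach/bind sequence the per-stream catch
 * list might look like the following (arrival order, linked by b_next;
 * the middle entry comes from man_alloc_physreq_mp() and is shown here
 * as an assumption about its contents):
 *
 *	msp->ms_dl_mp:
 *	    DL_ATTACH_REQ -> DL_SET_PHYS_ADDR_REQ -> DL_BIND_REQ
 *
 * man_dlpi_replay() re-sends copies of these requests to bring a newly
 * switched-in lower stream up to the same DLPI state as the upper stream.
 */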
2897
2898/*
2899 * Send down a single DLPI M_[PC]PROTO to all currently valid dests.
2900 *
2901 *	msp - ptr to NDM stream structure the DL_ message was received on.
2902 *	mp - ptr to mblk containing DL_ request.
2903 */
2904static int
2905man_dlpi_senddown(manstr_t *msp, mblk_t *mp)
2906{
2907	man_dest_t	*mdp;
2908	int		i;
2909	mblk_t		*rmp[MAN_MAX_DESTS];	/* Copy to replay */
2910	int		dstate[MAN_MAX_DESTS];
2911	int		no_dests = TRUE;
2912	int		status = 0;
2913
2914	if (msp->ms_dests == NULL)
2915		goto exit;
2916
2917	for (i = 0; i < MAN_MAX_DESTS; i++) {
2918		mdp = &msp->ms_dests[i];
2919		if (mdp->md_state == MAN_DSTATE_READY) {
2920			dstate[i] = TRUE;
2921			no_dests = FALSE;
2922		} else {
2923			dstate[i] = FALSE;
2924		}
2925		rmp[i] = NULL;
2926	}
2927
2928	if (no_dests)
2929		goto exit;
2930
2931	/*
2932	 * Build replay and duplicate list for all possible destinations.
2933	 */
2934	for (i = 0; i < MAN_MAX_DESTS; i++) {
2935		if (dstate[i]) {
2936			rmp[i] = copymsg(mp);
2937			if (rmp[i] == NULL) {
2938				status = ENOMEM;
2939				break;
2940			}
2941		}
2942	}
2943
2944	if (status == 0) {
2945		for (i = 0; i < MAN_MAX_DESTS; i++)
2946			if (dstate[i]) {
2947				mdp = &msp->ms_dests[i];
2948
2949				ASSERT(mdp->md_wq != NULL);
2950				ASSERT(mp->b_next == NULL);
2951				ASSERT(mp->b_prev == NULL);
2952
2953				man_dlpi_replay(mdp, rmp[i]);
2954			}
2955	} else {
2956		for (; i >= 0; i--)
2957			if (dstate[i] && rmp[i])
2958				freemsg(rmp[i]);
2959	}
2960
2961exit:
2962	return (status);
2963}
2964
2965/*
2966 * man_dlpi_replay - traverse the list of DLPI requests and reapply them to
2967 * get the upper and lower streams into the same state. Called holding inner
2968 * perimeter lock exclusive. Note that we defer M_IOCTL type DLPI messages
2969 * until we get an OK_ACK to our ATTACH (see man_lrsrv and
2970 * man_dlioc_replay).
2971 *
2972 * 	mdp - pointer to lower queue (destination)
2973 *	rmp - list of mblks to send down stream.
2974 */
2975static void
2976man_dlpi_replay(man_dest_t *mdp, mblk_t *rmp)
2977{
2978	mblk_t			*mp;
2979	union DL_primitives	*dlp = NULL;
2980
2981	MAN_DBG(MAN_DLPI, ("man_dlpi_replay: mdp(0x%p)", (void *)mdp));
2982
2983	while (rmp) {
2984		mp = rmp;
2985		rmp = rmp->b_next;
2986		mp->b_prev = mp->b_next = NULL;
2987
2988		dlp = (union DL_primitives *)mp->b_rptr;
2989		MAN_DBG(MAN_DLPI,
2990		    ("man_dlpi_replay: mdp(0x%p) sending %s\n",
2991		    (void *)mdp,
2992		    (dlp->dl_primitive == DL_IOC_HDR_INFO) ?
2993		    "DL_IOC_HDR_INFO" : (dlp->dl_primitive == DLIOCRAW) ?
2994		    "DLIOCRAW" : dps[(unsigned)(dlp->dl_primitive)]));
2995
2996		if (dlp->dl_primitive == DL_ATTACH_REQ) {
2997			/*
2998			 * Insert the lower device's PPA.
2999			 */
3000			dlp->attach_req.dl_ppa = mdp->md_device.mdev_ppa;
3001		}
3002
3003		(void) putnext(mdp->md_wq, mp);
3004	}
3005
3006}
3007
3008static void
3009man_dreq(queue_t *wq, mblk_t *mp)
3010{
3011	manstr_t	*msp;	/* per stream data */
3012	man_work_t	*wp;
3013
3014	msp = (manstr_t *)wq->q_ptr;
3015
3016	if (MBLKL(mp) < DL_DETACH_REQ_SIZE) {
3017		dlerrorack(wq, mp, DL_DETACH_REQ, DL_BADPRIM, 0);
3018		return;
3019	}
3020
3021	if (msp->ms_dlpistate != DL_UNBOUND) {
3022		dlerrorack(wq, mp, DL_DETACH_REQ, DL_OUTSTATE, 0);
3023		return;
3024	}
3025
3026	ASSERT(msp->ms_dests != NULL);
3027
3028	wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_NOSLEEP);
3029	if (wp == NULL) {
3030		dlerrorack(wq, mp, DL_DETACH_REQ, DL_SYSERR, ENOMEM);
3031		return;
3032	}
3033	man_dodetach(msp, wp);
3034	(void) qassociate(wq, -1);
3035
3036	SETSTATE(msp, DL_UNATTACHED);
3037
3038	dlokack(wq, mp, DL_DETACH_REQ);
3039}
3040
3041static void
3042man_dl_clean(mblk_t **mplist)
3043{
3044	mblk_t	*tmp;
3045
3046	/*
3047	 * Toss everything.
3048	 */
3049	while ((tmp = *mplist) != NULL) {
3050		*mplist = (*mplist)->b_next;
3051		tmp->b_next = tmp->b_prev = NULL;
3052		freemsg(tmp);
3053	}
3054
3055}
3056
3057/*
3058 * man_dl_release - Remove the corresponding DLPI request from the
3059 * catch list. Walk thru the catch list looking for the other half of
3060 * the pair and delete it.  If we are detaching, delete the entire list.
3061 *
3062 *	mplist - address of the catch list head to search
3063 *	mp  - pointer to mblk holding the first half of the pair.  We will
3064 * 		delete the other half of the pair based on this.
3065 */
3066static void
3067man_dl_release(mblk_t **mplist, mblk_t *mp)
3068{
3069	uchar_t			match_dbtype;
3070	mblk_t			*tmp;
3071	mblk_t			*tmpp;
3072	int			matched = FALSE;
3073
3074	if (*mplist == NULL)
3075		goto exit;
3076
3077	match_dbtype = DB_TYPE(mp);
3078
3079	/*
3080	 * Currently we only clean DL_ PROTO type messages. There is
3081	 * no way to turn off M_CTL or DL_IOC stuff other than sending
3082	 * down a DL_DETACH, which resets everything.
3083	 */
3084	if (match_dbtype != M_PROTO && match_dbtype != M_PCPROTO) {
3085		goto exit;
3086	}
3087
3088	/*
3089	 * Selectively find a caught mblk that matches this one and
3090	 * remove it from the list
3091	 */
3092	tmp = tmpp = *mplist;
3093	matched = man_match_proto(mp, tmp);
3094	if (matched) {
3095		*mplist = tmp->b_next;
3096		tmp->b_next = tmp->b_prev = NULL;
3097	} else {
3098		for (tmp = tmp->b_next; tmp != NULL; tmp = tmp->b_next) {
3099			if (matched = man_match_proto(mp, tmp))
3100				break;
3101			tmpp = tmp;
3102		}
3103
3104		if (matched) {
3105			tmpp->b_next = tmp->b_next;
3106			tmp->b_next = tmp->b_prev = NULL;
3107		}
3108	}
3109
3110exit:
3111	if (matched) {
3112
3113		MAN_DBG(MAN_DLPI, ("man_dl_release: release %s",
3114		    (DL_PRIM(mp) == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3115		    (DL_PRIM(mp) == DLIOCRAW) ? "DLIOCRAW" :
3116		    dps[(int)DL_PRIM(mp)]));
3117
3118		freemsg(tmp);
3119	}
3120	MAN_DBG(MAN_DLPI, ("man_dl_release: returns"));
3121
3122}
3123
3124/*
3125 * Compare two DL_ messages. If they are complementary (e.g. DL_UNBIND
3126 * complements DL_BIND), return true.
3127 */
3128static int
3129man_match_proto(mblk_t *mp1, mblk_t *mp2)
3130{
3131	t_uscalar_t	prim1;
3132	t_uscalar_t	prim2;
3133	int		matched = FALSE;
3134
3135	/*
3136	 * Primitive to clean off list.
3137	 */
3138	prim1 = DL_PRIM(mp1);
3139	prim2 = DL_PRIM(mp2);
3140
3141	switch (prim1) {
3142	case DL_UNBIND_REQ:
3143		if (prim2 == DL_BIND_REQ)
3144			matched = TRUE;
3145		break;
3146
3147	case DL_PROMISCOFF_REQ:
3148		if (prim2 == DL_PROMISCON_REQ) {
3149			dl_promiscoff_req_t	*poff1;
3150			dl_promiscoff_req_t	*poff2;
3151
3152			poff1 = (dl_promiscoff_req_t *)mp1->b_rptr;
3153			poff2 = (dl_promiscoff_req_t *)mp2->b_rptr;
3154
3155			if (poff1->dl_level == poff2->dl_level)
3156				matched = TRUE;
3157		}
3158		break;
3159
3160	case DL_DISABMULTI_REQ:
3161		if (prim2 == DL_ENABMULTI_REQ) {
3162			union DL_primitives	*dlp;
3163			t_uscalar_t		off;
3164			eaddr_t			*addrp1;
3165			eaddr_t			*addrp2;
3166
3167			dlp = (union DL_primitives *)mp1->b_rptr;
3168			off = dlp->disabmulti_req.dl_addr_offset;
3169			addrp1 = (eaddr_t *)(mp1->b_rptr + off);
3170
3171			dlp = (union DL_primitives *)mp2->b_rptr;
3172			off = dlp->disabmulti_req.dl_addr_offset;
3173			addrp2 = (eaddr_t *)(mp2->b_rptr + off);
3174
3175			if (ether_cmp(addrp1, addrp2) == 0)
3176				matched = TRUE;
3177		}
3178		break;
3179
3180	default:
3181		break;
3182	}
3183
3184	MAN_DBG(MAN_DLPI, ("man_match_proto returns %d", matched));
3185
3186	return (matched);
3187}
3188
3189/*
3190 * Bind the upper stream to a particular SAP. Called with exclusive inner
3191 * perimeter QPAIR, shared outer perimeter.
3192 */
3193static void
3194man_breq(queue_t *wq, mblk_t *mp)
3195{
3196	man_t			*manp;	/* per instance data */
3197	manstr_t		*msp;	/* per stream data */
3198	union DL_primitives	*dlp;
3199	man_dladdr_t		man_addr;
3200	t_uscalar_t		sap;
3201	t_uscalar_t		xidtest;
3202
3203	msp = (manstr_t *)wq->q_ptr;
3204
3205	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
3206		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADPRIM, 0);
3207		return;
3208	}
3209
3210	if (msp->ms_dlpistate != DL_UNBOUND) {
3211		dlerrorack(wq, mp, DL_BIND_REQ, DL_OUTSTATE, 0);
3212		return;
3213	}
3214
3215	dlp = (union DL_primitives *)mp->b_rptr;
3216	manp = msp->ms_manp;			/* valid after attach */
3217	sap = dlp->bind_req.dl_sap;
3218	xidtest = dlp->bind_req.dl_xidtest_flg;
3219
3220	ASSERT(manp);
3221
3222	if (xidtest) {
3223		dlerrorack(wq, mp, DL_BIND_REQ, DL_NOAUTO, 0);
3224		return;
3225	}
3226
3227	if (sap > ETHERTYPE_MAX) {
3228		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADSAP, 0);
3229		return;
3230	}
3231
3232	if (man_dlpi(msp, mp)) {
3233		dlerrorack(wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
3234		return;
3235	}
3236
3237	msp->ms_sap = sap;
3238
3239	SETSTATE(msp, DL_IDLE);
3240
3241	man_addr.dl_sap = msp->ms_sap;
3242	ether_copy(&msp->ms_manp->man_eaddr, &man_addr.dl_phys);
3243
3244	dlbindack(wq, mp, msp->ms_sap, &man_addr, MAN_ADDRL, 0, 0);
3245
3246}
3247
3248static void
3249man_ubreq(queue_t *wq, mblk_t *mp)
3250{
3251	manstr_t		*msp;	/* per stream data */
3252
3253	msp = (manstr_t *)wq->q_ptr;
3254
3255	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
3256		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
3257		return;
3258	}
3259
3260	if (msp->ms_dlpistate != DL_IDLE) {
3261		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
3262		return;
3263	}
3264
3265	if (man_dlpi_senddown(msp, mp)) {
3266		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
3267		return;
3268	}
3269
3270	man_dl_release(&msp->ms_dl_mp, mp);
3271
3272	SETSTATE(msp, DL_UNBOUND);
3273
3274	dlokack(wq, mp, DL_UNBIND_REQ);
3275
3276}
3277
3278static void
3279man_ireq(queue_t *wq, mblk_t *mp)
3280{
3281	manstr_t	*msp;
3282	dl_info_ack_t	*dlip;
3283	man_dladdr_t	*dlap;
3284	eaddr_t		*ep;
3285	size_t	size;
3286
3287	msp = (manstr_t *)wq->q_ptr;
3288
3289	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
3290		dlerrorack(wq, mp, DL_INFO_REQ, DL_BADPRIM, 0);
3291		return;
3292	}
3293
3294	/* Exchange current msg for a DL_INFO_ACK. */
3295	size = sizeof (dl_info_ack_t) + MAN_ADDRL + ETHERADDRL;
3296	mp = mexchange(wq, mp, size, M_PCPROTO, DL_INFO_ACK);
3297	if (mp == NULL) {
3298		MAN_DBG(MAN_DLPI, ("man_ireq: mp == NULL."));
3299		return;
3300	}
3301
3302	/* Fill in the DL_INFO_ACK fields and reply. */
3303	dlip = (dl_info_ack_t *)mp->b_rptr;
3304	*dlip = man_infoack;
3305	dlip->dl_current_state = msp->ms_dlpistate;
3306	dlap = (man_dladdr_t *)(mp->b_rptr + dlip->dl_addr_offset);
3307	dlap->dl_sap = msp->ms_sap;
3308
3309	/*
3310	 * If attached, return physical address.
3311	 */
3312	if (msp->ms_manp != NULL) {
3313		ether_copy(&msp->ms_manp->man_eaddr, &dlap->dl_phys);
3314	} else {
3315		bzero((caddr_t)&dlap->dl_phys, ETHERADDRL);
3316	}
3317
3318	ep = (struct ether_addr *)(mp->b_rptr + dlip->dl_brdcst_addr_offset);
3319	ether_copy(&etherbroadcast, ep);
3320
3321	qreply(wq, mp);
3322
3323}
3324
3325
3326static void
3327man_ponreq(queue_t *wq, mblk_t *mp)
3328{
3329	manstr_t	*msp;
3330	int		flag;
3331
3332	msp = (manstr_t *)wq->q_ptr;
3333
3334	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
3335		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
3336		return;
3337	}
3338
3339	switch (((dl_promiscon_req_t *)mp->b_rptr)->dl_level) {
3340	case DL_PROMISC_PHYS:
3341		flag = MAN_SFLAG_ALLPHYS;
3342		break;
3343
3344	case DL_PROMISC_SAP:
3345		flag = MAN_SFLAG_ALLSAP;
3346		break;
3347
3348	case DL_PROMISC_MULTI:
3349		flag = MAN_SFLAG_ALLMULTI;
3350		break;
3351
3352	default:
3353		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_NOTSUPPORTED, 0);
3354		return;
3355	}
3356
3357	/*
3358	 * Catch the request for replay, and forward it down to any
3359	 * lower stream.
3360	 */
3361	if (man_dlpi(msp, mp)) {
3362		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_SYSERR, ENOMEM);
3363		return;
3364	}
3365
3366	msp->ms_flags |= flag;
3367
3368	dlokack(wq, mp, DL_PROMISCON_REQ);
3369
3370}
3371
3372static void
3373man_poffreq(queue_t *wq, mblk_t *mp)
3374{
3375	manstr_t		*msp;
3376	int			flag;
3377
3378	msp = (manstr_t *)wq->q_ptr;
3379
3380	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
3381		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
3382		return;
3383	}
3384
3385	switch (((dl_promiscoff_req_t *)mp->b_rptr)->dl_level) {
3386	case DL_PROMISC_PHYS:
3387		flag = MAN_SFLAG_ALLPHYS;
3388		break;
3389
3390	case DL_PROMISC_SAP:
3391		flag = MAN_SFLAG_ALLSAP;
3392		break;
3393
3394	case DL_PROMISC_MULTI:
3395		flag = MAN_SFLAG_ALLMULTI;
3396		break;
3397
3398	default:
3399		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTSUPPORTED, 0);
3400		return;
3401	}
3402
3403	if ((msp->ms_flags & flag) == 0) {
3404		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
3405		return;
3406	}
3407
3408	if (man_dlpi_senddown(msp, mp)) {
3409		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_SYSERR, ENOMEM);
3410		return;
3411	}
3412
3413	man_dl_release(&msp->ms_dl_mp, mp);
3414
3415	msp->ms_flags &= ~flag;
3416
3417	dlokack(wq, mp, DL_PROMISCOFF_REQ);
3418
3419}
3420
3421/*
3422 * Enable multicast requests. We might need to track addresses instead of
3423 * just passing things through (see eri_dmreq) - TBD.
3424 */
3425static void
3426man_emreq(queue_t *wq, mblk_t *mp)
3427{
3428	manstr_t		*msp;
3429	union DL_primitives	*dlp;
3430	eaddr_t			*addrp;
3431	t_uscalar_t		off;
3432	t_uscalar_t		len;
3433
3434	msp = (manstr_t *)wq->q_ptr;
3435
3436	if (MBLKL(mp) < DL_ENABMULTI_REQ_SIZE) {
3437		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADPRIM, 0);
3438		return;
3439	}
3440
3441	if (msp->ms_dlpistate == DL_UNATTACHED) {
3442		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3443		return;
3444	}
3445
3446	dlp = (union DL_primitives *)mp->b_rptr;
3447	len = dlp->enabmulti_req.dl_addr_length;
3448	off = dlp->enabmulti_req.dl_addr_offset;
3449	addrp = (struct ether_addr *)(mp->b_rptr + off);
3450
3451	if ((len != ETHERADDRL) ||
3452	    !MBLKIN(mp, off, len) ||
3453	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3454		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3455		return;
3456	}
3457
3458	/*
3459	 * Catch the request for replay, and forward it down to any
3460	 * lower stream.
3461	 */
3462	if (man_dlpi(msp, mp)) {
3463		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3464		return;
3465	}
3466
3467	dlokack(wq, mp, DL_ENABMULTI_REQ);
3468
3469}
3470
3471static void
3472man_dmreq(queue_t *wq, mblk_t *mp)
3473{
3474	manstr_t		*msp;
3475	union DL_primitives	*dlp;
3476	eaddr_t			*addrp;
3477	t_uscalar_t		off;
3478	t_uscalar_t		len;
3479
3480	msp = (manstr_t *)wq->q_ptr;
3481
3482	if (MBLKL(mp) < DL_DISABMULTI_REQ_SIZE) {
3483		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADPRIM, 0);
3484		return;
3485	}
3486
3487	if (msp->ms_dlpistate == DL_UNATTACHED) {
3488		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_OUTSTATE, 0);
3489		return;
3490	}
3491
3492	dlp = (union DL_primitives *)mp->b_rptr;
3493	len = dlp->disabmulti_req.dl_addr_length;
3494	off = dlp->disabmulti_req.dl_addr_offset;
3495	addrp = (struct ether_addr *)(mp->b_rptr + off);
3496
3497	if ((len != ETHERADDRL) ||
3498	    !MBLKIN(mp, off, len) ||
3499	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3500		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADADDR, 0);
3501		return;
3502	}
3503
3504	if (man_dlpi_senddown(msp, mp)) {
3505		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_SYSERR, ENOMEM);
3506		return;
3507	}
3508
3509	man_dl_release(&msp->ms_dl_mp, mp);
3510
3511	dlokack(wq, mp, DL_DISABMULTI_REQ);
3512
3513}
3514
3515static void
3516man_pareq(queue_t *wq, mblk_t *mp)
3517{
3518	manstr_t		*msp;
3519	union	DL_primitives	*dlp;
3520	uint32_t		type;
3521	struct	ether_addr	addr;
3522
3523	msp = (manstr_t *)wq->q_ptr;
3524
3525	if (MBLKL(mp) < DL_PHYS_ADDR_REQ_SIZE) {
3526		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3527		return;
3528	}
3529
3530	dlp = (union DL_primitives *)mp->b_rptr;
3531	type = dlp->physaddr_req.dl_addr_type;
3532	if (msp->ms_manp == NULL) {
3533		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3534		return;
3535	}
3536
3537	switch (type) {
3538	case	DL_FACT_PHYS_ADDR:
3539		(void) localetheraddr((struct ether_addr *)NULL, &addr);
3540		break;
3541
3542	case	DL_CURR_PHYS_ADDR:
3543		ether_bcopy(&msp->ms_manp->man_eaddr, &addr);
3544		break;
3545
3546	default:
3547		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_NOTSUPPORTED, 0);
3548		return;
3549	}
3550
3551	dlphysaddrack(wq, mp, &addr, ETHERADDRL);
3552}
3553
3554/*
3555 * TBD - this routine probably should be protected w/ an ndd
3556 * tuneable, or a man.conf parameter.
3557 */
3558static void
3559man_spareq(queue_t *wq, mblk_t *mp)
3560{
3561	manstr_t		*msp;
3562	union DL_primitives	*dlp;
3563	t_uscalar_t		off;
3564	t_uscalar_t		len;
3565	eaddr_t			*addrp;
3566
3567	msp = (manstr_t *)wq->q_ptr;
3568
3569	if (MBLKL(mp) < DL_SET_PHYS_ADDR_REQ_SIZE) {
3570		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3571		return;
3572	}
3573
3574	dlp = (union DL_primitives *)mp->b_rptr;
3575	len = dlp->set_physaddr_req.dl_addr_length;
3576	off = dlp->set_physaddr_req.dl_addr_offset;
3577
3578	if (!MBLKIN(mp, off, len)) {
3579		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3580		return;
3581	}
3582
3583	addrp = (struct ether_addr *)(mp->b_rptr + off);
3584
3585	/*
3586	 * Error if length of address isn't right or the address
3587	 * specified is a multicast or broadcast address.
3588	 */
3589	if ((len != ETHERADDRL) ||
3590	    ((addrp->ether_addr_octet[0] & 01) == 1) ||
3591	    (ether_cmp(addrp, &etherbroadcast) == 0)) {
3592		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADADDR, 0);
3593		return;
3594	}
3595	/*
3596	 * Error if this stream is not attached to a device.
3597	 */
3598	if (msp->ms_manp == NULL) {
3599		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3600		return;
3601	}
3602
3603	/*
3604	 * We will also resend DL_SET_PHYS_ADDR_REQ for each dest
3605	 * when it is linked under us.
3606	 */
3607	if (man_dlpi_senddown(msp, mp)) {
3608		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_SYSERR, ENOMEM);
3609		return;
3610	}
3611
3612	ether_copy(addrp, msp->ms_manp->man_eaddr.ether_addr_octet);
3613
3614	MAN_DBG(MAN_DLPI, ("man_spareq: snagged %s\n",
3615	    ether_sprintf(&msp->ms_manp->man_eaddr)));
3616
3617	dlokack(wq, mp, DL_SET_PHYS_ADDR_REQ);
3618
3619}
3620
3621/*
3622 * These routines make up the lower part of the MAN streams framework.
3623 */
3624
3625/*
3626 * man_lwsrv - Deferred mblks for down stream. We end up here when
3627 * the destination is not DL_IDLE when traffic comes downstream.
3628 *
3629 *	wq - lower write queue of mxx
3630 */
3631static int
3632man_lwsrv(queue_t *wq)
3633{
3634	mblk_t		*mp;
3635	mblk_t		*mlistp;
3636	man_dest_t	*mdp;
3637	size_t		count;
3638
3639	mdp = (man_dest_t *)wq->q_ptr;
3640
3641	MAN_DBG(MAN_LWSRV, ("man_lwsrv: wq(0x%p) mdp(0x%p)"
3642	    " md_rq(0x%p)\n", (void *)wq, (void *)mdp,
3643	    mdp ? (void *)mdp->md_rq : NULL));
3644
3645	if (mdp == NULL)
3646		goto exit;
3647
3648	if (mdp->md_state & MAN_DSTATE_CLOSING) {
3649		flushq(wq, FLUSHDATA);
3650		flushq(RD(wq), FLUSHDATA);
3651		goto exit;
3652	}
3653
3654	/*
3655	 * Arrange to send deferred mp's first, then mblks on the
3656	 * service queue. Since we are exclusive in the inner perimeter,
3657	 * we don't have to worry about md_lock, like the put procedures,
3658	 * which are MTPUTSHARED.
3659	 */
3660	mutex_enter(&mdp->md_lock);
3661	mlistp = mdp->md_dmp_head;
3662	mdp->md_dmp_head = NULL;
3663	count = mdp->md_dmp_count;
3664	mdp->md_dmp_count = 0;
3665	mutex_exit(&mdp->md_lock);
3666
3667	while (mlistp != NULL) {
3668		mp = mlistp;
3669		mlistp = mp->b_next;
3670		mp->b_next = NULL;
3671		count -= msgsize(mp);
3672		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3673
3674			mutex_enter(&mdp->md_lock);
3675			mdp->md_dmp_count += count + msgsize(mp);
3676			mp->b_next = mlistp;
3677			mdp->md_dmp_head = mp;
3678			mutex_exit(&mdp->md_lock);
3679			goto exit;
3680		}
3681	}
3682	mdp->md_dmp_tail = NULL;
3683
3684	while (mp = getq(wq)) {
3685		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3686			/*
3687			 * Put it back on the queue, making sure to avoid
3688			 * the infinite loop mentioned in putbq(9F).
3689			 */
3690			noenable(wq);
3691			putbq(wq, mp);
3692			enableok(wq);
3693
3694			break;
3695		}
3696	}
3697
3698exit:
3699
3700	return (0);
3701}
3702
3703/*
3704 * man_lrput - handle DLPI messages issued from downstream.
3705 *
3706 *	rq - lower read queue of mxx
3707 *	mp - mblk ptr to DLPI request
3708 *
3709 *	returns 0
3710 */
3711static int
3712man_lrput(queue_t *rq, mblk_t *mp)
3713{
3714	man_dest_t	*mdp;
3715	manstr_t	*msp;
3716
3717#if defined(DEBUG)
3718	union DL_primitives	*dlp;
3719	t_uscalar_t		prim = MAN_DLPI_MAX_PRIM + 1;
3720	char			*prim_str;
3721#endif  /* DEBUG */
3722
3723	mdp = (man_dest_t *)rq->q_ptr;
3724
3725#if defined(DEBUG)
3726	if (DB_TYPE(mp) == M_PROTO) {
3727		dlp = (union DL_primitives *)mp->b_rptr;
3728		prim = dlp->dl_primitive;
3729	}
3730
3731	prim_str = (prim > MAN_DLPI_MAX_PRIM) ? "NON DLPI" :
3732	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3733	    (prim == DLIOCRAW) ? "DLIOCRAW" :
3734	    dps[(unsigned int)prim];
3735	MAN_DBG(MAN_LRPUT, ("man_lrput: rq(0x%p) mp(0x%p) mdp(0x%p)"
3736	    " db_type(0x%x) dl_prim %s", (void *)rq,
3737	    (void *)mp, (void *)mdp, DB_TYPE(mp), prim_str));
3738	MAN_DBGCALL(MAN_LRPUT2, man_print_mdp(mdp));
3739#endif  /* DEBUG */
3740
3741	if (DB_TYPE(mp) == M_FLUSH) {
3742		/* Turn around */
3743		if (*mp->b_rptr & FLUSHW) {
3744			*mp->b_rptr &= ~FLUSHR;
3745			qreply(rq, mp);
3746		} else
3747			freemsg(mp);
3748		return (0);
3749	}
3750
3751	if (mdp == NULL || mdp->md_state != MAN_DSTATE_READY) {
3752
3753		MAN_DBG(MAN_LRPUT, ("man_lrput: not ready mdp(0x%p),"
3754		    " state(%d)", (void *)mdp, mdp ? mdp->md_state : -1));
3755		freemsg(mp);
3756		return (0);
3757	}
3758
3759	/*
3760	 * If we have a destination in the right state, forward on datagrams.
3761	 */
3762	if (MAN_IS_DATA(mp)) {
3763		if (mdp->md_dlpistate == DL_IDLE && canputnext(mdp->md_rq)) {
3764
3765			msp = mdp->md_msp;
3766			if (!(msp->ms_flags & MAN_SFLAG_PROMISC))
3767				mdp->md_rcvcnt++; /* Count for failover */
3768			/*
3769			 * Put the mblk directly up to the next queue.
3770			 */
3771			MAN_DBG(MAN_LRPUT, ("man_lrput: putnext to rq(0x%p)",
3772			    (void *)mdp->md_rq));
3773			(void) putnext(mdp->md_rq, mp);
3774		} else {
3775			freemsg(mp);
3776		}
3777	} else {
3778		/*
3779		 * Handle in man_lrsrv with exclusive inner perimeter lock.
3780		 */
3781		putq(rq, mp);
3782	}
3783
3784	return (0);
3785}
3786
3787/*
3788 * Either this is a response from our attempt to sync the upper and lower
3789 * stream states, or it's data. If it's not data, do DL_* response
3790 * processing and transition md_dlpistate accordingly. If it's data, toss it.
3791 */
3792static int
3793man_lrsrv(queue_t *rq)
3794{
3795	man_dest_t		*mdp;
3796	mblk_t			*mp;
3797	union DL_primitives	*dlp;
3798	ulong_t			prim;
3799	ulong_t			cprim;
3800	int			need_dl_reset = FALSE;
3801
3802#if defined(DEBUG)
3803		struct iocblk	*iocp;
3804		char		ioc_cmd[256];
3805#endif  /* DEBUG */
3806
3807	MAN_DBG(MAN_LRSRV, ("man_lrsrv: rq(0x%p)", (void *)rq));
3808
3809	mdp = (man_dest_t *)rq->q_ptr;
3810
3811	if ((mdp == NULL) || (mdp->md_state & MAN_DSTATE_CLOSING)) {
3812		flushq(rq, FLUSHDATA);
3813		flushq(WR(rq), FLUSHDATA);
3814		goto exit;
3815	}
3816
3817	while (mp = getq(rq)) {
3818
3819
3820	/*
3821	 * If we're not connected, or it's a datagram, toss it.
3822	 */
3823	if (MAN_IS_DATA(mp) || mdp->md_state != MAN_DSTATE_READY) {
3824
3825		MAN_DBG(MAN_LRSRV, ("man_lrsrv: dropping mblk mdp(0x%p)"
3826		    " is_data(%d)", (void *)mdp, MAN_IS_DATA(mp)));
3827		freemsg(mp);
3828		continue;
3829	}
3830
3831	/*
3832	 * Should be response to man_dlpi_replay. Discard unless there
3833	 * is a failure we care about.
3834	 */
3835
3836	switch (DB_TYPE(mp)) {
3837	case M_PROTO:
3838	case M_PCPROTO:
3839		/* Do proto processing below. */
3840		break;
3841
3842	case M_IOCNAK:
3843		/*
3844		 * DL_IOC* failed for some reason.
3845		 */
3846		need_dl_reset = TRUE;
3847
3848#if defined(DEBUG)
3849		iocp = (struct iocblk *)mp->b_rptr;
3850
3851		sprintf(ioc_cmd, "0x%x", iocp->ioc_cmd);
3852		MAN_DBG(MAN_LRSRV, ("man_lrsrv: M_IOCNAK err %d for cmd(%s)\n",
3853		    iocp->ioc_error,
3854		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3855		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
3856#endif  /* DEBUG */
3857
3858		/* FALLTHRU */
3859
3860	case M_IOCACK:
3861	case M_CTL:
3862		/*
3863		 * OK response from DL_IOC*, ignore.
3864		 */
3865		goto dl_reset;
3866	}
3867
3868	dlp = (union DL_primitives *)mp->b_rptr;
3869	prim = dlp->dl_primitive;
3870
3871	MAN_DBG(MAN_LRSRV, ("man_lrsrv: prim %s", dps[(int)prim]));
3872
3873	/*
3874	 * DLPI state processing big theory: We do not rigorously check
3875	 * DLPI states (e.g. PENDING stuff). Simple rules:
3876	 *
3877	 * 	1) If we see an OK_ACK to an ATTACH_REQ, dlpistate = DL_UNBOUND.
3878	 *	2) If we see an BIND_ACK to a BIND_REQ, dlpistate = DL_IDLE.
3879	 *	3) If we see a OK_ACK response to an UNBIND_REQ
3880	 *	   dlpistate = DL_UNBOUND.
3881	 *	4) If we see a OK_ACK response to a DETACH_REQ,
3882	 *	   dlpistate = DL_UNATTACHED.
3883	 *
3884	 * Everything that isn't handled by 1-4 above is handled by 5)
3885	 *
3886	 *	5) A NAK to any DL_* messages we care about causes
3887	 *	   dlpistate = DL_UNATTACHED and man_reset_dlpi to run
3888	 *
3889	 * TBD - need a reset counter so we can try a switch if it gets
3890	 * too high.
3891	 */
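	/*
	 * Illustrative trace of a successful resync under rules 1-4
	 * (replayed request on the left, lower stream's reply and the
	 * resulting md_dlpistate on the right):
	 *
	 *	DL_ATTACH_REQ	-> DL_OK_ACK	=> DL_UNBOUND
	 *	DL_BIND_REQ	-> DL_BIND_ACK	=> DL_IDLE
	 *	DL_UNBIND_REQ	-> DL_OK_ACK	=> DL_UNBOUND
	 *	DL_DETACH_REQ	-> DL_OK_ACK	=> DL_UNATTACHED
	 */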
3892
3893	switch (prim) {
3894	case DL_OK_ACK:
3895		cprim = dlp->ok_ack.dl_correct_primitive;
3896
3897		switch (cprim) {
3898		case DL_ATTACH_REQ:
3899			if (man_dlioc_replay(mdp)) {
3900				D_SETSTATE(mdp, DL_UNBOUND);
3901			} else {
3902				need_dl_reset = TRUE;
3903				break;
3904			}
3905			break;
3906
3907		case DL_DETACH_REQ:
3908			D_SETSTATE(mdp, DL_UNATTACHED);
3909			break;
3910
3911		case DL_UNBIND_REQ:
3912			/*
3913			 * Cancel timer and set md_dlpistate.
3914			 */
3915			D_SETSTATE(mdp, DL_UNBOUND);
3916
3917			ASSERT(mdp->md_bc_id == 0);
3918			if (mdp->md_lc_timer_id != 0) {
3919				(void) quntimeout(man_ctl_wq,
3920				    mdp->md_lc_timer_id);
3921				mdp->md_lc_timer_id = 0;
3922			}
3923		}
3924		MAN_DBG(MAN_DLPI,
3925		    ("		cprim %s", dps[(int)cprim]));
3926		break;
3927
3928	case DL_BIND_ACK:
3929		/*
3930		 * We're ready for data. Get man_lwsrv to run to
3931		 * process any deferred data and start the linkcheck timer.
3932		 */
3933		D_SETSTATE(mdp, DL_IDLE);
3934		qenable(mdp->md_wq);
3935		mdp->md_linkstate = MAN_LINKGOOD;
3936		if (man_needs_linkcheck(mdp)) {
3937			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
3938			    man_linkcheck_timer, (void *)mdp,
3939			    man_gettimer(MAN_TIMER_LINKCHECK, mdp));
3940		}
3941
3942		break;
3943
3944	case DL_ERROR_ACK:
3945		cprim = dlp->error_ack.dl_error_primitive;
3946		switch (cprim) {
3947		case DL_ATTACH_REQ:
3948		case DL_BIND_REQ:
3949		case DL_DISABMULTI_REQ:
3950		case DL_ENABMULTI_REQ:
3951		case DL_PROMISCON_REQ:
3952		case DL_PROMISCOFF_REQ:
3953		case DL_SET_PHYS_ADDR_REQ:
3954			need_dl_reset = TRUE;
3955			break;
3956
3957		/*
3958		 * Ignore errors on these teardown requests. TBD (better comment)
3959		 */
3960		case DL_UNBIND_REQ:
3961		case DL_DETACH_REQ:
3962			break;
3963		}
3964
3965		MAN_DBG(MAN_DLPI,
3966		    ("\tdl_errno %d dl_unix_errno %d cprim %s",
3967		    dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno,
3968		    dps[(int)cprim]));
3969		break;
3970
3971	case DL_UDERROR_IND:
3972		MAN_DBG(MAN_DLPI,
3973		    ("\tdl_errno %d unix_errno %d",
3974		    dlp->uderror_ind.dl_errno,
3975		    dlp->uderror_ind.dl_unix_errno));
3976		break;
3977
3978	case DL_INFO_ACK:
3979		break;
3980
3981	default:
3982		/*
3983		 * We should not get here.
3984		 */
3985		cmn_err(CE_WARN, "man_lrsrv: unexpected DL prim 0x%lx!",
3986		    prim);
3987		need_dl_reset = TRUE;
3988		break;
3989	}
3990
3991dl_reset:
3992	freemsg(mp);
3993
3994	if (need_dl_reset) {
3995		man_pg_t	*mpg;
3996		man_path_t	*mp;
3997
3998		if (qsize(rq)) {	/* Dump all messages. */
3999			flushq(rq, FLUSHDATA);
4000			flushq(WR(rq), FLUSHDATA);
4001		}
4002
4003		mdp->md_dlpierrors++;
4004		D_SETSTATE(mdp, DL_UNATTACHED);
4005		if (mdp->md_lc_timer_id != 0) {
4006			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4007			mdp->md_lc_timer_id = 0;
4008		}
4009
4010		mutex_enter(&man_lock);
4011		ASSERT(mdp->md_msp != NULL);
4012		ASSERT(mdp->md_msp->ms_manp != NULL);
4013		mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg,
4014		    mdp->md_pg_id);
4015		ASSERT(mpg != NULL);
4016		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4017		    mdp->md_device.mdev_ppa);
4018		ASSERT(mp != NULL);
4019		mp->mp_device.mdev_state |= MDEV_FAILED;
4020		if ((mdp->md_dlpierrors >= MAN_MAX_DLPIERRORS) &&
4021		    (man_is_on_domain ||
4022		    mdp->md_msp->ms_manp->man_meta_ppa == 1)) {
4023			/*
4024			 * Autoswitching is disabled for instance 0
4025			 * on the SC as we expect the domain to
4026			 * initiate the path switching.
4027			 */
4028			(void) man_do_autoswitch((man_dest_t *)mdp);
4029			MAN_DBG(MAN_WARN, ("man_lrsrv: dlpi failure(%d,%d),"
4030			    " switching path", mdp->md_device.mdev_major,
4031			    mdp->md_device.mdev_ppa));
4032		} else {
4033			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
4034			    man_reset_dlpi, (void *)mdp,
4035			    man_gettimer(MAN_TIMER_DLPIRESET, mdp));
4036		}
4037		mutex_exit(&man_lock);
4038	}
4039
4040
4041	} /* End while (getq()) */
4042
4043exit:
4044	MAN_DBG(MAN_DLPI, ("man_lrsrv: returns"));
4045
4046	return (0);
4047}
4048
4049static int
4050man_needs_linkcheck(man_dest_t *mdp)
4051{
4052	/*
4053	 * Not ready for linkcheck.
4054	 */
4055	if (mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
4056		return (0);
4057
4058	/*
4059	 * Linkchecking needs to be done on IP streams. For the domain, all
4060	 * driver instances need checking; for the SC, only instance 1 needs it.
4061	 */
4062	if ((man_is_on_domain || mdp->md_msp->ms_manp->man_meta_ppa == 1) &&
4063	    (mdp->md_msp->ms_sap == ETHERTYPE_IP ||
4064	    mdp->md_msp->ms_sap == ETHERTYPE_IPV6))
4065
4066		return (1);
4067
4068	/*
4069	 * Linkcheck not needed on this link.
4070	 */
4071	return (0);
4072}
4073
4074/*
4075 * The following routines process work requests posted to man_iwork_q
4076 * from the non-STREAMS half of the driver (see man_bwork.c). The work
4077 * requires access to the inner perimeter lock of the driver. This
4078 * lock is acquired by man_uwsrv, who calls man_iwork to process the
4079 * man_iwork_q->
4080 * man_iwork_q.
4081
4082/*
4083 * The man_bwork has posted some work for us to do inside the
4084 * perimeter. This mainly involves updating lower multiplexor data
4085 * structures (non-blocking type stuff). So, we can hold the man_lock
4086 * until we are done processing all work items. Note that some of these
4087 * routines in turn submit work back to the bgthread, which they can do
4088 * since we hold the man_lock.
4089 */
4090static void
4091man_iwork()
4092{
4093	man_work_t	*wp;
4094	int		wp_finished;
4095
4096	MAN_DBG(MAN_SWITCH, ("man_iwork: q_work(0x%p)",
4097	    (void *)man_iwork_q->q_work));
4098
4099	mutex_enter(&man_lock);
4100
4101	while (man_iwork_q->q_work) {
4102
4103		wp = man_iwork_q->q_work;
4104		man_iwork_q->q_work = wp->mw_next;
4105		wp->mw_next = NULL;
4106
4107		mutex_exit(&man_lock);
4108
4109		MAN_DBG(MAN_SWITCH, ("man_iwork: type %s",
4110		    _mw_type[wp->mw_type]));
4111
4112		wp_finished = TRUE;
4113
4114		switch (wp->mw_type) {
4115		case MAN_WORK_DRATTACH:
4116			(void) man_do_dr_attach(wp);
4117			break;
4118
4119		case MAN_WORK_DRSWITCH:
4120			/*
4121			 * Return status to man_dr_detach immediately. If
4122			 * no error submitting SWITCH request, man_iswitch
4123			 * or man_bclose will cv_signal man_dr_detach on
4124			 * completion of SWITCH work request.
4125			 */
4126			if (man_do_dr_switch(wp) == 0)
4127				wp_finished = FALSE;
4128			break;
4129
4130		case MAN_WORK_DRDETACH:
4131			man_do_dr_detach(wp);
4132			break;
4133
4134		case MAN_WORK_SWITCH:
4135			if (man_iswitch(wp))
4136				wp_finished = FALSE;
4137			break;
4138
4139		case MAN_WORK_KSTAT_UPDATE:
4140			man_do_kstats(wp);
4141			break;
4142
4143		default:
4144			cmn_err(CE_WARN, "man_iwork: "
4145			    "illegal work type(%d)", wp->mw_type);
4146			break;
4147		}
4148
4149		mutex_enter(&man_lock);
4150
4151		/*
4152		 * If we've completed the work request, delete, or
4153		 * cv_signal waiter.
4154		 */
4155		if (wp_finished) {
4156			wp->mw_flags |= MAN_WFLAGS_DONE;
4157
4158			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
4159				cv_signal(&wp->mw_cv);
4160			else
4161				man_work_free(wp);
4162		}
4163	}
4164
4165	mutex_exit(&man_lock);
4166}
4167
4168/*
4169 * man_dr_detach has submitted a request to DRSWITCH a path.
4170 * He is in cv_wait_sig(wp->mw_cv). We forward the work request on to
4171 * man_bwork as a switch request. It should end up back at
4172 * man_iwork, who will cv_signal(wp->mw_cv) man_dr_detach.
4173 *
4174 * Called holding inner perimeter lock.
4175 * man_lock is held to synchronize access to pathgroup list(man_pg).
4176 */
4177static int
4178man_do_dr_switch(man_work_t *wp)
4179{
4180	man_t		*manp;
4181	man_pg_t	*mpg;
4182	man_path_t	*mp;
4183	man_path_t	*ap;
4184	man_adest_t	*adp;
4185	mi_path_t	mpath;
4186	int		status = 0;
4187
4188	adp = &wp->mw_arg;
4189
4190	MAN_DBG(MAN_SWITCH, ("man_do_dr_switch: pg_id %d work:", adp->a_pg_id));
4191	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4192
4193	mutex_enter(&man_lock);
4194	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4195	if (manp == NULL || manp->man_pg == NULL) {
4196		status = ENODEV;
4197		goto exit;
4198	}
4199
4200	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4201	if (mpg == NULL) {
4202		status = ENODEV;
4203		goto exit;
4204	}
4205
4206	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4207		status = EAGAIN;
4208		goto exit;
4209	}
4210
4211	/*
4212	 * Check to see if the detaching device is active. If so, activate
4213	 * an alternate.
4214	 */
4215	mp = man_find_active_path(mpg->mpg_pathp);
4216	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4217
4218		ap = man_find_alternate_path(mpg->mpg_pathp);
4219		if (ap == NULL) {
4220			status = EBUSY;
4221			goto exit;
4222		}
4223
4224		bzero((char *)&mpath, sizeof (mi_path_t));
4225
4226		mpath.mip_cmd = MI_PATH_ACTIVATE;
4227		mpath.mip_man_ppa = 0;
4228		mpath.mip_pg_id = 0;
4229		mpath.mip_devs[0] = ap->mp_device;
4230		mpath.mip_ndevs = 1;
4231		ether_copy(&manp->man_eaddr, &mpath.mip_eaddr);
4232
4233		/*
4234		 * DR thread is sleeping on wp->mw_cv. We change the work
		 * request from DRSWITCH to SWITCH and submit it for
		 * processing by man_bwork (via man_pg_cmd). At
4237		 * completion the SWITCH work request is processed by
4238		 * man_iswitch() or man_bclose and the DR thread will
4239		 * be cv_signal'd.
4240		 */
4241		wp->mw_type = MAN_WORK_SWITCH;
4242		if (status = man_pg_cmd(&mpath, wp))
4243			goto exit;
4244
4245	} else {
4246		/*
4247		 * Tell man_dr_detach that detaching device is not currently
4248		 * in use.
4249		 */
4250		status = ENODEV;
4251	}
4252
4253exit:
4254	if (status) {
4255		/*
4256		 * ENODEV is a noop, not really an error.
4257		 */
4258		if (status != ENODEV)
4259			wp->mw_status = status;
4260	}
4261	mutex_exit(&man_lock);
4262
4263	return (status);
4264}
4265
4266/*
4267 * man_dr_attach has submitted a request to DRATTACH a path,
4268 * add that path to the path list.
4269 *
4270 * Called holding perimeter lock.
4271 */
4272static int
4273man_do_dr_attach(man_work_t *wp)
4274{
4275	man_t		*manp;
4276	man_adest_t	*adp;
4277	mi_path_t	mpath;
4278	manc_t		manc;
4279	int		status = 0;
4280
4281	adp = &wp->mw_arg;
4282
4283	MAN_DBG(MAN_SWITCH, ("man_do_dr_attach: pg_id %d work:", adp->a_pg_id));
4284	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4285
4286	mutex_enter(&man_lock);
4287	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4288	if (manp == NULL || manp->man_pg == NULL) {
4289		status = ENODEV;
4290		goto exit;
4291	}
4292
4293	if (status = man_get_iosram(&manc)) {
4294		goto exit;
4295	}
4296	/*
4297	 * Extract SC ethernet address from IOSRAM.
4298	 */
4299	ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4300
4301	mpath.mip_pg_id = adp->a_pg_id;
4302	mpath.mip_man_ppa = adp->a_man_ppa;
4303	/*
4304	 * man_dr_attach passes the new device info in a_sf_dev.
4305	 */
4306	MAN_DBG(MAN_DR, ("man_do_dr_attach: "));
4307	MAN_DBGCALL(MAN_DR, man_print_dev(&adp->a_sf_dev));
4308	mpath.mip_devs[0] = adp->a_sf_dev;
4309	mpath.mip_ndevs = 1;
4310	mpath.mip_cmd = MI_PATH_ADD;
4311	status = man_pg_cmd(&mpath, NULL);
4312
4313exit:
4314	mutex_exit(&man_lock);
4315	return (status);
4316}
4317
4318/*
4319 * man_dr_detach has submitted a request to DRDETACH a path.
4320 * He is in cv_wait_sig(wp->mw_cv). We remove the path and
4321 * cv_signal(wp->mw_cv) man_dr_detach.
4322 *
4323 * Called holding perimeter lock.
4324 */
4325static void
4326man_do_dr_detach(man_work_t *wp)
4327{
4328	man_t		*manp;
4329	man_pg_t	*mpg;
4330	man_path_t	*mp;
4331	man_adest_t	*adp;
4332	manc_t		manc;
4333	mi_path_t	mpath;
4334	int		i;
4335	int		found;
4336	int		status = 0;
4337
4338	adp = &wp->mw_arg;
4339
4340	MAN_DBG(MAN_SWITCH, ("man_do_dr_detach: pg_id %d work:", adp->a_pg_id));
4341	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4342
4343	mutex_enter(&man_lock);
4344	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4345	if (manp == NULL || manp->man_pg == NULL) {
4346		status = ENODEV;
4347		goto exit;
4348	}
4349
4350	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4351	if (mpg == NULL) {
4352		status = ENODEV;
4353		goto exit;
4354	}
4355
4356	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4357		status = EAGAIN;
4358		goto exit;
4359	}
4360
4361	/*
4362	 * We should have switched detaching path if it was active.
4363	 */
4364	mp = man_find_active_path(mpg->mpg_pathp);
4365	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4366		status = EAGAIN;
4367		goto exit;
4368	}
4369
4370	/*
4371	 * Submit an ASSIGN command, minus the detaching device.
4372	 */
4373	bzero((char *)&mpath, sizeof (mi_path_t));
4374
4375	if (status = man_get_iosram(&manc)) {
4376		goto exit;
4377	}
4378
4379	mpath.mip_cmd = MI_PATH_ASSIGN;
4380	mpath.mip_man_ppa = 0;
4381	mpath.mip_pg_id = 0;
4382
4383	mp = mpg->mpg_pathp;
4384	i = 0;
4385	found = FALSE;
4386	while (mp != NULL) {
4387		if (mp->mp_device.mdev_ppa != adp->a_sf_dev.mdev_ppa) {
4388			mpath.mip_devs[i] = mp->mp_device;
4389			i++;
4390		} else {
4391			found = TRUE;
4392		}
4393		mp = mp->mp_next;
4394	}
4395
4396	if (found) {
4397		/*
		 * Need to include the SC's ethernet address in the command.
4399		 */
4400		mpath.mip_ndevs = i;
4401		ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4402
4403		status = man_pg_cmd(&mpath, NULL);
4404	}
4405
4406	/*
4407	 * Hand back status to man_dr_detach request.
4408	 */
4409exit:
4410	if (status != ENODEV)
4411		wp->mw_status = status;
4412
4413	mutex_exit(&man_lock);
4414
4415}
4416
4417
4418/*
4419 * The background thread has configured new lower multiplexor streams for
4420 * the given destinations. Update the appropriate destination data structures
4421 * inside the inner perimeter. We must take care to deal with destinations
4422 * whose upper stream has closed or detached from lower streams.
4423 *
4424 * Returns
4425 *	0		Done with work request.
4426 *	1		Reused work request.
4427 */
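/*
 * For example, on a successful switch the new (switched-to) dest info
 * in adp->a_mdp[i] is swapped into the matching upper stream's
 * msp->ms_dests[pg_id], and the old (switched-from) info is copied
 * back into adp->a_mdp[i] so the MAN_WORK_CLOSE request queued at the
 * bottom of this routine can close the old lower streams in man_bwork.
 */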
4428static int
4429man_iswitch(man_work_t *wp)
4430{
4431	man_adest_t	*adp;
4432	man_t		*manp;
4433	man_pg_t	*mpg;
4434	man_path_t	*mp = NULL;
4435	man_dest_t	*mdp;
4436	man_dest_t	*tdp;
4437	int		i;
4438	int		switch_ok = TRUE;
4439
4440	adp = &wp->mw_arg;
4441
4442	if (wp->mw_status != 0) {
4443		switch_ok = FALSE;	/* Never got things opened */
4444	}
4445
4446	/*
4447	 * Update destination structures as appropriate.
4448	 */
4449	for (i = 0; i < adp->a_ndests; i++) {
4450		man_dest_t	tmp;
4451
4452		/*
		 * Check to see if the lower stream we just switched is still
4454		 * around.
4455		 */
4456		tdp = &adp->a_mdp[i];
4457		mdp = man_switch_match(tdp, adp->a_pg_id, tdp->md_switch_id);
4458
4459		if (mdp == NULL)
4460			continue;
4461
4462		if (switch_ok == FALSE) {
4463			/*
4464			 * Switch failed for some reason.  Clear
4465			 * PLUMBING flag and retry switch again later.
4466			 */
4467			man_ifail_dest(mdp);
4468			continue;
4469		}
4470
4471		/*
		 * Swap new info for old. We return the old info to
4473		 * man_bwork to close things up below.
4474		 */
4475		bcopy((char *)mdp, (char *)&tmp, sizeof (man_dest_t));
4476
4477		ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4478		ASSERT(mdp->md_state == tdp->md_state);
4479
4480		mdp->md_state = tdp->md_state;
4481
4482		/*
		 * Save the wq from the destination passed in (tdp).
4484		 */
4485		mdp->md_wq = tdp->md_wq;
4486		RD(mdp->md_wq)->q_ptr = (void *)(mdp);
4487		WR(mdp->md_wq)->q_ptr = (void *)(mdp);
4488
4489		mdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4490		mdp->md_state |= MAN_DSTATE_READY;
4491
4492		ASSERT(mdp->md_device.mdev_major == adp->a_sf_dev.mdev_major);
4493
4494		ASSERT(tdp->md_device.mdev_ppa == adp->a_st_dev.mdev_ppa);
4495		ASSERT(tdp->md_device.mdev_major == adp->a_st_dev.mdev_major);
4496
4497		mdp->md_device = tdp->md_device;
4498		mdp->md_muxid = tdp->md_muxid;
4499		mdp->md_linkstate = MAN_LINKUNKNOWN;
4500		(void) drv_getparm(TIME, &mdp->md_lastswitch);
4501		mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4502		mdp->md_switch_id = 0;
4503		mdp->md_switches++;
4504		mdp->md_dlpierrors = 0;
4505		D_SETSTATE(mdp, DL_UNATTACHED);
4506
4507		/*
4508		 * Resync lower w/ upper dlpi state. This will start link
4509		 * timer if/when lower stream goes to DL_IDLE (see man_lrsrv).
4510		 */
4511		man_reset_dlpi((void *)mdp);
4512
4513		bcopy((char *)&tmp, (char *)tdp, sizeof (man_dest_t));
4514	}
4515
4516	if (switch_ok) {
4517		for (i = 0; i < adp->a_ndests; i++) {
4518			tdp = &adp->a_mdp[i];
4519
4520			tdp->md_state &= ~MAN_DSTATE_PLUMBING;
4521			tdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4522			tdp->md_state |= MAN_DSTATE_READY;
4523		}
4524	} else {
4525		/*
4526		 * Never got switch-to destinations open, free them.
4527		 */
4528		man_kfree(adp->a_mdp,
4529		    sizeof (man_dest_t) * adp->a_ndests);
4530	}
4531
4532	/*
4533	 * Clear pathgroup switching flag and update path flags.
4534	 */
4535	mutex_enter(&man_lock);
4536	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4537
4538	ASSERT(manp != NULL);
4539	ASSERT(manp->man_pg != NULL);
4540
4541	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4542	ASSERT(mpg != NULL);
4543	ASSERT(mpg->mpg_flags & MAN_PG_SWITCHING);
4544	mpg->mpg_flags &= ~MAN_PG_SWITCHING;
4545
4546	/*
	 * Switch succeeded. Mark the path we switched from as failed,
	 * mark the device we switched to as active, and clear its
	 * failed flag (if set).
4549	 * Sync up kstats.
4550	 */
4551	if (switch_ok) {
4552		mp = man_find_active_path(mpg->mpg_pathp);
4553		if (mp != NULL) {
4554
4555			ASSERT(adp->a_sf_dev.mdev_major != 0);
4556
4557			MAN_DBG(MAN_SWITCH, ("man_iswitch: switch from dev:"));
4558			MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_sf_dev));
4559
4560			mp->mp_device.mdev_state &= ~MDEV_ACTIVE;
4561		} else
4562			ASSERT(adp->a_sf_dev.mdev_major == 0);
4563
4564		MAN_DBG(MAN_SWITCH, ("man_iswitch: switch to dev:"));
4565		MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_st_dev));
4566
4567		ASSERT(adp->a_st_dev.mdev_major != 0);
4568
4569		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4570		    adp->a_st_dev.mdev_ppa);
4571
4572		ASSERT(mp != NULL);
4573
4574		mp->mp_device.mdev_state |= MDEV_ACTIVE;
4575	}
4576
4577	/*
4578	 * Decrement manp reference count and hand back work request if
4579	 * needed.
4580	 */
4581	manp->man_refcnt--;
4582
4583	if (switch_ok) {
4584		wp->mw_type = MAN_WORK_CLOSE;
4585		man_work_add(man_bwork_q, wp);
4586	}
4587
4588	mutex_exit(&man_lock);
4589
4590	return (switch_ok);
4591}
4592
4593/*
4594 * Find the destination in the upper stream that we just switched.
4595 */
4596man_dest_t *
4597man_switch_match(man_dest_t *sdp, int pg_id, void *sid)
4598{
4599	man_dest_t	*mdp = NULL;
4600	manstr_t	*msp;
4601
4602	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4603		/*
4604		 * Check if upper stream closed, or detached.
4605		 */
4606		if (msp != sdp->md_msp)
4607			continue;
4608
4609		if (msp->ms_dests == NULL)
4610			break;
4611
4612		mdp = &msp->ms_dests[pg_id];
4613
4614		/*
4615		 * Upper stream detached and reattached while we were
4616		 * switching.
4617		 */
4618		if (mdp->md_switch_id != sid) {
4619			mdp = NULL;
4620			break;
4621		}
4622	}
4623
4624	return (mdp);
4625}
4626
4627/*
 * The bg_thread can't complete the switch for some reason. (Re)start
 * the linkcheck timer.
4630 */
4631static void
4632man_ifail_dest(man_dest_t *mdp)
4633{
4634	ASSERT(mdp->md_lc_timer_id == 0);
4635	ASSERT(mdp->md_bc_id == 0);
4636	ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4637
4638	MAN_DBG(MAN_SWITCH, ("man_ifail_dest"));
4639	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
4640
4641	mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4642	mdp->md_linkstate = MAN_LINKFAIL;
4643
4644	/*
4645	 * If we have not yet initialized link, or the upper stream is
4646	 * DL_IDLE, restart the linktimer.
4647	 */
4648	if ((mdp->md_state & MAN_DSTATE_INITIALIZING) ||
4649	    ((mdp->md_msp->ms_sap == ETHERTYPE_IPV6 ||
4650	    mdp->md_msp->ms_sap == ETHERTYPE_IP) &&
4651	    mdp->md_msp->ms_dlpistate == DL_IDLE)) {
4652
4653		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
4654		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4655	}
4656
4657}
4658
4659/*
4660 * Arrange to replay all of ms_dl_mp on the new lower stream to get it
4661 * in sync with the upper stream. Note that this includes setting the
4662 * physical address.
4663 *
4664 * Called from qtimeout with inner perimeter lock.
4665 */
4666static void
4667man_reset_dlpi(void *argp)
4668{
4669	man_dest_t	*mdp = (man_dest_t *)argp;
4670	manstr_t	*msp;
4671	mblk_t		*mp;
4672	mblk_t		*rmp = NULL;
4673	mblk_t		*tmp;
4674
4675	mdp->md_lc_timer_id = 0;
4676
4677	if (mdp->md_state != MAN_DSTATE_READY) {
4678		MAN_DBG(MAN_DLPI, ("man_reset_dlpi: not ready!"));
4679		return;
4680	}
4681
4682	msp = mdp->md_msp;
4683
4684	rmp = man_dup_mplist(msp->ms_dl_mp);
4685	if (rmp == NULL)
4686		goto fail;
4687
4688	/*
4689	 * Send down an unbind and detach request, just to clean things
4690	 * out, we ignore ERROR_ACKs for unbind and detach in man_lrsrv.
4691	 */
4692	tmp = man_alloc_ubreq_dreq();
4693	if (tmp == NULL) {
4694		goto fail;
4695	}
4696	mp = tmp;
4697	while (mp->b_next != NULL)
4698		mp = mp->b_next;
4699	mp->b_next = rmp;
4700	rmp = tmp;
4701
4702	man_dlpi_replay(mdp, rmp);
4703
4704	return;
4705
4706fail:
4707
4708	while (rmp) {
4709		mp = rmp;
4710		rmp = rmp->b_next;
4711		mp->b_next = mp->b_prev = NULL;
4712		freemsg(mp);
4713	}
4714
4715	ASSERT(mdp->md_lc_timer_id == 0);
4716	ASSERT(mdp->md_bc_id == 0);
4717
4718	/*
	 * If low on memory, try again later. I could use qbufcall, but that
4720	 * could fail and I would have to try and recover from that w/
4721	 * qtimeout anyway.
4722	 */
4723	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_reset_dlpi,
4724	    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4725}
4726
4727/*
4728 * Once we receive acknowledgement that DL_ATTACH_REQ was successful,
4729 * we can send down the DL_* related IOCTLs (e.g. DL_IOC_HDR). If we
 * try and send them downstream w/o waiting, the ioctls get processed
 * before the ATTACH_REQ and they are rejected. TBD - could just do the
 * lower dlpi state change in lock step.
4733 */
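/*
 * e.g. a DL_IOC_HDR style ioctl saved on ms_dlioc_mp when the upper
 * stream issued it is duplicated and replayed here only after the new
 * lower stream has acknowledged DL_ATTACH_REQ, so it isn't rejected.
 */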
4734static int
4735man_dlioc_replay(man_dest_t *mdp)
4736{
4737	mblk_t		*rmp;
4738	int		status = 1;
4739
4740	if (mdp->md_msp->ms_dlioc_mp == NULL)
4741		goto exit;
4742
4743	rmp = man_dup_mplist(mdp->md_msp->ms_dlioc_mp);
4744	if (rmp == NULL) {
4745		status = 0;
4746		goto exit;
4747	}
4748
4749	man_dlpi_replay(mdp, rmp);
4750exit:
4751	return (status);
4752}
4753
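/*
 * Allocate a DL_UNBIND_REQ message with a DL_DETACH_REQ chained on
 * b_next, in the order they will be replayed down the lower stream.
 * Returns NULL if either allocation fails.
 */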
4754static mblk_t *
4755man_alloc_ubreq_dreq()
4756{
4757	mblk_t			*dreq;
4758	mblk_t			*ubreq = NULL;
4759	union DL_primitives	*dlp;
4760
4761	dreq = allocb(DL_DETACH_REQ_SIZE, BPRI_MED);
4762	if (dreq == NULL)
4763		goto exit;
4764
4765	dreq->b_datap->db_type = M_PROTO;
4766	dlp = (union DL_primitives *)dreq->b_rptr;
4767	dlp->dl_primitive = DL_DETACH_REQ;
4768	dreq->b_wptr += DL_DETACH_REQ_SIZE;
4769
4770	ubreq = allocb(DL_UNBIND_REQ_SIZE, BPRI_MED);
4771	if (ubreq == NULL) {
4772		freemsg(dreq);
4773		goto exit;
4774	}
4775
4776	ubreq->b_datap->db_type = M_PROTO;
4777	dlp = (union DL_primitives *)ubreq->b_rptr;
4778	dlp->dl_primitive = DL_UNBIND_REQ;
4779	ubreq->b_wptr += DL_UNBIND_REQ_SIZE;
4780
4781	ubreq->b_next = dreq;
4782
4783exit:
4784
4785	return (ubreq);
4786}
4787
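/*
 * Duplicate a b_next linked list of DLPI messages via copymsg. If any
 * allocation fails, the partially built copy is freed and NULL is
 * returned.
 */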
4788static mblk_t *
4789man_dup_mplist(mblk_t *mp)
4790{
4791	mblk_t	*listp = NULL;
4792	mblk_t	*tailp = NULL;
4793
4794	for (; mp != NULL; mp = mp->b_next) {
4795
4796		mblk_t	*nmp;
4797		mblk_t	*prev;
4798		mblk_t	*next;
4799
4800		prev = mp->b_prev;
4801		next = mp->b_next;
4802		mp->b_prev = mp->b_next = NULL;
4803
4804		nmp = copymsg(mp);
4805
4806		mp->b_prev = prev;
4807		mp->b_next = next;
4808
4809		if (nmp == NULL)
4810			goto nomem;
4811
4812		if (listp == NULL) {
4813			listp = tailp = nmp;
4814		} else {
4815			tailp->b_next = nmp;
4816			tailp = nmp;
4817		}
4818	}
4819
4820	return (listp);
4821nomem:
4822
4823	while (listp) {
4824		mp = listp;
4825		listp = mp->b_next;
4826		mp->b_next = mp->b_prev = NULL;
4827		freemsg(mp);
4828	}
4829
4830	return (NULL);
4831
4832}
4833
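/*
 * Build a DL_SET_PHYS_ADDR_REQ message carrying the given ethernet
 * address. Returns NULL if the allocation fails.
 */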
4834static mblk_t *
4835man_alloc_physreq_mp(eaddr_t *man_eap)
4836{
4837
4838	mblk_t			*mp;
4839	union DL_primitives	*dlp;
4840	t_uscalar_t		off;
4841	eaddr_t			*eap;
4842
4843	mp = allocb(DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL, BPRI_MED);
4844	if (mp == NULL)
4845		goto exit;
4846
4847	mp->b_datap->db_type = M_PROTO;
4848	dlp = (union DL_primitives *)mp->b_wptr;
4849	dlp->set_physaddr_req.dl_primitive = DL_SET_PHYS_ADDR_REQ;
4850	dlp->set_physaddr_req.dl_addr_length = ETHERADDRL;
4851	off = DL_SET_PHYS_ADDR_REQ_SIZE;
4852	dlp->set_physaddr_req.dl_addr_offset =  off;
4853	mp->b_wptr += DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL;
4854
4855	eap = (eaddr_t *)(mp->b_rptr + off);
4856	ether_copy(man_eap, eap);
4857
exit:
	/*
	 * eap is only initialized when the allocb above succeeded,
	 * so don't dereference it otherwise.
	 */
	if (mp != NULL) {
		MAN_DBG(MAN_DLPI, ("man_alloc_physreq: physaddr %s\n",
		    ether_sprintf(eap)));
	}
4861
4862	return (mp);
4863}
4864
4865/*
 * A new path in a pathgroup has become active for the first time. Set up
 * the lower destinations in preparation for man_pg_activate to call
4868 * man_autoswitch.
4869 */
4870static void
4871man_add_dests(man_pg_t *mpg)
4872{
4873	manstr_t	*msp;
4874	man_dest_t	*mdp;
4875
4876	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4877
4878		if (!man_str_uses_pg(msp, mpg))
4879			continue;
4880
4881		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4882
4883/*
4884 * TBD - Take out
4885 *		ASSERT(mdp->md_device.mdev_state == MDEV_UNASSIGNED);
4886 *		ASSERT(mdp->md_state == MAN_DSTATE_NOTPRESENT);
4887 */
4888		if (mdp->md_device.mdev_state != MDEV_UNASSIGNED) {
4889			cmn_err(CE_NOTE, "man_add_dests mdev !unassigned");
4890			MAN_DBGCALL(MAN_PATH, man_print_mdp(mdp));
4891		}
4892
4893		man_start_dest(mdp, msp, mpg);
4894	}
4895
4896}
4897
4898static int
4899man_remove_dests(man_pg_t *mpg)
4900{
4901	manstr_t	*msp;
4902	int		close_cnt = 0;
4903	man_dest_t	*cdp;
4904	man_dest_t	*mdp;
4905	man_dest_t	*tdp;
4906	man_work_t	*wp;
4907	mblk_t		*mp;
4908	int		status = 0;
4909
4910	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
4911	if (wp == NULL) {
4912		status = ENOMEM;
4913		goto exit;
4914	}
4915
4916	/*
4917	 * Count up number of destinations we need to close.
4918	 */
4919	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4920		if (!man_str_uses_pg(msp, mpg))
4921			continue;
4922
4923		close_cnt++;
4924	}
4925
4926	if (close_cnt == 0)
4927		goto exit;
4928
4929	cdp = man_kzalloc(sizeof (man_dest_t) * close_cnt, KM_NOSLEEP);
4930	if (cdp == NULL) {
4931		status = ENOMEM;
4932		man_work_free(wp);
4933		goto exit;
4934	}
4935
4936	tdp = cdp;
4937	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4938		if (!man_str_uses_pg(msp, mpg))
4939			continue;
4940
4941		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4942
4943		mdp->md_state |= MAN_DSTATE_CLOSING;
4944		mdp->md_device.mdev_state = MDEV_UNASSIGNED;
4945		mdp->md_msp = NULL;
4946		mdp->md_rq = NULL;
4947
4948		/*
4949		 * Clean up optimized destination pointer if we are
4950		 * closing it.
4951		 */
4952		man_set_optimized_dest(msp);
4953
4954		if (mdp->md_lc_timer_id != 0) {
4955			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4956			mdp->md_lc_timer_id = 0;
4957		}
4958		if (mdp->md_bc_id != 0) {
4959			qunbufcall(man_ctl_wq, mdp->md_bc_id);
4960			mdp->md_bc_id = 0;
4961		}
4962
4963		mutex_enter(&mdp->md_lock);
4964		while ((mp = mdp->md_dmp_head) != NULL) {
4965			mdp->md_dmp_head = mp->b_next;
4966			mp->b_next = NULL;
4967			freemsg(mp);
4968		}
4969		mdp->md_dmp_count = 0;
4970		mdp->md_dmp_tail = NULL;
4971		mutex_exit(&mdp->md_lock);
4972
4973		*tdp++ = *mdp;
4974
4975		mdp->md_state = MAN_DSTATE_NOTPRESENT;
4976		mdp->md_muxid = -1;
4977	}
4978
4979	wp->mw_arg.a_mdp = cdp;
4980	wp->mw_arg.a_ndests = close_cnt;
4981	man_work_add(man_bwork_q, wp);
4982
4983exit:
4984	return (status);
4985
4986}
4987
4988/*
4989 * Returns TRUE if stream uses pathgroup, FALSE otherwise.
4990 */
4991static int
4992man_str_uses_pg(manstr_t *msp, man_pg_t *mpg)
4993{
4994	int	status;
4995
4996	status = ((msp->ms_flags & MAN_SFLAG_CONTROL)	||
4997	    (msp->ms_dests == NULL)	||
4998	    (msp->ms_manp == NULL)	||
4999	    (msp->ms_manp->man_meta_ppa != mpg->mpg_man_ppa));
5000
5001	return (!status);
5002}
5003
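/*
 * Return the timeout interval (in ticks) for the given timer type,
 * using the man_t instance tunables when the destination is attached,
 * else the compile time defaults.
 */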
5004static int
5005man_gettimer(int timer, man_dest_t *mdp)
5006{
5007
5008	int attached = TRUE;
5009	int time = 0;
5010
5011	if (mdp == NULL || mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
5012		attached = FALSE;
5013
5014	switch (timer) {
5015	case MAN_TIMER_INIT:
5016		if (attached)
5017			time = mdp->md_msp->ms_manp->man_init_time;
5018		else
5019			time = MAN_INIT_TIME;
5020		break;
5021
5022	case MAN_TIMER_LINKCHECK:
5023		if (attached) {
5024			if (mdp->md_linkstate == MAN_LINKSTALE)
5025				time = mdp->md_msp->ms_manp->man_linkstale_time;
5026			else
5027				time = mdp->md_msp->ms_manp->man_linkcheck_time;
5028		} else
5029			time = MAN_LINKCHECK_TIME;
5030		break;
5031
5032	case MAN_TIMER_DLPIRESET:
5033		if (attached)
5034			time = mdp->md_msp->ms_manp->man_dlpireset_time;
5035		else
5036			time = MAN_DLPIRESET_TIME;
5037		break;
5038
5039	default:
5040		MAN_DBG(MAN_LINK, ("man_gettimer: unknown timer %d", timer));
5041		time = MAN_LINKCHECK_TIME;
5042		break;
5043	}
5044
5045	return (drv_usectohz(time));
5046}
5047
5048/*
5049 * Check the links for each active destination. Called inside inner
5050 * perimeter via qtimeout. This timer only runs on the domain side of the
5051 * driver. It should never run on the SC side.
5052 *
5053 * On a MAN_LINKGOOD link, we check/probe the link health every
 * MAN_LINKCHECK_TIME seconds. If the link goes MAN_LINKSTALE, then we probe
 * the link every MAN_LINKSTALE_TIME seconds, and fail the link after probing
 * the link MAN_LINKSTALE_RETRIES times.
 * The man_lock is held to synchronize access to the pathgroup list (man_pg).
5058 */
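/*
 * The link state transitions driven below, in sketch form:
 *
 *	LINKINIT/LINKGOOD --(no rx traffic seen)---------> LINKSTALE
 *	LINKSTALE	  --(rx traffic seen)------------> LINKGOOD
 *	LINKSTALE	  --(man_linkstale_retries probes
 *			     without rx traffic)---------> LINKFAIL
 *	LINKFAIL	  --(man_do_autoswitch)----------> path failover
 */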
5059void
5060man_linkcheck_timer(void *argp)
5061{
5062	man_dest_t		*mdp = (man_dest_t *)argp;
5063	int			restart_timer = TRUE;
5064	int			send_ping = TRUE;
5065	int			newstate;
5066	int			oldstate;
5067	man_pg_t		*mpg;
5068	man_path_t		*mp;
5069
5070	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: mdp"));
5071	MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5072
5073	/*
	 * Clear timeout id and check if someone's waiting on us to
5075	 * complete a close.
5076	 */
5077	mdp->md_lc_timer_id = 0;
5078
5079	if (mdp->md_state == MAN_DSTATE_NOTPRESENT ||
5080	    mdp->md_state & MAN_DSTATE_BUSY) {
5081
5082		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: not ready mdp"));
5083		MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5084		goto exit;
5085	}
5086
5087	mutex_enter(&man_lock);
5088	/*
5089	 * If the lower stream needs initializing, just go straight to
5090	 * switch code. As the linkcheck timer is started for all
5091	 * SAPs, do not send ping packets during the initialization.
5092	 */
5093	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5094		send_ping = FALSE;
5095		goto do_switch;
5096	}
5097
5098	newstate = oldstate = mdp->md_linkstate;
5099
5100	if (!man_needs_linkcheck(mdp)) {
5101		cmn_err(CE_NOTE,
5102		    "man_linkcheck_timer: unneeded linkcheck on mdp(0x%p)",
5103		    (void *)mdp);
5104		mutex_exit(&man_lock);
5105		return;
5106	}
5107
5108	/*
5109	 * The above call to  man_needs_linkcheck() validates
5110	 * mdp->md_msp and mdp->md_msp->ms_manp pointers.
5111	 */
5112	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5113	ASSERT(mpg != NULL);
5114	mp = man_find_path_by_ppa(mpg->mpg_pathp, mdp->md_device.mdev_ppa);
5115	ASSERT(mp != NULL);
5116
5117	/*
5118	 * This is the most common case, when traffic is flowing.
5119	 */
5120	if (mdp->md_rcvcnt != mdp->md_lastrcvcnt) {
5121
5122		newstate = MAN_LINKGOOD;
5123		mdp->md_lastrcvcnt = mdp->md_rcvcnt;
5124		send_ping = FALSE;
5125
5126		/*
5127		 * Clear the FAILED flag and update lru.
5128		 */
5129		mp->mp_device.mdev_state &= ~MDEV_FAILED;
5130		(void) drv_getparm(TIME, &mp->mp_lru);
5131
5132		if (mdp->md_link_updown_msg == MAN_LINK_DOWN_MSG) {
5133			man_t *manp = mdp->md_msp->ms_manp;
5134
5135			cmn_err(CE_NOTE, "%s%d Link up",
5136			    ddi_major_to_name(manp->man_meta_major),
5137			    manp->man_meta_ppa);
5138
5139			mdp->md_link_updown_msg = MAN_LINK_UP_MSG;
5140		}
5141
5142		goto done;
5143	}
5144
5145	/*
5146	 * If we're here, it means we have not seen any traffic
5147	 */
5148	switch (oldstate) {
5149	case MAN_LINKINIT:
5150	case MAN_LINKGOOD:
5151		newstate = MAN_LINKSTALE;
5152		mdp->md_linkstales++;
5153		mdp->md_linkstale_retries =
5154		    mdp->md_msp->ms_manp->man_linkstale_retries;
5155		break;
5156
5157	case MAN_LINKSTALE:
5158	case MAN_LINKFAIL:
5159		mdp->md_linkstales++;
5160		mdp->md_linkstale_retries--;
5161		if (mdp->md_linkstale_retries < 0) {
5162			newstate = MAN_LINKFAIL;
5163			mdp->md_linkfails++;
5164			mdp->md_linkstale_retries =
5165			    mdp->md_msp->ms_manp->man_linkstale_retries;
5166			/*
5167			 * Mark the destination as FAILED and
5168			 * update lru.
5169			 */
5170			if (oldstate != MAN_LINKFAIL) {
5171				mp->mp_device.mdev_state |= MDEV_FAILED;
5172				(void) drv_getparm(TIME, &mp->mp_lru);
5173			}
5174		}
5175		break;
5176
5177	default:
5178		cmn_err(CE_WARN, "man_linkcheck_timer: illegal link"
5179		    " state %d", oldstate);
5180		break;
5181	}
5182done:
5183
5184	if (oldstate != newstate) {
5185
5186		MAN_DBG(MAN_LINK, ("man_linkcheck_timer"
5187		    " link state %s -> %s", lss[oldstate],
5188		    lss[newstate]));
5189
5190		mdp->md_linkstate = newstate;
5191	}
5192
5193	/*
5194	 * Do any work required from state transitions above.
5195	 */
5196	if (newstate == MAN_LINKFAIL) {
5197do_switch:
5198		if (!man_do_autoswitch(mdp)) {
5199			/*
5200			 * Stop linkcheck timer until switch completes.
5201			 */
5202			restart_timer = FALSE;
5203			send_ping = FALSE;
5204		}
5205	}
5206
5207	mutex_exit(&man_lock);
5208	if (send_ping)
5209		man_do_icmp_bcast(mdp, mdp->md_msp->ms_sap);
5210
5211	if (restart_timer)
5212		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
5213		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
5214
5215exit:
5216	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: returns"));
5217
5218}
5219
5220/*
5221 * Handle linkcheck initiated autoswitching.
5222 * Called with man_lock held.
5223 */
5224static int
5225man_do_autoswitch(man_dest_t *mdp)
5226{
5227	man_pg_t	*mpg;
5228	man_path_t	*ap;
5229	int		status = 0;
5230
5231	ASSERT(MUTEX_HELD(&man_lock));
5232	/*
5233	 * Set flags and refcnt. Cleared in man_iswitch when SWITCH completes.
5234	 */
5235	mdp->md_msp->ms_manp->man_refcnt++;
5236
5237	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5238	ASSERT(mpg);
5239
5240	if (mpg->mpg_flags & MAN_PG_SWITCHING)
5241		return (EBUSY);
5242
5243	mpg->mpg_flags |= MAN_PG_SWITCHING;
5244
5245	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5246		/*
5247		 * We're initializing, ask for a switch to our currently
5248		 * active device.
5249		 */
5250		status = man_autoswitch(mpg, &mdp->md_device, NULL);
5251	} else {
5252
5253		if (mdp->md_msp != NULL && mdp->md_msp->ms_manp != NULL &&
5254		    mdp->md_link_updown_msg == MAN_LINK_UP_MSG) {
5255
5256			man_t *manp = mdp->md_msp->ms_manp;
5257
5258			cmn_err(CE_NOTE, "%s%d Link down",
5259			    ddi_major_to_name(manp->man_meta_major),
5260			    manp->man_meta_ppa);
5261		}
5262		mdp->md_link_updown_msg = MAN_LINK_DOWN_MSG;
5263
5264		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: link failure on %s%d",
5265		    ddi_major_to_name(mdp->md_device.mdev_major),
5266		    mdp->md_device.mdev_ppa));
5267
5268		ap = man_find_alternate_path(mpg->mpg_pathp);
5269
5270		if (ap == NULL) {
5271			status = ENODEV;
5272			goto exit;
5273		}
5274		status = man_autoswitch(mpg, &ap->mp_device, NULL);
5275	}
5276exit:
5277	if (status != 0) {
5278		/*
5279		 * man_iswitch not going to run, clean up.
5280		 */
5281		mpg->mpg_flags &= ~MAN_PG_SWITCHING;
5282		mdp->md_msp->ms_manp->man_refcnt--;
5283	}
5284
5285	return (status);
5286}
5287
5288/*
5289 * Gather up all lower multiplexor streams that have this link open and
5290 * try to switch them. Called from inner perimeter and holding man_lock.
5291 *
 *	mpg		- Pathgroup to do the switch for.
 *	st_devp		- New device to switch to.
 *	waiter_wp	- Waiter's work request to reuse, or NULL to
 *			  allocate a fresh one.
5295 */
5296static int
5297man_autoswitch(man_pg_t *mpg, man_dev_t *st_devp, man_work_t *waiter_wp)
5298{
5299	man_work_t	*wp;
5300	int		sdp_cnt = 0;
5301	man_dest_t	*sdp;
5302	int		status = 0;
5303
5304	ASSERT(MUTEX_HELD(&man_lock));
5305	if (waiter_wp == NULL) {
5306		wp = man_work_alloc(MAN_WORK_SWITCH, KM_NOSLEEP);
5307		if (wp == NULL) {
5308			status = ENOMEM;
5309			goto exit;
5310		}
5311	} else {
5312		ASSERT(waiter_wp->mw_type == MAN_WORK_SWITCH);
5313		wp = waiter_wp;
5314	}
5315
5316	/*
5317	 * Set dests as PLUMBING, cancel timers and return array of dests
5318	 * that need a switch.
5319	 */
5320	status = man_prep_dests_for_switch(mpg, &sdp, &sdp_cnt);
5321	if (status) {
5322		if (waiter_wp == NULL)
5323			man_work_free(wp);
5324		goto exit;
5325	}
5326
5327	/*
5328	 * If no streams are active, there are no streams to switch.
5329	 * Return ENODEV (see man_pg_activate).
5330	 */
5331	if (sdp_cnt == 0) {
5332		if (waiter_wp == NULL)
5333			man_work_free(wp);
5334		status = ENODEV;
5335		goto exit;
5336	}
5337
5338	/*
5339	 * Ask the bgthread to switch. See man_bwork.
5340	 */
5341	wp->mw_arg.a_sf_dev = sdp->md_device;
5342	wp->mw_arg.a_st_dev = *st_devp;
5343	wp->mw_arg.a_pg_id = mpg->mpg_pg_id;
5344	wp->mw_arg.a_man_ppa = mpg->mpg_man_ppa;
5345
5346	wp->mw_arg.a_mdp = sdp;
5347	wp->mw_arg.a_ndests = sdp_cnt;
5348	man_work_add(man_bwork_q, wp);
5349
5350exit:
5351
5352	return (status);
5353}
5354
5355/*
5356 * If an alternate path exists for pathgroup, arrange for switch to
5357 * happen. Note that we need to switch each of msp->dests[pg_id], for
5358 * all on man_strup. We must:
5359 *
5360 *		Cancel any timers
5361 *		Mark dests as PLUMBING
 *		Submit switch request to man_bwork_q.
5363 */
5364static int
5365man_prep_dests_for_switch(man_pg_t *mpg, man_dest_t **mdpp, int *cntp)
5366{
5367	manstr_t	*msp;
5368	man_dest_t	*mdp;
5369	int		sdp_cnt = 0;
5370	man_dest_t	*sdp = NULL;
5371	man_dest_t	*tdp;
5372	int		status = 0;
5373
5374	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: pg_id %d",
5375	    mpg->mpg_pg_id));
5376
5377	/*
5378	 * Count up number of streams, there is one destination that needs
5379	 * switching per stream.
5380	 */
5381	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5382		if (man_str_uses_pg(msp, mpg))
5383			sdp_cnt++;
5384	}
5385
5386	if (sdp_cnt == 0)
5387		goto exit;
5388
5389	sdp = man_kzalloc(sizeof (man_dest_t) * sdp_cnt, KM_NOSLEEP);
5390	if (sdp == NULL) {
5391		status = ENOMEM;
5392		goto exit;
5393	}
5394	tdp = sdp;
5395	/*
5396	 * Mark each destination as unusable.
5397	 */
5398	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5399		if (man_str_uses_pg(msp, mpg)) {
5400
5401			/*
5402			 * Mark destination as plumbing and store the
5403			 * address of sdp as a way to identify the
5404			 * SWITCH request when it comes back (see man_iswitch).
5405			 */
5406			mdp = &msp->ms_dests[mpg->mpg_pg_id];
5407			mdp->md_state |= MAN_DSTATE_PLUMBING;
5408			mdp->md_switch_id = sdp;
5409
5410			/*
5411			 * Copy destination info.
5412			 */
5413			bcopy(mdp, tdp, sizeof (man_dest_t));
5414			tdp++;
5415
5416			/*
5417			 * Cancel timers.
5418			 */
5419			if (mdp->md_lc_timer_id) {
5420				(void) quntimeout(man_ctl_wq,
5421				    mdp->md_lc_timer_id);
5422				mdp->md_lc_timer_id = 0;
5423			}
5424			if (mdp->md_bc_id) {
5425				qunbufcall(man_ctl_wq, mdp->md_bc_id);
5426				mdp->md_bc_id = 0;
5427			}
5428		}
5429	}
5430
5431	*mdpp = sdp;
5432	*cntp = sdp_cnt;
5433	status = 0;
5434exit:
5435
5436	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: returns %d"
5437	    " sdp(0x%p) sdp_cnt(%d)", status, (void *)sdp, sdp_cnt));
5438
5439	return (status);
5440
5441}
5442
5443/*
5444 * The code below generates an ICMP echo packet and sends it to the
5445 * broadcast address in the hopes that the other end will respond
5446 * and the man_linkcheck_timer logic will see the traffic.
5447 *
5448 * This assumes ethernet-like media.
5449 */
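/*
 * In outline, the probe message built by man_pinger() for the IPv4
 * case is:
 *
 *	M_PROTO: dl_unitdata_req_t + man_dladdr_t (broadcast, IP sap)
 *	M_DATA:  ipha_t | icmph_t (ICMP_ECHO_REQUEST) | 64 data bytes
 */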
5450/*
5451 * Generate an ICMP packet. Called exclusive inner perimeter.
5452 *
5453 *	mdp - destination to send packet to.
5454 *	sap - either ETHERTYPE_ARP or ETHERTYPE_IPV6
 *	sap - either ETHERTYPE_IP or ETHERTYPE_IPV6
5456static void
5457man_do_icmp_bcast(man_dest_t *mdp, t_uscalar_t sap)
5458{
5459	mblk_t			*mp = NULL;
5460
5461	/* TBD - merge pinger and this routine. */
5462
5463	ASSERT(sap == ETHERTYPE_IPV6 || sap == ETHERTYPE_IP);
5464
5465	if (sap == ETHERTYPE_IPV6) {
5466		mdp->md_icmpv6probes++;
5467	} else {
5468		mdp->md_icmpv4probes++;
5469	}
5470	/*
5471	 * Send the ICMP message
5472	 */
5473	mp = man_pinger(sap);
5474
5475	MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: sap=0x%x mp=0x%p",
5476	    sap, (void *)mp));
5477	if (mp == NULL)
5478		return;
5479
5480	/*
5481	 * Send it out.
5482	 */
5483	if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
5484
5485		MAN_DBG(MAN_LINK, ("man_do_icmp_broadcast: xmit failed"));
5486
5487		freemsg(mp);
5488	}
5489
5490}
5491
5492static mblk_t *
5493man_pinger(t_uscalar_t sap)
5494{
5495	mblk_t		*mp = NULL;
5496	man_dladdr_t	dlsap;
5497	icmph_t		*icmph;
5498	int		ipver;
5499	ipha_t		*ipha;
5500	ip6_t		*ip6h;
5501	int		iph_hdr_len;
5502	int		datalen = 64;
5503	uchar_t		*datap;
5504	uint16_t	size;
5505	uchar_t		i;
5506
5507	dlsap.dl_sap = htons(sap);
5508	bcopy(&etherbroadcast, &dlsap.dl_phys, sizeof (dlsap.dl_phys));
5509
5510	if (sap == ETHERTYPE_IPV6) {
5511		ipver = IPV6_VERSION;
5512		iph_hdr_len = sizeof (ip6_t);
5513		size = ICMP6_MINLEN;
5514	} else {
5515		ipver = IPV4_VERSION;
5516		iph_hdr_len = sizeof (ipha_t);
5517		size = ICMPH_SIZE;
5518	}
5519	size += (uint16_t)iph_hdr_len;
5520	size += datalen;
5521
5522	mp = man_alloc_udreq(size, &dlsap);
5523	if (mp == NULL)
5524		goto exit;
5525
5526	/*
5527	 * fill out the ICMP echo packet headers
5528	 */
5529	mp->b_cont->b_wptr += iph_hdr_len;
5530	if (ipver == IPV4_VERSION) {
5531		ipha = (ipha_t *)mp->b_cont->b_rptr;
5532		ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
5533		    | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
5534		ipha->ipha_type_of_service = 0;
5535		ipha->ipha_length = size;
5536		ipha->ipha_fragment_offset_and_flags = IPH_DF;
5537		ipha->ipha_ttl = 1;
5538		ipha->ipha_protocol = IPPROTO_ICMP;
5539		if (man_is_on_domain) {
5540			manc_t		manc;
5541
5542			if (man_get_iosram(&manc)) {
5543				freemsg(mp);
5544				mp = NULL;
5545				goto exit;
5546			}
5547
5548			/*
5549			 * Domain generates ping packets for domain to
5550			 * SC network (dman0 <--> scman0).
5551			 */
5552			ipha->ipha_dst = manc.manc_sc_ipaddr;
5553			ipha->ipha_src = manc.manc_dom_ipaddr;
5554		} else {
5555			/*
5556			 * Note that ping packets are only generated
5557			 * by the SC across scman1 (SC to SC network).
5558			 */
5559			ipha->ipha_dst = man_sc_ipaddrs.ip_other_sc_ipaddr;
5560			ipha->ipha_src = man_sc_ipaddrs.ip_my_sc_ipaddr;
5561		}
5562
5563		ipha->ipha_ident = 0;
5564
5565		ipha->ipha_hdr_checksum = 0;
5566		ipha->ipha_hdr_checksum = IP_CSUM(mp->b_cont, 0, 0);
5567
5568	} else {
5569		ip6h = (ip6_t *)mp->b_cont->b_rptr;
5570		/*
5571		 * IP version = 6, priority = 0, flow = 0
5572		 */
5573		ip6h->ip6_flow = (IPV6_VERSION << 28);
5574		ip6h->ip6_plen =
5575		    htons((short)(size - iph_hdr_len));
5576		ip6h->ip6_nxt = IPPROTO_ICMPV6;
5577		ip6h->ip6_hlim = 1;	/* stay on link */
5578
5579		if (man_is_on_domain) {
5580			manc_t		manc;
5581
5582			if (man_get_iosram(&manc)) {
5583				freemsg(mp);
5584				mp = NULL;
5585				goto exit;
5586			}
5587
5588			/*
5589			 * Domain generates ping packets for domain to
5590			 * SC network (dman0 <--> scman0).
5591			 */
5592			ip6h->ip6_src = manc.manc_dom_ipv6addr;
5593			ip6h->ip6_dst = manc.manc_sc_ipv6addr;
5594		} else {
5595			/*
5596			 * Note that ping packets are only generated
5597			 * by the SC across scman1 (SC to SC network).
5598			 */
5599			ip6h->ip6_src = man_sc_ip6addrs.ip6_my_sc_ipaddr;
5600			ip6h->ip6_dst = man_sc_ip6addrs.ip6_other_sc_ipaddr;
5601		}
5602	}
5603
5604	/*
5605	 * IPv6 and IP are the same for ICMP as far as I'm concerned.
5606	 */
5607	icmph = (icmph_t *)mp->b_cont->b_wptr;
5608	if (ipver == IPV4_VERSION) {
5609		mp->b_cont->b_wptr += ICMPH_SIZE;
5610		icmph->icmph_type = ICMP_ECHO_REQUEST;
5611		icmph->icmph_code = 0;
5612	} else {
5613		mp->b_cont->b_wptr += ICMP6_MINLEN;
5614		icmph->icmph_type = ICMP6_ECHO_REQUEST;
5615		icmph->icmph_code = 0;
5616	}
5617
5618	datap = mp->b_cont->b_wptr;
5619	mp->b_cont->b_wptr += datalen;
5620
5621	for (i = 0; i < datalen; i++)
5622		*datap++ = i;
5623
5624	if (ipver == IPV4_VERSION) {
5625		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len, 0);
5626	} else {
5627		uint32_t	sum;
5628
5629		sum = htons(IPPROTO_ICMPV6) + ip6h->ip6_plen;
5630		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len - 32,
5631		    (sum & 0xffff) + (sum >> 16));
5632	}
5633
5634/*
5635 * TBD
5636 *	icp->icmp_time =  ???;
5637 */
5638
5639exit:
5640	return (mp);
5641}
5642
5643static mblk_t *
5644man_alloc_udreq(int size, man_dladdr_t *dlsap)
5645{
5646	dl_unitdata_req_t	*udreq;
5647	mblk_t			*bp;
5648	mblk_t			*mp;
5649
5650	mp = allocb(sizeof (dl_unitdata_req_t) + sizeof (*dlsap), BPRI_MED);
5651
5652	if (mp == NULL) {
		cmn_err(CE_NOTE, "man_alloc_udreq: allocb failed");
5654		return (NULL);
5655	}
5656
5657	if ((bp = allocb(size, BPRI_MED)) == NULL) {
5658		freemsg(mp);
		cmn_err(CE_NOTE, "man_alloc_udreq: allocb failed");
5660		return (NULL);
5661	}
5662	bzero(bp->b_rptr, size);
5663
5664	mp->b_cont = bp;
5665	mp->b_datap->db_type = M_PROTO;
5666	udreq = (dl_unitdata_req_t *)mp->b_wptr;
5667	mp->b_wptr += sizeof (dl_unitdata_req_t);
5668
5669	/*
5670	 * phys addr first - TBD
5671	 */
5672	bcopy((char *)dlsap, mp->b_wptr, sizeof (*dlsap));
5673	mp->b_wptr += sizeof (*dlsap);
5674
5675	udreq->dl_primitive = DL_UNITDATA_REQ;
5676	udreq->dl_dest_addr_length = sizeof (*dlsap);
5677	udreq->dl_dest_addr_offset = sizeof (*udreq);
5678	udreq->dl_priority.dl_min = 0;
5679	udreq->dl_priority.dl_max = 0;
5680
5681	return (mp);
5682}
5683
5684
5685/*
5686 * The routines in this file are executed by the MAN background thread,
5687 * which executes outside of the STREAMS framework (see man_str.c). It is
5688 * allowed to do the things required to modify the STREAMS driver (things
5689 * that are normally done from a user process). These routines do things like
5690 * open and close drivers, PLINK and PUNLINK streams to/from the multiplexor,
5691 * etc.
5692 *
5693 * The mechanism of communication between the STREAMS portion of the driver
 * and the background thread portion is a pair of work queues, man_bwork_q
5695 * and man_iwork_q (background work q and streams work q).  Work
5696 * requests are placed on those queues when one half of the driver wants
5697 * the other half to do some work for it.
5698 *
5699 * The MAN background thread executes the man_bwork routine. Its sole
5700 * job is to process work requests placed on this work q. The MAN upper
5701 * write service routine is responsible for processing work requests posted
 * to the man_iwork_q.
5703 *
5704 * Both work queues are protected by the global mutex man_lock. The
 * man_bwork thread is signaled via the condvar man_bwork_q->q_cv. The
 * man_uwsrv routine is signaled by calling qenable (forcing man_uwsrv
 * to run).
5707 */
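/*
 * A typical hand-off, for illustration (compare man_bswitch, which
 * hands work back the other way):
 *
 *	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
 *	wp->mw_arg.a_mdp = ...;
 *	mutex_enter(&man_lock);
 *	man_work_add(man_bwork_q, wp);
 *	mutex_exit(&man_lock);
 *
 * after which the requester either cv_waits, qwaits, or simply lets
 * the consumer free the request when done (see the MAN_WFLAGS_*
 * handling in man_bwork below).
 */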
5708
5709/*
5710 * man_bwork - Work thread for this device.  It is responsible for
5711 * performing operations which can't occur within the STREAMS framework.
5712 *
5713 * Locking:
5714 *	- Called holding no locks
5715 *	- Obtains the global mutex man_lock to remove work from
 *	  man_bwork_q, and post work to man_iwork_q.
5717 *	- Note that we do not want to hold any locks when making
5718 *	  any ldi_ calls.
5719 */
5720void
5721man_bwork()
5722{
5723	man_work_t	*wp;
5724	int		done = 0;
5725	callb_cpr_t	cprinfo;
5726	int		wp_finished;
5727
5728	CALLB_CPR_INIT(&cprinfo, &man_lock, callb_generic_cpr,
5729	    "mn_work_thrd");
5730
5731	MAN_DBG(MAN_CONFIG, ("man_bwork: enter"));
5732
5733	while (done == 0) {
5734
5735		mutex_enter(&man_lock);
5736		/*
5737		 * While there is nothing to do, sit in cv_wait.  If work
5738		 * request is made, requester will signal.
5739		 */
5740		while (man_bwork_q->q_work == NULL) {
5741
5742			CALLB_CPR_SAFE_BEGIN(&cprinfo);
5743
5744			cv_wait(&man_bwork_q->q_cv, &man_lock);
5745
5746			CALLB_CPR_SAFE_END(&cprinfo, &man_lock);
5747		}
5748
5749		wp = man_bwork_q->q_work;
5750		man_bwork_q->q_work = wp->mw_next;
5751		wp->mw_next = NULL;
5752		mutex_exit(&man_lock);
5753
5754		wp_finished = TRUE;
5755
5756		MAN_DBG(MAN_SWITCH, ("man_bwork: type %s",
5757		    _mw_type[wp->mw_type]));
5758
5759		switch (wp->mw_type) {
5760		case MAN_WORK_OPEN_CTL:
5761			wp->mw_status = man_open_ctl();
5762			break;
5763
5764		case MAN_WORK_CLOSE_CTL:
5765			man_close_ctl();
5766			break;
5767
5768		case MAN_WORK_CLOSE:
5769		case MAN_WORK_CLOSE_STREAM:
5770			man_bclose(&wp->mw_arg);
5771			break;
5772
5773		case MAN_WORK_SWITCH:
5774			man_bswitch(&wp->mw_arg, wp);
5775			wp_finished = FALSE;
5776			break;
5777
5778		case MAN_WORK_STOP:		/* man_bwork_stop() */
5779			done = 1;
5780			mutex_enter(&man_lock);
5781			CALLB_CPR_EXIT(&cprinfo); /* Unlocks man_lock */
5782			break;
5783
5784		default:
5785			cmn_err(CE_WARN, "man_bwork: "
5786			    "illegal work type(%d)", wp->mw_type);
5787			break;
5788		}
5789
5790		mutex_enter(&man_lock);
5791
5792		if (wp_finished) {
5793			wp->mw_flags |= MAN_WFLAGS_DONE;
5794			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
5795				cv_signal(&wp->mw_cv);
5796			else if (wp->mw_flags & MAN_WFLAGS_QWAITER)
5797				qenable(wp->mw_q);
5798			else
5799				man_work_free(wp);
5800		}
5801
5802		mutex_exit(&man_lock);
5803	}
5804
5805	MAN_DBG(MAN_CONFIG, ("man_bwork: thread_exit"));
5806
5807	mutex_enter(&man_lock);
5808	man_bwork_id = NULL;
5809	mutex_exit(&man_lock);
5810
5811	thread_exit();
5812}
5813
5814/*
5815 * man_open_ctl - Open the control stream.
5816 *
5817 *	returns	- success - 0
5818 *		- failure - errno code
5819 *
5820 * Mutex Locking Notes:
5821 *	We need a way to keep the CLONE_OPEN qwaiters in man_open from
5822 *	checking the man_config variables after the ldi_open call below
5823 *	returns from man_open, leaving the inner perimeter. So, we use the
5824 *	man_lock to synchronize the threads in man_open_ctl and man_open.  We
5825 *	hold man_lock across this call into man_open, which in general is a
 *	no-no. But, the STREAMS portion of the driver (other than open)
 *	doesn't use it. So, if ldi_open gets hijacked to run any part of
 *	the MAN streams driver, it won't end up recursively trying to acquire
 *	man_lock. Note that the non-CLONE_OPEN portion of man_open doesn't
5830 *	acquire it either, so again no recursive mutex.
5831 */
5832static int
5833man_open_ctl()
5834{
5835	int		status = 0;
5836	ldi_handle_t	ctl_lh = NULL;
5837	ldi_ident_t	li = NULL;
5838
5839	MAN_DBG(MAN_CONFIG, ("man_open_ctl: plumbing control stream\n"));
5840
5841	/*
5842	 * Get eri driver loaded and kstats initialized. Is there a better
5843	 * way to do this? - TBD.
5844	 */
5845	status = ldi_ident_from_mod(&modlinkage, &li);
5846	if (status) {
5847		cmn_err(CE_WARN,
5848		    "man_open_ctl: ident alloc failed, error %d", status);
5849		goto exit;
5850	}
5851
5852	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
5853	    kcred, &ctl_lh, li);
5854	if (status) {
5855		cmn_err(CE_WARN,
5856		    "man_open_ctl: eri open failed, error %d", status);
5857		ctl_lh = NULL;
5858		goto exit;
5859	}
5860	(void) ldi_close(ctl_lh, NULL, kcred);
5861	ctl_lh = NULL;
5862
5863	mutex_enter(&man_lock);
5864
5865	if (man_ctl_lh != NULL) {
5866		mutex_exit(&man_lock);
5867		goto exit;
5868	}
5869
5870	ASSERT(man_ctl_wq == NULL);
5871	mutex_exit(&man_lock);
5872
5873	status = ldi_open_by_name(DMAN_INT_PATH, FREAD | FWRITE | FNOCTTY,
5874	    kcred, &ctl_lh, li);
5875	if (status) {
5876		cmn_err(CE_WARN,
5877		    "man_open_ctl: man control dev open failed, "
5878		    "error %d", status);
5879		goto exit;
5880	}
5881
5882	/*
	 * Update global config state. TBD - don't need lock here, since
	 * everyone is stuck in open until we finish. Only other modifier
	 * is man_deconfigure via _fini, which returns EBUSY if there are
	 * any open streams (other than control). Do need to signal qwaiters
5887	 * on error.
5888	 */
5889	mutex_enter(&man_lock);
5890	ASSERT(man_config_state == MAN_CONFIGURING);
5891	ASSERT(man_ctl_lh == NULL);
5892	man_ctl_lh = ctl_lh;
5893	mutex_exit(&man_lock);
5894
5895exit:
5896	if (li)
5897		ldi_ident_release(li);
5898
5899	MAN_DBG(MAN_CONFIG, ("man_open_ctl: man_ctl_lh(0x%p) errno = %d\n",
5900	    (void *)man_ctl_lh, status));
5901
5902	return (status);
5903}
5904
5905/*
5906 * man_close_ctl - Close control stream, we are about to unload driver.
5907 *
5908 * Locking:
5909 *	- Called holding no locks.
5910 */
5911static void
5912man_close_ctl()
5913{
5914	ldi_handle_t tlh;
5915
5916	MAN_DBG(MAN_CONFIG, ("man_close_ctl: unplumbing control stream\n"));
5917
5918	mutex_enter(&man_lock);
5919	if ((tlh = man_ctl_lh) != NULL)
5920		man_ctl_lh = NULL;
5921	mutex_exit(&man_lock);
5922
5923	if (tlh != NULL) {
5924		(void) ldi_close(tlh, NULL, kcred);
5925	}
5926
5927}
5928
5929/*
 * Close the lower streams. Get all the timers canceled, close the
 * lower streams and delete the dest array.
5936 *
5937 * Locking:
5938 *	- Called holding no locks.
5939 */
5940static void
5941man_bclose(man_adest_t *adp)
5942{
5943	int		i;
5944	man_dest_t	*mdp;
5945
5946	man_cancel_timers(adp);
5947
5948	for (i = 0; i < adp->a_ndests; i++) {
5949		mdp = &adp->a_mdp[i];
5950
		if (mdp->md_muxid != -1)
			man_unplumb(mdp);

		/*
		 * Destroy each destination's lock here, rather than
		 * just the last one's, before the array is freed below.
		 */
		mutex_destroy(&mdp->md_lock);
	}

5956	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
5957	adp->a_mdp = NULL;
5958}
5959
5960/*
 * We want to close down all lower streams. Cancel any outstanding
 * linkcheck timers and bufcalls on the given destinations so that all
 * timers and work related to these lower streams are quiesced.
5966 */
5967static void
5968man_cancel_timers(man_adest_t *adp)
5969{
5970	man_dest_t	*mdp;
5971	int		cnt;
5972	int		i;
5973
5974	mdp = adp->a_mdp;
5975	cnt = adp->a_ndests;
5976
5977	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: mdp(0x%p) cnt %d",
5978	    (void *)mdp, cnt));
5979
5980	for (i = 0; i < cnt; i++) {
5981
5982		if (mdp[i].md_lc_timer_id != 0) {
5983			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
5984			mdp[i].md_lc_timer_id = 0;
5985		}
5986
5987		if (mdp[i].md_bc_id != 0) {
5988			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
5989			mdp[i].md_bc_id = 0;
5990		}
5991	}
5992
5993	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: returns"));
5994}
5995
5996/*
5997 * A failover is started at start of day, when the driver detects a
5998 * link failure (see man_linkcheck_timer), or when DR detaches
5999 * the IO board containing the current active link between SC and
6000 * domain (see man_dr_detach, man_iwork, and man_do_dr_detach). A
6001 * MAN_WORK_SWITCH work request containing all the lower streams that
 * should be switched is posted on the man_bwork_q. This work request is
6003 * processed here. Once all lower streams have been switched to an
6004 * alternate path, the MAN_WORK_SWITCH work request is passed back to
6005 * man_iwork_q where it is processed within the inner perimeter of the
6006 * STREAMS framework (see man_iswitch).
6007 *
6008 * Note that when the switch fails for whatever reason, we just hand
6009 * back the lower streams untouched and let another failover happen.
6010 * Hopefully we will sooner or later succeed at the failover.
6011 */
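/*
 * In outline, the failover below is:
 *
 *	1. Copy the dest array, retargeting each md_device at the
 *	   switch-to device.
 *	2. Sync the path kstats (man_update_path_kstats).
 *	3. man_plumb() each copy; on partial failure, unplumb the new
 *	   streams and bail out.
 *	4. On a domain, man_dossc_switch() tells the SSC to switch to
 *	   the new path.
 *	5. Free the old dest array and install the new one.
 */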
6012static void
6013man_bswitch(man_adest_t *adp, man_work_t *wp)
6014{
6015	man_dest_t	*tdp;
6016	man_t		*manp;
6017	int		i;
6018	int		status = 0;
6019
	 * Make a temporary copy of the dest array, updating the device to
	 * the alternate, and try to open all lower streams. The bgthread
	 * can sleep.
6022	 * alternate and try to open all lower streams. bgthread can sleep.
6023	 */
6024
6025	tdp = man_kzalloc(sizeof (man_dest_t) * adp->a_ndests,
6026	    KM_SLEEP);
6027	bcopy(adp->a_mdp, tdp, sizeof (man_dest_t) * adp->a_ndests);
6028
6029	/*
	 * Before we switch to the new path, let's sync the kstats.
6031	 */
6032	mutex_enter(&man_lock);
6033
6034	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
6035	if (manp != NULL) {
6036		man_update_path_kstats(manp);
6037	} else
6038		status = ENODEV;
6039
6040	mutex_exit(&man_lock);
6041
6042	if (status != 0)
6043		goto exit;
6044
6045	for (i = 0; i < adp->a_ndests; i++) {
6046
6047		tdp[i].md_device = adp->a_st_dev;
6048		tdp[i].md_muxid = -1;
6049
6050		if (man_plumb(&tdp[i]))
6051			break;
6052	}
6053
6054	/*
6055	 * Didn't plumb everyone, unplumb new lower stuff and return.
6056	 */
6057	if (i < adp->a_ndests) {
6058		int	j;
6059
6060		for (j = 0; j <= i; j++)
6061			man_unplumb(&tdp[j]);
6062		status = EAGAIN;
6063		goto exit;
6064	}
6065
6066	if (man_is_on_domain && man_dossc_switch(adp->a_st_dev.mdev_exp_id)) {
6067		/*
		 * If we can't set the new path on the SSC, then fail the
6069		 * failover.
6070		 */
6071		for (i = 0; i < adp->a_ndests; i++)
6072			man_unplumb(&tdp[i]);
6073		status = EAGAIN;
6074		goto exit;
6075	}
6076
6077	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
6078	adp->a_mdp = tdp;
6079
6080exit:
6081	if (status)
6082		man_kfree(tdp, sizeof (man_dest_t) * adp->a_ndests);
6083
6084
6085	MAN_DBG(MAN_SWITCH, ("man_bswitch: returns %d", status));
6086
6087	/*
6088	 * Hand processed switch request back to man_iwork for
6089	 * processing in man_iswitch.
6090	 */
6091	wp->mw_status = status;
6092
6093	mutex_enter(&man_lock);
6094	man_work_add(man_iwork_q, wp);
6095	mutex_exit(&man_lock);
6096
6097}
6098
6099/*
6100 * man_plumb - Configure a lower stream for this destination.
6101 *
6102 * Locking:
6103 * 	- Called holding no locks.
6104 *
6105 * Returns:
6106 *	- success - 0
6107 *	- failure - error code of failure
6108 */
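/*
 * For reference, the LDI sequence below is roughly the in-kernel
 * equivalent of this user-level STREAMS idiom (man_fd being the dman
 * control stream):
 *
 *	fd = open("/devices/pseudo/clone@0:eri", O_RDWR);
 *	muxid = ioctl(man_fd, I_PLINK, fd);	(persistent link)
 *	close(fd);				(link outlives the fd)
 */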
6109static int
6110man_plumb(man_dest_t *mdp)
6111{
6112	int		status;
6113	int		muxid;
6114	ldi_handle_t	lh;
6115	ldi_ident_t	li = NULL;
6116
6117	MAN_DBG(MAN_SWITCH, ("man_plumb: mdp(0x%p) %s%d exp(%d)",
6118	    (void *)mdp, ddi_major_to_name(mdp->md_device.mdev_major),
6119	    mdp->md_device.mdev_ppa, mdp->md_device.mdev_exp_id));
6120
6121	/*
6122	 * Control stream should already be open.
6123	 */
6124	if (man_ctl_lh == NULL) {
6125		status = EAGAIN;
6126		goto exit;
6127	}
6128
6129	mutex_enter(&man_lock);
6130	ASSERT(man_ctl_wq != NULL);
6131	status = ldi_ident_from_stream(man_ctl_wq, &li);
6132	if (status != 0) {
6133		cmn_err(CE_WARN,
6134		    "man_plumb: ident alloc failed, error %d", status);
6135		goto exit;
6136	}
6137	mutex_exit(&man_lock);
6138
6139	/*
	 * Previously, opens were done by a dev_t of makedev(clone_major,
	 * mdev_major), which should always map to /devices/pseudo/clone@0:eri.
6142	 */
6143	ASSERT(strcmp(ERI_IDNAME,
6144	    ddi_major_to_name(mdp->md_device.mdev_major)) == 0);
6145
6146	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
6147	    kcred, &lh, li);
6148	if (status) {
6149		cmn_err(CE_WARN,
6150		    "man_plumb: eri open failed, error %d", status);
6151		goto exit;
6152	}
6153
6154	/*
6155	 * Link netdev under MAN.
6156	 */
6157	ASSERT(mdp->md_muxid == -1);
6158
6159	status = ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lh,
6160	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
6161	if (status) {
6162		cmn_err(CE_WARN,
6163		    "man_plumb: ldi_ioctl(I_PLINK) failed, error %d", status);
6164		(void) ldi_close(lh, NULL, kcred);
6165		goto exit;
6166
6167	}
6168	mdp->md_muxid = muxid;
6169	mdp->md_wq = man_linkrec_find(muxid);
6170	/*
6171	 * If we can't find the linkrec then return an
6172	 * error. It will be automatically unplumbed on failure.
6173	 */
6174	if (mdp->md_wq == NULL)
6175		status = EAGAIN;
6176
6177	(void) ldi_close(lh, NULL, kcred);
6178exit:
6179	if (li)
6180		ldi_ident_release(li);
6181
6182	MAN_DBG(MAN_SWITCH, ("man_plumb: exit\n"));
6183
6184	return (status);
6185}
6186
6187/*
 * man_unplumb - tear down the STREAMS framework for the lower multiplexor.
 *
 *	mdp - destination struct of interest
 *
 * Returns nothing; any error from ldi_ioctl(I_PUNLINK) is reported
 * via cmn_err.
6194 */
6195static void
6196man_unplumb(man_dest_t *mdp)
6197{
6198	int	status, rval;
6199
6200	MAN_DBG(MAN_SWITCH, ("man_unplumb: mdp"));
6201	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
6202
6203	if (mdp->md_muxid == -1)
6204		return;
6205
6206	ASSERT(man_ctl_lh != NULL);
6207
6208	/*
6209	 * I_PUNLINK causes the multiplexor resources to be freed.
6210	 */
6211	status = ldi_ioctl(man_ctl_lh, I_PUNLINK, (intptr_t)mdp->md_muxid,
6212	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &rval);
6213	if (status) {
6214		cmn_err(CE_WARN, "man_unplumb: ldi_ioctl(I_PUNLINK) failed"
6215		    " errno %d\n", status);
6216	}
6217	/*
6218	 * Delete linkrec if it exists.
6219	 */
6220	(void) man_linkrec_find(mdp->md_muxid);
6221	mdp->md_muxid = -1;
6222
6223}
6224
6225/*
6226 * The routines below deal with paths and pathgroups. These data structures
6227 * are used to track the physical devices connecting the domain and SSC.
6228 * These devices make up the lower streams of the MAN multiplexor. The
6229 * routines all expect the man_lock to be held.
6230 *
6231 * A pathgroup consists of all paths that connect a particular domain and the
6232 * SSC. The concept of a pathgroup id (pg_id) is used to uniquely identify
6233 * a pathgroup.  For Domains, there is just one pathgroup, that connecting
6234 * the domain to the SSC (pg_id == 0). On the SSC, there is one pathgroup per
6235 * domain. The pg_id field corresponds to the domain tags A-R. A pg_id of
6236 * 0 means domain tag A, a pg_id of 1 means domain B, etc.
6237 *
6238 * The path data structure identifies one path between the SSC and a domain.
6239 * It describes the information for the path: the major and minor number of
6240 * the physical device; kstat pointers; and ethernet address of the
6241 * other end of the path.
6242 *
 * The pathgroups are anchored at man_pg_head and are protected by the
 * inner perimeter. The routines are only called by the STREAMS
6245 * portion of the driver.
6246 */
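/*
 * For example, on the SC the paths to the domain with tag C live in
 * the pathgroup with mpg_pg_id == 2, while on that domain itself there
 * is just the single pathgroup with mpg_pg_id == 0.
 */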
6247
6248/*
6249 * Update man instance pathgroup info. Exclusive inner perimeter assures
 * this code is single threaded. man_refcnt assures man_t won't detach
6251 * while we are playing with man_pg stuff.
6252 *
6253 * Returns 0 on success, errno on failure.
6254 */
6255int
6256man_pg_cmd(mi_path_t *mip, man_work_t *waiter_wp)
6257{
6258	int		status = 0;
6259	man_t		*manp;
6260
6261	if (mip->mip_ndevs < 0) {
6262		status = EINVAL;
6263		cmn_err(CE_WARN, "man_pg_cmd: EINVAL: mip_ndevs %d",
6264		    mip->mip_ndevs);
6265		goto exit;
6266	}
6267
6268	ASSERT(MUTEX_HELD(&man_lock));
6269	manp = ddi_get_soft_state(man_softstate, mip->mip_man_ppa);
6270	if (manp == NULL) {
6271		status = ENODEV;
6272		goto exit;
6273	}
6274
6275	MAN_DBG(MAN_PATH, ("man_pg_cmd: mip"));
6276	MAN_DBGCALL(MAN_PATH, man_print_mip(mip));
6277
6278	MAN_DBG(MAN_PATH, ("\tman_t"));
6279	MAN_DBGCALL(MAN_PATH, man_print_man(manp));
6280
6281	switch (mip->mip_cmd) {
6282	case MI_PATH_ASSIGN:
6283		status = man_pg_assign(&manp->man_pg, mip, FALSE);
6284		break;
6285
6286	case MI_PATH_ADD:
6287		status = man_pg_assign(&manp->man_pg, mip, TRUE);
6288		break;
6289
6290	case MI_PATH_UNASSIGN:
6291		status = man_pg_unassign(&manp->man_pg, mip);
6292		break;
6293
6294	case MI_PATH_ACTIVATE:
6295		status = man_pg_activate(manp, mip, waiter_wp);
6296		break;
6297
6298	case MI_PATH_READ:
6299		status = man_pg_read(manp->man_pg, mip);
6300		break;
6301
6302	default:
6303		status = EINVAL;
6304		cmn_err(CE_NOTE, "man_pg_cmd: invalid command");
6305		break;
6306	}
6307
6308exit:
6309	MAN_DBG(MAN_PATH, ("man_pg_cmd: returns %d", status));
6310
6311	return (status);
6312}
6313
6314/*
 * Assign paths to a pathgroup. If the pathgroup doesn't exist, create it.
 * If a path doesn't exist, create it. If the ethernet address of an
 * existing pathgroup doesn't match, fail with EINVAL. If an existing path
 * is not in the new list, remove it.  If anything changed, send PATH_UPDATE
 * request to man_iwork to update all man_dest_t's.
 *
 * 	mplpp	- pointer to the man pathgroup list head.
6322 *	mip	- new/updated pathgroup info to assign.
6323 */
6324static int
6325man_pg_assign(man_pg_t **mplpp, mi_path_t *mip, int add_only)
6326{
6327	man_pg_t	*mpg;
6328	man_path_t	*mp;
6329	man_path_t	*add_paths = NULL;
6330	int		cnt;
6331	int		i;
6332	int		first_pass = TRUE;
6333	int		status = 0;
6334
6335	ASSERT(MUTEX_HELD(&man_lock));
6336
6337	cnt = mip->mip_ndevs;
6338	if (cnt == 0) {
6339		status = EINVAL;
6340		cmn_err(CE_NOTE, "man_pg_assign: mip_ndevs == 0");
6341		goto exit;
6342	}
6343
6344	/*
6345	 * Assure the devices to be assigned are not assigned to some other
6346	 * pathgroup.
6347	 */
6348	for (i = 0; i < cnt; i++) {
6349		mpg = man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL);
6350
6351		if (mpg == NULL)
6352			continue;
6353
6354		if ((mpg->mpg_man_ppa != mip->mip_man_ppa) ||
6355		    (mpg->mpg_pg_id != mip->mip_pg_id)) {
6356			/*
6357			 * Already assigned to some other man instance
6358			 * or pathgroup.
6359			 */
6360			status = EEXIST;
6361			goto exit;
6362		}
6363	}
6364
6365	/*
	 * Find pathgroup, or allocate a new one if it doesn't exist and
6367	 * add it to list at mplpp. Result is that mpg points to
6368	 * pathgroup to modify.
6369	 */
6370	mpg = man_find_pg_by_id(*mplpp, mip->mip_pg_id);
6371	if (mpg == NULL) {
6372
6373		status = man_pg_create(mplpp, &mpg, mip);
6374		if (status)
6375			goto exit;
6376
6377	} else if (ether_cmp(&mip->mip_eaddr, &mpg->mpg_dst_eaddr) != 0) {
6378
6379		cmn_err(CE_WARN, "man_pg_assign: ethernet address mismatch");
6380		cmn_err(CE_CONT, "existing %s",
6381		    ether_sprintf(&mpg->mpg_dst_eaddr));
6382		cmn_err(CE_CONT, "new %s",
6383		    ether_sprintf(&mip->mip_eaddr));
6384
6385		status = EINVAL;
6386		goto exit;
6387	}
6388
6389	/*
6390	 * Create list of new paths to add to pathgroup.
6391	 */
6392	for (i = 0; i < cnt; i++) {
6393
6394		if (man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL))
6395			continue;	/* Already exists in this pathgroup */
6396
6397		mp = man_kzalloc(sizeof (man_path_t), KM_NOSLEEP);
6398		if (mp == NULL) {
6399			status = ENOMEM;
6400			goto exit;
6401		}
6402
6403		mp->mp_device = mip->mip_devs[i];
6404		mp->mp_device.mdev_state = MDEV_ASSIGNED;
6405
6406		MAN_DBG(MAN_PATH, ("man_pg_assign: assigning mdp"));
6407		MAN_DBGCALL(MAN_PATH, man_print_dev(&mp->mp_device));
6408
6409		status = man_path_kstat_init(mp);
6410		if (status) {
6411			man_kfree(mp, sizeof (man_path_t));
6412			goto exit;
6413		}
6414
6415		man_path_insert(&add_paths, mp);
6416	}
6417
6418	/*
	 * man_dr_attach passes only the path which is being DR'd in.
6420	 * So just add the path and don't worry about removing paths.
6421	 */
6422	if (add_only == TRUE)
6423		goto exit;
6424
6426	/*
	 * First pass: check whether any path we want to remove is ACTIVE,
	 * and if so fail with EBUSY. Second pass: remove them.
6429	 */
6430again:
6431	mp = mpg->mpg_pathp;
6432	while (mp != NULL) {
6433		int		in_new_list;
6434		man_path_t	*rp;
6435
6436		rp = NULL;
6437		in_new_list = FALSE;
6438
6439		for (i = 0; i < cnt; i++) {
6440			if (mp->mp_device.mdev_ppa ==
6441			    mip->mip_devs[i].mdev_ppa) {
6442
6443				in_new_list = TRUE;
6444				break;
6445			}
6446		}
6447
6448		if (!in_new_list) {
6449			if (first_pass) {
6450				if (mp->mp_device.mdev_state & MDEV_ACTIVE) {
6451					status = EBUSY;
6452					goto exit;
6453				}
6454			} else {
6455				rp = mp;
6456			}
6457		}
6458		mp = mp->mp_next;
6459
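		/*
		 * Remove rp only now that mp has advanced past it,
		 * since man_path_remove frees the node.
		 */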
6460		if (rp != NULL)
6461			man_path_remove(&mpg->mpg_pathp, rp);
6462	}
6463
6464	if (first_pass == TRUE) {
6465		first_pass = FALSE;
6466		goto again;
6467	}
6468
6469exit:
6470	if (status == 0) {
6471		if (add_paths)
6472			man_path_merge(&mpg->mpg_pathp, add_paths);
6473	} else {
6474		while (add_paths != NULL) {
6475			mp = add_paths;
6476			add_paths = mp->mp_next;
6477			mp->mp_next = NULL;
6478
6479			man_path_kstat_uninit(mp);
6480			man_kfree(mp, sizeof (man_path_t));
6481		}
6482	}
6483
6484	return (status);
6485}
6486
6487/*
6488 * Remove all paths from a pathgroup (domain shutdown). If there is an
6489 * active path in the group, shut down all destinations referencing it
6490 * first.
6491 */
6492static int
6493man_pg_unassign(man_pg_t **plpp, mi_path_t *mip)
6494{
6495	man_pg_t	*mpg;
6496	man_pg_t	*tpg;
6497	man_pg_t	*tppg;
6498	man_path_t	*mp = NULL;
6499	int		status = 0;
6500
6501	ASSERT(MUTEX_HELD(&man_lock));
6502
6503	/*
6504	 * Check for existence of pathgroup.
6505	 */
6506	if ((mpg = man_find_pg_by_id(*plpp, mip->mip_pg_id)) == NULL)
6507		goto exit;
6508
6509	if (man_find_active_path(mpg->mpg_pathp) != NULL) {
6510		status = man_remove_dests(mpg);
6511		if (status)
6512			goto exit;
6513	}
6514
6515	/*
6516	 * Free all the paths for this pathgroup.
6517	 */
6518	while (mpg->mpg_pathp) {
6519		mp = mpg->mpg_pathp;
6520		mpg->mpg_pathp = mp->mp_next;
6521		mp->mp_next = NULL;
6522
6523		man_path_kstat_uninit(mp);
6524		man_kfree(mp, sizeof (man_path_t));
6525	}
6526
6527	/*
6528	 * Remove this pathgroup from the list, and free it.
6529	 */
6530	tpg = tppg = *plpp;
6531	if (tpg == mpg) {
6532		*plpp = tpg->mpg_next;
6533		goto free_pg;
6534	}
6535
6536	for (tpg = tpg->mpg_next; tpg != NULL; tpg = tpg->mpg_next) {
6537		if (tpg == mpg)
6538			break;
6539		tppg = tpg;
6540	}
6541
6542	ASSERT(tpg != NULL);
6543
6544	tppg->mpg_next = tpg->mpg_next;
6545	tpg->mpg_next = NULL;
6546
6547free_pg:
6548	man_kfree(tpg, sizeof (man_pg_t));
6549
6550exit:
6551	return (status);
6552
6553}
6554
6555/*
6556 * Set a new active path. This is done via man_ioctl so we are
6557 * exclusive in the inner perimeter.
6558 */
6559static int
6560man_pg_activate(man_t *manp, mi_path_t *mip, man_work_t *waiter_wp)
6561{
6562	man_pg_t	*mpg1;
6563	man_pg_t	*mpg2;
6564	man_pg_t	*plp;
6565	man_path_t	*mp;
6566	man_path_t	*ap;
6567	int		status = 0;
6568
6569	ASSERT(MUTEX_HELD(&man_lock));
6570	MAN_DBG(MAN_PATH, ("man_pg_activate: dev"));
6571	MAN_DBGCALL(MAN_PATH, man_print_dev(mip->mip_devs));
6572
6573	if (mip->mip_ndevs != 1) {
6574		status = EINVAL;
6575		goto exit;
6576	}
6577
6578	plp = manp->man_pg;
6579	mpg1 = man_find_pg_by_id(plp, mip->mip_pg_id);
6580	if (mpg1 == NULL) {
6581		status = EINVAL;
6582		goto exit;
6583	}
6584
6585	mpg2 = man_find_path_by_dev(plp, mip->mip_devs, &mp);
6586	if (mpg2 == NULL) {
6587		status = ENODEV;
6588		goto exit;
6589	}
6590
6591	if (mpg1 != mpg2) {
6592		status = EINVAL;
6593		goto exit;
6594	}
6595
6596	ASSERT(mp->mp_device.mdev_ppa == mip->mip_devs->mdev_ppa);
6597
6598	if (mpg1->mpg_flags & MAN_PG_SWITCHING) {
6599		status = EAGAIN;
6600		goto exit;
6601	}
6602
6603	ap = man_find_active_path(mpg1->mpg_pathp);
6604	if (ap == NULL) {
6605		/*
6606		 * This is the first time a path has been activated for
		 * this pathgroup. Initialize the dest structures of all
		 * upper streams for this pathgroup so autoswitch will find
6609		 * them.
6610		 */
6611		mp->mp_device.mdev_state |= MDEV_ACTIVE;
6612		man_add_dests(mpg1);
6613		goto exit;
6614	}
6615
6616	/*
6617	 * Path already active, nothing to do.
6618	 */
6619	if (ap == mp)
6620		goto exit;
6621
6622	/*
6623	 * Try to autoswitch to requested device. Set flags and refcnt.
6624	 * Cleared in man_iswitch when SWITCH completes.
6625	 */
6626	manp->man_refcnt++;
6627	mpg1->mpg_flags |= MAN_PG_SWITCHING;
6628
6629	/*
6630	 * Switch to path specified.
6631	 */
6632	status = man_autoswitch(mpg1, mip->mip_devs, waiter_wp);
6633
6634	if (status != 0) {
6635		/*
6636		 * man_iswitch not going to run, clean up.
6637		 */
6638		manp->man_refcnt--;
6639		mpg1->mpg_flags &= ~MAN_PG_SWITCHING;
6640
6641		if (status == ENODEV) {
6642			/*
6643			 * Device not plumbed isn't really an error. Change
6644			 * active device setting here, since man_iswitch isn't
6645			 * going to be run to do it.
6646			 */
6647			status = 0;
6648			ap->mp_device.mdev_state &= ~MDEV_ACTIVE;
6649			mp->mp_device.mdev_state |= MDEV_ACTIVE;
6650		}
6651	}
6652
6653exit:
6654	MAN_DBG(MAN_PATH, ("man_pg_activate: returns %d", status));
6655
6656	return (status);
6657}
6658
6659static int
6660man_pg_read(man_pg_t *plp, mi_path_t *mip)
6661{
6662	man_pg_t	*mpg;
6663	man_path_t	*mp;
6664	int		cnt;
6665	int		status = 0;
6666
6667	ASSERT(MUTEX_HELD(&man_lock));
6668
6669	if ((mpg = man_find_pg_by_id(plp, mip->mip_pg_id)) == NULL) {
6670		status = ENODEV;
6671		goto exit;
6672	}
6673
6674	cnt = 0;
6675	for (mp = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
		if (cnt == mip->mip_ndevs)
			break;
		bcopy(&mp->mp_device, &mip->mip_devs[cnt], sizeof (man_dev_t));
		cnt++;
6680	}
6681
6682	MAN_DBG(MAN_PATH, ("man_pg_read: pg(0x%p) id(%d) found %d paths",
6683	    (void *)mpg, mpg->mpg_pg_id, cnt));
6684
6685	mip->mip_ndevs = cnt;
6686
6687	/*
6688	 * TBD - What should errno be if user buffer too small ?
6689	 */
6690	if (mp != NULL) {
6691		status = ENOMEM;
6692	}
6693
6694exit:
6695
6696	return (status);
6697}
6698
6699/*
 * Return the existing pathgroup, or create it. TBD - Need to update
 * all destinations if we added a pathgroup. Also, need to update
 * all of man_strup if we add a path.
 *
 * 	mplpp	- man pathgroup list pointer to pointer.
6705 * 	mpgp	- returns newly created man pathgroup.
6706 *	mip	- info to fill in mpgp.
6707 */
6708static int
6709man_pg_create(man_pg_t **mplpp, man_pg_t **mpgp, mi_path_t *mip)
6710{
6711	man_pg_t	*mpg;
6712	man_pg_t	*tpg;
6713	int		status = 0;
6714
6715	ASSERT(MUTEX_HELD(&man_lock));
6716
6717	if (ether_cmp(&mip->mip_eaddr, &zero_ether_addr) == 0) {
6718		cmn_err(CE_NOTE, "man_ioctl: man_pg_create: ether"
		    " address not set!");
6720		status = EINVAL;
6721		goto exit;
6722	}
6723
6724	mpg = man_kzalloc(sizeof (man_pg_t), KM_NOSLEEP);
6725	if (mpg == NULL) {
6726		status = ENOMEM;
6727		goto exit;
6728	}
6729
6730	mpg->mpg_flags = MAN_PG_IDLE;
6731	mpg->mpg_pg_id = mip->mip_pg_id;
6732	mpg->mpg_man_ppa = mip->mip_man_ppa;
6733	ether_copy(&mip->mip_eaddr, &mpg->mpg_dst_eaddr);
6734
6735	MAN_DBG(MAN_PATH, ("man_pg_create: new mpg"));
6736	MAN_DBGCALL(MAN_PATH, man_print_mpg(mpg));
6737
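	/*
	 * Append the new pathgroup at the tail of the list.
	 */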
6738	tpg = *mplpp;
6739	if (tpg == NULL) {
6740		*mplpp = mpg;
6741	} else {
6742		while (tpg->mpg_next != NULL)
6743			tpg = tpg->mpg_next;
6744		tpg->mpg_next = mpg;
6745	}
6746
6747exit:
6748	*mpgp = mpg;
6749
6750	return (status);
6751}
6752
6753/*
6754 * Return pointer to pathgroup containing mdevp, null otherwise. Also,
6755 * if a path pointer is passed in, set it to matching path in pathgroup.
6756 *
6757 * Called holding man_lock.
6758 */
6759static man_pg_t *
6760man_find_path_by_dev(man_pg_t *plp, man_dev_t *mdevp, man_path_t **mpp)
6761{
6762	man_pg_t	*mpg;
6763	man_path_t	*mp;
6764
6765	ASSERT(MUTEX_HELD(&man_lock));
6766	for (mpg = plp; mpg != NULL; mpg = mpg->mpg_next) {
6767		for (mp  = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6768			if (mp->mp_device.mdev_major == mdevp->mdev_major &&
6769			    mp->mp_device.mdev_ppa == mdevp->mdev_ppa) {
6770
6771				if (mpp != NULL)
6772					*mpp = mp;
6773				return (mpg);
6774			}
6775		}
6776	}
6777
6778	return (NULL);
6779}
6780
6781/*
6782 * Return pointer to pathgroup assigned to destination, null if not found.
6783 *
6784 * Called holding man_lock.
6785 */
6786static man_pg_t *
6787man_find_pg_by_id(man_pg_t *mpg, int pg_id)
6788{
6789	ASSERT(MUTEX_HELD(&man_lock));
6790	for (; mpg != NULL; mpg = mpg->mpg_next) {
6791		if (mpg->mpg_pg_id == pg_id)
6792			return (mpg);
6793	}
6794
6795	return (NULL);
6796}
6797
6798static man_path_t *
6799man_find_path_by_ppa(man_path_t *mplist, int ppa)
6800{
6801	man_path_t	*mp;
6802
6803	ASSERT(MUTEX_HELD(&man_lock));
6804	for (mp = mplist; mp != NULL; mp = mp->mp_next) {
6805		if (mp->mp_device.mdev_ppa == ppa)
6806			return (mp);
6807	}
6808
6809	return (NULL);
6810}
6811
6812static man_path_t *
6813man_find_active_path(man_path_t *mplist)
6814{
6815	man_path_t	*mp;
6816
6817	ASSERT(MUTEX_HELD(&man_lock));
6818	for (mp = mplist; mp != NULL; mp = mp->mp_next)
6819		if (mp->mp_device.mdev_state & MDEV_ACTIVE)
6820			return (mp);
6821
6822	return (NULL);
6823}
6824
6825/*
 * Try to find an alternate path.
6827 */
6828static man_path_t *
6829man_find_alternate_path(man_path_t *mlp)
6830{
6831	man_path_t	*ap;		/* Active path */
6832	man_path_t	*np;		/* New alternate path */
6833	man_path_t	*fp = NULL;	/* LRU failed path */
6834
6835	ASSERT(MUTEX_HELD(&man_lock));
6836	ap = man_find_active_path(mlp);
6837
6838	/*
	 * Find a non-failed path, or the LRU failed path, and switch to it.
6840	 */
6841	for (np = mlp; np != NULL; np = np->mp_next) {
6842		if (np == ap)
6843			continue;
6844
6845		if (np->mp_device.mdev_state == MDEV_ASSIGNED)
6846			goto exit;
6847
6848		if (np->mp_device.mdev_state & MDEV_FAILED) {
			if (fp == NULL)
				fp = np;
			else if (fp->mp_lru > np->mp_lru)
				fp = np;
6854		}
6855	}
6856
	/*
	 * Nowhere else to switch to; fall back to the LRU failed
	 * path, which may itself be NULL.
	 */
	if (np == NULL)
		np = fp;
6862
6863exit:
6864	return (np);
6865}
6866
6867/*
6868 * Assumes caller has verified existence.
6869 */
6870static void
6871man_path_remove(man_path_t **lpp, man_path_t *mp)
6872{
6873	man_path_t	*tp;
6874	man_path_t	*tpp;
6875
6876	ASSERT(MUTEX_HELD(&man_lock));
6877	MAN_DBG(MAN_PATH, ("man_path_remove: removing path"));
6878	MAN_DBGCALL(MAN_PATH, man_print_path(mp));
6879
6880	tp = tpp = *lpp;
6881	if (tp == mp) {
6882		*lpp = tp->mp_next;
6883		goto exit;
6884	}
6885
6886	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
6887		if (tp == mp)
6888			break;
6889		tpp = tp;
6890	}
6891
6892	ASSERT(tp != NULL);
6893
6894	tpp->mp_next = tp->mp_next;
6895	tp->mp_next = NULL;
6896
6897exit:
6898	man_path_kstat_uninit(tp);
6899	man_kfree(tp, sizeof (man_path_t));
6900
6901}
6902
6903/*
6904 * Insert path into list, ascending order by ppa.
6905 */
6906static void
6907man_path_insert(man_path_t **lpp, man_path_t *mp)
6908{
6909	man_path_t	*tp;
6910	man_path_t	*tpp;
6911
6912	ASSERT(MUTEX_HELD(&man_lock));
6913	if (*lpp == NULL) {
6914		*lpp = mp;
6915		return;
6916	}
6917
6918	tp = tpp = *lpp;
6919	if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa) {
6920		mp->mp_next = tp;
6921		*lpp = mp;
6922		return;
6923	}
6924
	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
6926		if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa)
6927			break;
6928		tpp = tp;
6929	}
6930
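	/* Insert mp between tpp and tp; tp == NULL makes mp the new tail. */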
	tpp->mp_next = mp;
	mp->mp_next = tp;
6937}
6938
6939/*
 * Merge np into lpp, ascending order by ppa. Assumes no
6941 * duplicates in either list.
6942 */
6943static void
6944man_path_merge(man_path_t **lpp, man_path_t *np)
6945{
6946	man_path_t	*tmp;
6947
6948	ASSERT(MUTEX_HELD(&man_lock));
6949	while (np != NULL) {
6950		tmp = np;
6951		np = np->mp_next;
6952		tmp->mp_next = NULL;
6953
6954		man_path_insert(lpp, tmp);
6955	}
6956
6957}
6958
6959static int
6960man_path_kstat_init(man_path_t *mpp)
6961{
6962
6963	kstat_named_t	*dev_knp;
6964	int		status = 0;
6965
6966	ASSERT(MUTEX_HELD(&man_lock));
6967	MAN_DBG(MAN_PATH, ("man_path_kstat_init: mpp(0x%p)\n", (void *)mpp));
6968
6969	/*
6970	 * Create named kstats for accounting purposes.
6971	 */
6972	dev_knp = man_kzalloc(MAN_NUMSTATS * sizeof (kstat_named_t),
6973	    KM_NOSLEEP);
6974	if (dev_knp == NULL) {
6975		status = ENOMEM;
6976		goto exit;
6977	}
6978	man_kstat_named_init(dev_knp, MAN_NUMSTATS);
6979	mpp->mp_last_knp = dev_knp;
6980
6981exit:
6982
6983	MAN_DBG(MAN_PATH, ("man_path_kstat_init: returns %d\n", status));
6984
6985	return (status);
6986}
6987
6988static void
6989man_path_kstat_uninit(man_path_t *mp)
6990{
6991	ASSERT(MUTEX_HELD(&man_lock));
6992	man_kfree(mp->mp_last_knp, MAN_NUMSTATS * sizeof (kstat_named_t));
6993}
6994
6995/*
 * man_work_alloc - allocate and initialize a work request structure
6997 *
6998 *	type - type of request to allocate
6999 *	returns	- success - ptr to an initialized work structure
7000 *		- failure - NULL
7001 */
7002man_work_t *
7003man_work_alloc(int type, int kmflag)
7004{
7005	man_work_t	*wp;
7006
7007	wp = man_kzalloc(sizeof (man_work_t), kmflag);
7008	if (wp == NULL)
7009		goto exit;
7010
	cv_init(&wp->mw_cv, NULL, CV_DRIVER, NULL);
7012	wp->mw_type = type;
7013
7014exit:
7015	return (wp);
7016}
7017
7018/*
7019 * man_work_free - deallocate a work request structure
7020 *
7021 *	wp - ptr to work structure to be freed
7022 */
7023void
7024man_work_free(man_work_t *wp)
7025{
7026	cv_destroy(&wp->mw_cv);
7027	man_kfree((void *)wp, sizeof (man_work_t));
7028}
7029
7030/*
7031 * Post work to a work queue.  The man_bwork sleeps on
7032 * man_bwork_q->q_cv, and work requesters may sleep on mw_cv.
7033 * The man_lock is used to protect both cv's.
7034 */
7035void
7036man_work_add(man_workq_t *q, man_work_t *wp)
7037{
7038	man_work_t	*lp = q->q_work;
7039
7040	if (lp) {
7041		while (lp->mw_next != NULL)
7042			lp = lp->mw_next;
7043
7044		lp->mw_next = wp;
7045
7046	} else {
7047		q->q_work = wp;
7048	}
7049
7050	/*
7051	 * cv_signal for man_bwork_q, qenable for man_iwork_q
7052	 */
7053	if (q == man_bwork_q) {
7054		cv_signal(&q->q_cv);
7055
7056	} else {	/* q == man_iwork_q */
7057
7058		if (man_ctl_wq != NULL)
7059			qenable(man_ctl_wq);
7060	}
7061
7062}
7063
7064/* <<<<<<<<<<<<<<<<<<<<<<< NDD SUPPORT FUNCTIONS	>>>>>>>>>>>>>>>>>>> */
7065/*
7066 * ndd support functions to get/set parameters
7067 */
7068
7069/*
7070 * Register each element of the parameter array with the
7071 * named dispatch handler. Each element is loaded using
7072 * nd_load()
7073 *
7074 * 	cnt	- the number of elements present in the parameter array
7075 */
7076static int
7077man_param_register(param_t *manpa, int cnt)
7078{
7079	int	i;
7080	ndgetf_t getp;
7081	ndsetf_t setp;
7082	int	status = B_TRUE;
7083
7084	MAN_DBG(MAN_CONFIG, ("man_param_register: manpa(0x%p) cnt %d\n",
7085	    (void *)manpa, cnt));
7086
7087	getp = man_param_get;
7088
7089	for (i = 0; i < cnt; i++, manpa++) {
7090		switch (man_param_display[i]) {
7091		case MAN_NDD_GETABLE:
7092			setp = NULL;
7093			break;
7094
7095		case MAN_NDD_SETABLE:
7096			setp = man_param_set;
7097			break;
7098
7099		default:
7100			continue;
7101		}
7102
7103		if (!nd_load(&man_ndlist, manpa->param_name, getp,
7104		    setp, (caddr_t)manpa)) {
7105
7106			(void) man_nd_free(&man_ndlist);
7107			status = B_FALSE;
7108			goto exit;
7109		}
7110	}
7111
7112	if (!nd_load(&man_ndlist, "man_pathgroups_report",
7113	    man_pathgroups_report, NULL, NULL)) {
7114
7115		(void) man_nd_free(&man_ndlist);
7116		status = B_FALSE;
7117		goto exit;
7118	}
7119
7120	if (!nd_load(&man_ndlist, "man_set_active_path",
7121	    NULL, man_set_active_path, NULL)) {
7122
7123		(void) man_nd_free(&man_ndlist);
7124		status = B_FALSE;
7125		goto exit;
7126	}
7127
7128	if (!nd_load(&man_ndlist, "man_get_hostinfo",
7129	    man_get_hostinfo, NULL, NULL)) {
7130
7131		(void) man_nd_free(&man_ndlist);
7132		status = B_FALSE;
7133		goto exit;
7134	}
7135
7136exit:
7137
7138	MAN_DBG(MAN_CONFIG, ("man_param_register: returns %d\n", status));
7139
7140	return (status);
7141}
7142
7143static void
7144man_nd_getset(queue_t *wq, mblk_t *mp)
7145{
7146
7147	if (!nd_getset(wq, man_ndlist, mp))
7148		miocnak(wq, mp, 0, ENOENT);
7149	else
7150		qreply(wq, mp);
7151}
7152
7153/*ARGSUSED*/
7154static int
7155man_pathgroups_report(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7156{
7157
7158	man_t		*manp;
7159	man_pg_t	*mpg;
7160	int		i;
7161	char		pad[] = "                 "; /* 17 spaces */
7162	int		pad_end;
7163
7165	MAN_DBG(MAN_PATH, ("man_pathgroups_report: wq(0x%p) mp(0x%p)"
7166	    " caddr 0x%p", (void *)wq, (void *)mp, (void *)cp));
7167
7168	(void) mi_mpprintf(mp, "MAN Pathgroup report: (* == failed)");
7169	(void) mi_mpprintf(mp, "====================================="
7170	    "==========================================");
7171
7172	mutex_enter(&man_lock);
7173
7174	for (i = 0; i < 2; i++) {
7175		manp = ddi_get_soft_state(man_softstate, i);
7176		if (manp == NULL)
7177			continue;
7178
		(void) mi_mpprintf(mp,
		    "Interface\tDestination\t\tActive Path\tAlternate Paths");
		(void) mi_mpprintf(mp, "---------------------------------------"
		    "----------------------------------------");
7183
7184		for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7185
7186			(void) mi_mpprintf(mp, "%s%d\t\t",
7187			    ddi_major_to_name(manp->man_meta_major),
7188			    manp->man_meta_ppa);
7189
7190			if (man_is_on_domain) {
7191				(void) mi_mpprintf_nr(mp, "Master SSC\t");
7192				man_preport(mpg->mpg_pathp, mp);
7193			} else {
7194				if (i == 0) {
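					/*
					 * Pad the destination address out
					 * to a fixed 17 character field so
					 * the columns line up.
					 */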
7195					pad_end = 17 - strlen(ether_sprintf(
7196					    &mpg->mpg_dst_eaddr));
7197					if (pad_end < 0 || pad_end > 16)
						pad_end = 0;
7199					pad[pad_end] = '\0';
7200
7201					(void) mi_mpprintf_nr(mp, "%c %s%s",
7202					    mpg->mpg_pg_id + 'A',
7203					    ether_sprintf(&mpg->mpg_dst_eaddr),
7204					    pad);
7205
7206					pad[pad_end] = ' ';
7207				} else {
7208					(void) mi_mpprintf_nr(mp,
7209					    "Other SSC\t");
7210				}
7211				man_preport(mpg->mpg_pathp, mp);
7212			}
7213			(void) mi_mpprintf_nr(mp, "\n");
7214		}
7215	}
7216
7217	mutex_exit(&man_lock);
7218	MAN_DBG(MAN_PATH, ("man_pathgroups_report: returns"));
7219
7220	return (0);
7221}
7222
7223static void
7224man_preport(man_path_t *plist, mblk_t *mp)
7225{
7226	man_path_t	*ap;
7227
7228	ap = man_find_active_path(plist);
7229	/*
7230	 * Active path
7231	 */
7232	if (ap != NULL) {
7233		(void) mi_mpprintf_nr(mp, "\t%s%d\t\t",
7234		    ddi_major_to_name(ap->mp_device.mdev_major),
7235		    ap->mp_device.mdev_ppa);
7236	} else {
7237		(void) mi_mpprintf_nr(mp, "None \t");
7238	}
7239
7240	/*
7241	 * Alternate Paths.
7242	 */
7243	while (plist != NULL) {
7244		(void) mi_mpprintf_nr(mp, "%s%d exp %d",
7245		    ddi_major_to_name(plist->mp_device.mdev_major),
7246		    plist->mp_device.mdev_ppa,
7247		    plist->mp_device.mdev_exp_id);
7248		if (plist->mp_device.mdev_state & MDEV_FAILED)
7249			(void) mi_mpprintf_nr(mp, "*");
7250		plist = plist->mp_next;
7251		if (plist)
7252			(void) mi_mpprintf_nr(mp, ", ");
7253	}
7254}
7255
7256/*
7257 * NDD request to set active path. Calling context is man_ioctl, so we are
7258 * exclusive in the inner perimeter.
7259 *
7260 *	Syntax is "ndd -set /dev/dman <man ppa> <pg_id> <phys ppa>"
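 *
 *	e.g. (values hypothetical) "ndd -set /dev/dman 0 1 2" makes the
 *	path with physical ppa 2 the active path for pathgroup 1 on man
 *	instance 0.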
7261 */
7262/* ARGSUSED3 */
7263static int
7264man_set_active_path(queue_t *wq, mblk_t *mp, char *value, caddr_t cp,
7265    cred_t *cr)
7266{
7267	char		*end, *meta_ppap, *phys_ppap, *pg_idp;
7268	int		meta_ppa;
7269	int		phys_ppa;
7270	int		pg_id;
7271	man_t		*manp;
7272	man_pg_t	*mpg;
7273	man_path_t	*np;
7274	mi_path_t	mpath;
7275	int		status = 0;
7276
7277	MAN_DBG(MAN_PATH, ("man_set_active_path: wq(0x%p) mp(0x%p)"
7278	    " args %s", (void *)wq, (void *)mp, value));
7279
7280	meta_ppap = value;
7281
7282	if ((pg_idp = strchr(value, ' ')) == NULL) {
7283		status = EINVAL;
7284		goto exit;
7285	}
7286
7287	*pg_idp++ = '\0';
7288
7289	if ((phys_ppap = strchr(pg_idp, ' ')) == NULL) {
7290		status = EINVAL;
7291		goto exit;
7292	}
7293
7294	*phys_ppap++ = '\0';
7295
7296	meta_ppa = (int)mi_strtol(meta_ppap, &end, 10);
7297	pg_id = (int)mi_strtol(pg_idp, &end, 10);
7298	phys_ppa = (int)mi_strtol(phys_ppap, &end, 10);
7299
7300	mutex_enter(&man_lock);
7301	manp = ddi_get_soft_state(man_softstate, meta_ppa);
7302	if (manp == NULL || manp->man_pg == NULL) {
7303		status = EINVAL;
7304		mutex_exit(&man_lock);
7305		goto exit;
7306	}
7307
7308	mpg = man_find_pg_by_id(manp->man_pg, pg_id);
7309	if (mpg == NULL) {
7310		status = EINVAL;
7311		mutex_exit(&man_lock);
7312		goto exit;
7313	}
7314
7315	np = man_find_path_by_ppa(mpg->mpg_pathp, phys_ppa);
7316
7317	if (np == NULL) {
7318		status = EINVAL;
7319		mutex_exit(&man_lock);
7320		goto exit;
7321	}
7322
7323	mpath.mip_cmd = MI_PATH_ACTIVATE;
7324	mpath.mip_pg_id = pg_id;
7325	mpath.mip_man_ppa = meta_ppa;
7326	mpath.mip_devs[0] = np->mp_device;
7327	mpath.mip_ndevs = 1;
7328
7329	status = man_pg_cmd(&mpath, NULL);
7330	mutex_exit(&man_lock);
7331
7332exit:
7333
7334	MAN_DBG(MAN_PATH, ("man_set_active_path: returns %d", status));
7335
7336	return (status);
7337}
7338
7339/*
7340 * Dump out the contents of the IOSRAM handoff structure. Note that if
7341 * anything changes here, you must make sure that the sysinit script
7342 * stays in sync with this output.
7343 */
7344/* ARGSUSED */
7345static int
7346man_get_hostinfo(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7347{
7348	manc_t	manc;
7349	char	*ipaddr;
7350	char	ipv6addr[INET6_ADDRSTRLEN];
7351	int	i;
7352	int	status;
7353
7354	if (!man_is_on_domain)
7355		return (0);
7356
7357	if (status = man_get_iosram(&manc)) {
7358		return (status);
7359	}
7360
7361	mi_mpprintf(mp, "manc_magic = 0x%x", manc.manc_magic);
7362	mi_mpprintf(mp, "manc_version = 0%d", manc.manc_version);
7363	mi_mpprintf(mp, "manc_csum = 0x%x", manc.manc_csum);
7364
7365	if (manc.manc_ip_type == AF_INET) {
7366		in_addr_t	netnum;
7367
7368		mi_mpprintf(mp, "manc_ip_type = AF_INET");
7369
7370		ipaddr = man_inet_ntoa(manc.manc_dom_ipaddr);
7371		mi_mpprintf(mp, "manc_dom_ipaddr = %s", ipaddr);
7372
7373		ipaddr = man_inet_ntoa(manc.manc_dom_ip_netmask);
7374		mi_mpprintf(mp, "manc_dom_ip_netmask = %s", ipaddr);
7375
7376		netnum = manc.manc_dom_ipaddr & manc.manc_dom_ip_netmask;
7377		ipaddr = man_inet_ntoa(netnum);
7378		mi_mpprintf(mp, "manc_dom_ip_netnum = %s", ipaddr);
7379
7380		ipaddr = man_inet_ntoa(manc.manc_sc_ipaddr);
7381		mi_mpprintf(mp, "manc_sc_ipaddr = %s", ipaddr);
7382
7383	} else if (manc.manc_ip_type == AF_INET6) {
7384
7385		mi_mpprintf(mp, "manc_ip_type = AF_INET6");
7386
7387		(void) inet_ntop(AF_INET6, (void *)&manc.manc_dom_ipv6addr,
7388		    ipv6addr, INET6_ADDRSTRLEN);
7389		mi_mpprintf(mp, "manc_dom_ipv6addr = %s", ipv6addr);
7390
7391		mi_mpprintf(mp, "manc_dom_ipv6_netmask = %d",
7392		    manc.manc_dom_ipv6_netmask.s6_addr[0]);
7393
7394		(void) inet_ntop(AF_INET6, (void *)&manc.manc_sc_ipv6addr,
7395		    ipv6addr, INET6_ADDRSTRLEN);
7396		mi_mpprintf(mp, "manc_sc_ipv6addr = %s", ipv6addr);
7397
7398	} else {
7399
7400		mi_mpprintf(mp, "manc_ip_type = NONE");
7401	}
7402
7403	mi_mpprintf(mp, "manc_dom_eaddr = %s",
7404	    ether_sprintf(&manc.manc_dom_eaddr));
7405	mi_mpprintf(mp, "manc_sc_eaddr = %s",
7406	    ether_sprintf(&manc.manc_sc_eaddr));
7407
7408	mi_mpprintf(mp, "manc_iob_bitmap = 0x%x\tio boards = ",
7409	    manc.manc_iob_bitmap);
7410	for (i = 0; i < MAN_MAX_EXPANDERS; i++) {
7411		if ((manc.manc_iob_bitmap >> i) & 0x1) {
7412			mi_mpprintf_nr(mp, "%d.1, ", i);
7413		}
7414	}
7415	mi_mpprintf(mp, "manc_golden_iob = %d", manc.manc_golden_iob);
7416
7417	return (0);
7418}
7419
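/*
 * Convert an IPv4 address to dotted-decimal form. Note that the result
 * is returned in a static buffer, so it must be consumed before the
 * next call.
 */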
7420static char *
7421man_inet_ntoa(in_addr_t in)
7422{
7423	static char b[18];
7424	unsigned char *p;
7425
7426	p = (unsigned char *)&in;
7427	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
7428	return (b);
7429}
7430
7431/*
 * Return the parameter value. cp points to the required parameter.
7433 */
7434/* ARGSUSED */
7435static int
7436man_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
7437{
7438	param_t	*manpa = (param_t *)cp;
7439
7440	(void) mi_mpprintf(mp, "%u", manpa->param_val);
7441	return (0);
7442}
7443
7444/*
 * Set the man parameter to the value passed in. The parameter was
 * registered with nd_load().
7447 */
7448/* ARGSUSED */
7449static int
7450man_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
7451{
7452	char *end;
7453	size_t new_value;
7454	param_t	*manpa = (param_t *)cp;
7455
7456	new_value = mi_strtol(value, &end, 10);
7457
7458	if (end == value || new_value < manpa->param_min ||
7459	    new_value > manpa->param_max) {
7460			return (EINVAL);
7461	}
7462
7463	manpa->param_val = new_value;
7464
7465	return (0);
7466
7467}
7468
7469/*
 * Free the Named Dispatch Table by calling nd_free()
7471 */
7472static void
7473man_param_cleanup()
7474{
7475	if (man_ndlist != NULL)
7476		nd_free(&man_ndlist);
7477}
7478
7479/*
 * Free the table pointed to by 'nd_pparam'
7481 */
7482static void
7483man_nd_free(caddr_t *nd_pparam)
7484{
7485	ND	*nd;
7486
7487	if ((nd = (ND *)(*nd_pparam)) != NULL) {
7488		if (nd->nd_tbl)
7489			mi_free((char *)nd->nd_tbl);
7490		mi_free((char *)nd);
7491		*nd_pparam = NULL;
7492	}
7493}
7494
7495
7496/*
7497 * man_kstat_update - update the statistics for a meta-interface.
7498 *
7499 *	ksp - kstats struct
7500 *	rw - flag indicating whether stats are to be read or written.
7501 *
7502 *	returns	0
7503 *
7504 * The destination specific kstat information is protected by the
7505 * perimeter lock, so we submit a work request to get the stats
7506 * updated (see man_do_kstats()), and then collect the results
7507 * when cv_signal'd. Note that we are doing cv_timedwait_sig()
7508 * as a precautionary measure only.
7509 */
7510static int
7511man_kstat_update(kstat_t *ksp, int rw)
7512{
7513	man_t			*manp;		/* per instance data */
7514	man_work_t		*wp;
7515	int			status = 0;
7516	kstat_named_t		*knp;
7517	kstat_named_t		*man_knp;
7518	int			i;
7519
7520	MAN_DBG(MAN_KSTAT, ("man_kstat_update: %s\n", rw ? "KSTAT_WRITE" :
7521	    "KSTAT_READ"));
7522
7523	mutex_enter(&man_lock);
7524	manp = (man_t *)ksp->ks_private;
7525	manp->man_refcnt++;
7526
7527	/*
7528	 * If the driver has been configured, get kstats updated by inner
7529	 * perimeter prior to retrieving.
7530	 */
7531	if (man_config_state == MAN_CONFIGURED) {
7532		clock_t wait_status;
7533
7534		man_update_path_kstats(manp);
7535		wp = man_work_alloc(MAN_WORK_KSTAT_UPDATE, KM_SLEEP);
7536		wp->mw_arg.a_man_ppa = manp->man_meta_ppa;
7537		wp->mw_flags = MAN_WFLAGS_CVWAITER;
7538		man_work_add(man_iwork_q, wp);
7539
7540		wait_status = cv_timedwait_sig(&wp->mw_cv, &man_lock,
7541		    ddi_get_lbolt() + drv_usectohz(manp->man_kstat_waittime));
7542
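		/*
		 * If the work completed, the results (and wp) are ours to
		 * consume and free. Otherwise we timed out or were
		 * interrupted, so clear CVWAITER to tell the worker not
		 * to cv_signal a departed waiter.
		 */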
7543		if (wp->mw_flags & MAN_WFLAGS_DONE) {
7544			status = wp->mw_status;
7545			man_work_free(wp);
7546		} else {
7547			ASSERT(wait_status <= 0);
7548			wp->mw_flags &= ~MAN_WFLAGS_CVWAITER;
7549			if (wait_status == 0)
7550				status = EINTR;
7551			else {
7552				MAN_DBG(MAN_KSTAT, ("man_kstat_update: "
7553				    "timedout, returning stale stats."));
7554				status = 0;
7555			}
7556		}
7557		if (status)
7558			goto exit;
7559	}
7560
7561	knp = (kstat_named_t *)ksp->ks_data;
7562	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7563
7564	if (rw == KSTAT_READ) {
7565		for (i = 0; i < MAN_NUMSTATS; i++) {
7566			knp[i].value.ui64 = man_knp[i].value.ui64;
7567		}
7568	} else {
7569		for (i = 0; i < MAN_NUMSTATS; i++) {
7570			man_knp[i].value.ui64 = knp[i].value.ui64;
7571		}
7572	}
7573
7574exit:
7575	manp->man_refcnt--;
7576	mutex_exit(&man_lock);
7577
7578	MAN_DBG(MAN_KSTAT, ("man_kstat_update: returns %d", status));
7579
7580	return (status);
7581}
7582
7583/*
7584 * Sum destination kstats for all active paths for a given instance of the
7585 * MAN driver. Called with perimeter lock.
7586 */
7587static void
7588man_do_kstats(man_work_t *wp)
7589{
7590	man_t		*manp;
7591	man_pg_t	*mpg;
7592	man_path_t	*mp;
7593
7594	MAN_DBG(MAN_KSTAT, ("man_do_kstats:"));
7595
7596	mutex_enter(&man_lock);
7597	/*
7598	 * Sync mp_last_knp for each path associated with the MAN instance.
7599	 */
7600	manp = (man_t *)ddi_get_soft_state(man_softstate,
7601	    wp->mw_arg.a_man_ppa);
7602	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7603
7604		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7605
7606		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7607
7608			MAN_DBG(MAN_KSTAT, ("\tkstat: path"));
7609			MAN_DBGCALL(MAN_KSTAT, man_print_path(mp));
7610
7611			/*
			 * We just update the destination statistics here.
7613			 */
7614			man_sum_dests_kstats(mp->mp_last_knp, mpg);
7615		}
7616	}
7617	mutex_exit(&man_lock);
7618	MAN_DBG(MAN_KSTAT, ("man_do_kstats: returns"));
7619}
7620
7621/*
7622 * Sum device kstats for all active paths for a given instance of the
7623 * MAN driver. Called with man_lock.
7624 */
7625static void
7626man_update_path_kstats(man_t *manp)
7627{
7628	kstat_named_t	*man_knp;
7629	man_pg_t	*mpg;
7630	man_path_t	*mp;
7631
7632	ASSERT(MUTEX_HELD(&man_lock));
7633	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats:"));
7634
7635	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7636
7637	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7638
7639		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7640
7641		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7642
7643			man_update_dev_kstats(man_knp, mp);
7644
7645		}
7646	}
7647	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats: returns"));
7648}
7649
7650/*
7651 * Update the device kstats.
7652 * As man_kstat_update() is called with kstat_chain_lock held,
7653 * we can safely update the statistics from the underlying driver here.
7654 */
7655static void
7656man_update_dev_kstats(kstat_named_t *man_knp, man_path_t *mp)
7657{
7658	kstat_t		*dev_ksp;
7659	major_t		major;
7660	int		instance;
7661	char		buf[KSTAT_STRLEN];
7663
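	/*
	 * The underlying device's named kstat is looked up as
	 * "<drvname><instance>".
	 */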
7664	major = mp->mp_device.mdev_major;
7665	instance = mp->mp_device.mdev_ppa;
7666	(void) sprintf(buf, "%s%d", ddi_major_to_name(major), instance);
7667
7668	dev_ksp = kstat_hold_byname(ddi_major_to_name(major), instance, buf,
7669	    ALL_ZONES);
7670	if (dev_ksp != NULL) {
7671
7672		KSTAT_ENTER(dev_ksp);
7673		KSTAT_UPDATE(dev_ksp, KSTAT_READ);
7674		man_sum_kstats(man_knp, dev_ksp, mp->mp_last_knp);
7675		KSTAT_EXIT(dev_ksp);
7676		kstat_rele(dev_ksp);
7677
7678	} else {
7679		MAN_DBG(MAN_KSTAT,
7680		    ("man_update_dev_kstats: no kstat data found for %s(%d,%d)",
7681		    buf, major, instance));
7682	}
7683}
7684
7685static void
7686man_sum_dests_kstats(kstat_named_t *knp, man_pg_t *mpg)
7687{
7688	int		i;
7689	int		flags;
7690	char		*statname;
7691	manstr_t	*msp;
7692	man_dest_t	*mdp;
7693	uint64_t	switches = 0;
7694	uint64_t	linkfails = 0;
7695	uint64_t	linkstales = 0;
7696	uint64_t	icmpv4probes = 0;
7697	uint64_t	icmpv6probes = 0;
7698
7699	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: mpg 0x%p", (void *)mpg));
7700
7701	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
7702
7703		if (!man_str_uses_pg(msp, mpg))
7704			continue;
7705
7706		mdp = &msp->ms_dests[mpg->mpg_pg_id];
7707
7708		switches += mdp->md_switches;
7709		linkfails += mdp->md_linkfails;
7710		linkstales += mdp->md_linkstales;
7711		icmpv4probes += mdp->md_icmpv4probes;
7712		icmpv6probes += mdp->md_icmpv6probes;
7713	}
7714
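	/*
	 * Only the MAN private (MK_NOT_PHYSICAL) stats are summed here;
	 * the physical device counters are accumulated via
	 * man_sum_kstats().
	 */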
7715	for (i = 0; i < MAN_NUMSTATS; i++) {
7716
7717		statname = man_kstat_info[i].mk_name;
7718		flags = man_kstat_info[i].mk_flags;
7719
7720		if (!(flags & MK_NOT_PHYSICAL))
7721			continue;
7722
7723		if (strcmp(statname, "man_switches") == 0) {
7724			knp[i].value.ui64 = switches;
7725		} else if (strcmp(statname, "man_link_fails") == 0) {
7726			knp[i].value.ui64 = linkfails;
7727		} else if (strcmp(statname, "man_link_stales") == 0) {
7728			knp[i].value.ui64 = linkstales;
7729		} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7730			knp[i].value.ui64 = icmpv4probes;
7731		} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7732			knp[i].value.ui64 = icmpv6probes;
7733		}
7734	}
7735
7736	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: returns"));
7737}
7738
7739/*
7740 * Initialize MAN named kstats in the space provided.
7741 */
7742static void
7743man_kstat_named_init(kstat_named_t *knp, int num_stats)
7744{
7745	int	i;
7746
7747	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: knp(0x%p) num_stats = %d",
7748	    (void *)knp, num_stats));
7749
7750	for (i = 0; i < num_stats; i++) {
7751		kstat_named_init(&knp[i], man_kstat_info[i].mk_name,
7752		    man_kstat_info[i].mk_type);
7753	}
7754
7755	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: returns"));
7756
7757}
7758
7759/*
7760 * man_kstat_byname - get a kernel stat value from its structure
7761 *
7762 *	ksp - kstat_t structure to play with
7763 *	s   - string to match names with
7764 *	res - in/out result data pointer
7765 *
7766 *	returns	- success - 1 (found)
7767 *		- failure - 0 (not found)
7768 */
7769static int
7770man_kstat_byname(kstat_t *ksp, char *s, kstat_named_t *res)
7771{
7772	int		found = 0;
7773
7774	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: GETTING %s\n", s));
7775
7776	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
7777		kstat_named_t *knp;
7778
7779		for (knp = KSTAT_NAMED_PTR(ksp);
7780		    (caddr_t)knp < ((caddr_t)ksp->ks_data+ksp->ks_data_size);
7781		    knp++) {
7782
			if (strcmp(s, knp->name) == 0) {
7784
7785				res->data_type = knp->data_type;
7786				res->value = knp->value;
7787				found++;
7788
7789				MAN_DBG(MAN_KSTAT2, ("\t%s: %d\n", knp->name,
7790				    (int)knp->value.ul));
7791			}
7792		}
7793	} else {
7794		MAN_DBG(MAN_KSTAT2, ("\tbad kstats type %d\n", ksp->ks_type));
7795	}
7796
7797	/*
	 * If we couldn't find the name string, return 0 and a default value.
7799	 */
7800	if (!found) {
7801		/*
7802		 * a reasonable default
7803		 */
7804		res->data_type = KSTAT_DATA_ULONG;
7805		res->value.l = 0;
7806		MAN_DBG(MAN_KSTAT2, ("\tcouldn't find, using defaults\n"));
7807	}
7808
7809	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: returns\n"));
7810
7811	return (found);
7812}
7813
7814
7815/*
7816 *
7817 * Accumulate MAN driver kstats from the incremental values of the underlying
7818 * physical interfaces.
7819 *
7820 * Parameters:
7821 *	sum_knp		- The named kstat area to put cumulative value,
7822 *			  NULL if we just want to sync next two params.
7823 *	phys_ksp	- Physical interface kstat_t pointer. Contains
7824 *			  more current counts.
7825 * 	phys_last_knp	- counts from the last time we were called for this
 *			  physical interface. Note that the named kstats
 *			  pointed to are actually in MAN format, but they
 *			  hold the mirrored physical device's last read
 *			  kstats.
7830 * Basic algorithm is:
7831 *
7832 * 	for each named kstat variable {
7833 *	    sum_knp[i] += (phys_ksp->ksp_data[i] - phys_last_knp[i]);
7834 *	    phys_last_knp[i] = phys_ksp->ksp_data[i];
7835 *	}
7836 *
7837 */
7838static void
7839man_sum_kstats(kstat_named_t *sum_knp, kstat_t *phys_ksp,
7840	kstat_named_t *phys_last_knp)
7841{
7842	char		*physname;
7843	char		*physalias;
7844	char		*statname;
7845	kstat_named_t	phys_kn_entry;
7846	uint64_t	delta64;
7847	int		i;
7848
7849	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: sum_knp(0x%p) phys_ksp(0x%p)"
7850	    " phys_last_knp(0x%p)\n", (void *)sum_knp, (void *)phys_ksp,
7851	    (void *)phys_last_knp));
7852
7853	/*
7854	 * Now for each entry in man_kstat_info, sum the named kstat.
	 * Note that all MAN specific kstats will end up !found.
7856	 */
7857	for (i = 0; i < MAN_NUMSTATS; i++) {
7858		int	found = 0;
7859		int	flags = 0;
7860
7861		delta64 = 0;
7862
7863		statname = man_kstat_info[i].mk_name;
7864		physname = man_kstat_info[i].mk_physname;
7865		physalias = man_kstat_info[i].mk_physalias;
7866		flags = man_kstat_info[i].mk_flags;
7867
7868		/*
7869		 * Update MAN private kstats.
7870		 */
7871		if (flags & MK_NOT_PHYSICAL) {
7872
7873			kstat_named_t	*knp = phys_last_knp;
7874
7875			if (sum_knp == NULL)
7876				continue;
7877
7878			if (strcmp(statname, "man_switches") == 0) {
7879				sum_knp[i].value.ui64 = knp[i].value.ui64;
7880			} else if (strcmp(statname, "man_link_fails") == 0) {
7881				sum_knp[i].value.ui64 = knp[i].value.ui64;
7882			} else if (strcmp(statname, "man_link_stales") == 0) {
7883				sum_knp[i].value.ui64 = knp[i].value.ui64;
7884			} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7885				sum_knp[i].value.ui64 = knp[i].value.ui64;
7886			} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7887				sum_knp[i].value.ui64 = knp[i].value.ui64;
7888			}
7889
			continue;	/* phys_ksp doesn't have this stat */
7891		}
7892
7893		/*
7894		 * first try it by the "official" name
7895		 */
7896		if (phys_ksp) {
7897			if (man_kstat_byname(phys_ksp, physname,
7898			    &phys_kn_entry)) {
7899
7900				found = 1;
7901
7902			} else if ((physalias) && (man_kstat_byname(phys_ksp,
7903			    physalias, &phys_kn_entry))) {
7904
7905				found = 1;
7906			}
7907		}
7908
7909		if (!found) {
7910			/*
7911			 * clear up the "last" value, no change to the sum
7912			 */
7913			phys_last_knp[i].value.ui64 = 0;
7914			continue;
7915		}
7916
7917		/*
7918		 * at this point, we should have the good underlying
7919		 * kstat value stored in phys_kn_entry
7920		 */
7921		if (flags & MK_NOT_COUNTER) {
7922			/*
7923			 * it isn't a counter, so store the value and
7924			 * move on (e.g. ifspeed)
7925			 */
7926			phys_last_knp[i].value = phys_kn_entry.value;
7927			continue;
7928		}
7929
7930		switch (phys_kn_entry.data_type) {
7931		case KSTAT_DATA_UINT32:
7932
7933			/*
7934			 * this handles 32-bit wrapping
7935			 */
7936			if (phys_kn_entry.value.ui32 <
7937			    phys_last_knp[i].value.ui32) {
7938
7939				/*
				 * we've wrapped! (add 1 for the step
				 * through zero)
				 */
				delta64 += (UINT_MAX -
				    phys_last_knp[i].value.ui32) + 1;
7944				phys_last_knp[i].value.ui32 = 0;
7945			}
7946
7947			delta64 += phys_kn_entry.value.ui32 -
7948			    phys_last_knp[i].value.ui32;
7949			phys_last_knp[i].value.ui32 = phys_kn_entry.value.ui32;
7950			break;
7951
7952		default:
7953			/*
7954			 * must be a 64-bit value, we ignore 64-bit
7955			 * wraps, since they shouldn't ever happen
7956			 * within the life of a machine (if we assume
7957			 * machines don't stay up for more than a few
7958			 * hundred years without a reboot...)
7959			 */
7960			delta64 = phys_kn_entry.value.ui64 -
7961			    phys_last_knp[i].value.ui64;
7962			phys_last_knp[i].value.ui64 = phys_kn_entry.value.ui64;
7963		}
7964
7965		if (sum_knp != NULL) {
7966			/*
7967			 * now we need to save the value
7968			 */
7969			switch (sum_knp[i].data_type) {
7970			case KSTAT_DATA_UINT32:
				/* truncate down to 32 bits, possibly lossy */
7972				sum_knp[i].value.ui32 += (uint32_t)delta64;
7973				break;
7974
7975			default:
7976				sum_knp[i].value.ui64 += delta64;
7977				break;
7978			}
7979		}
7980	}
7981
7982	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: returns\n"));
7983}
7984
7985
7986#if defined(DEBUG)
7987
7988
7989static char *_ms_flags[] = {
7990	"NONE",
7991	"FAST", 	/* 0x1 */
7992	"RAW",		/* 0x2 */
7993	"ALLPHYS",	/* 0x4 */
7994	"ALLMULTI",	/* 0x8 */
7995	"ALLSAP",	/* 0x10 */
7996	"CKSUM",	/* 0x20 */
7997	"MULTI",	/* 0x40 */
7998	"SERLPBK",	/* 0x80 */
7999	"MACLPBK",	/* 0x100 */
8000	"CLOSING",	/* 0x200 */
8001	"CLOSE_DONE",	/* 0x400 */
8002	"CONTROL"	/* 0x800 */
8003};
8004
8005static void
8006man_print_msp(manstr_t *msp)
8007{
8008	char	buf[512];
8009	char	prbuf[512];
8010	uint_t	flags;
8011	int	i;
8012
8013	cmn_err(CE_CONT, "\tmsp(0x%p)\n", (void *)msp);
8014
8015	if (msp == NULL)
8016		return;
8017
8018	cmn_err(CE_CONT, "\t%s%d SAP(0x%x):\n",
8019	    ddi_major_to_name(msp->ms_meta_maj), msp->ms_meta_ppa,
8020	    msp->ms_sap);
8021
8022	buf[0] = '\0';
8023	prbuf[0] = '\0';
8024	flags = msp->ms_flags;
8025	for (i = 0; i < A_CNT(_ms_flags); i++) {
8026		if ((flags >> i) & 0x1) {
8027			sprintf(buf, " %s |", _ms_flags[i+1]);
8028			strcat(prbuf, buf);
8029		}
8030	}
	if (prbuf[0] != '\0')
		prbuf[strlen(prbuf) - 1] = '\0';
8032	cmn_err(CE_CONT, "\tms_flags: %s\n", prbuf);
8033
8034	cmn_err(CE_CONT, "\tms_dlpistate: %s\n", dss[msp->ms_dlpistate]);
8035
8036	cmn_err(CE_CONT, "\tms_dl_mp: 0x%p\n", (void *)msp->ms_dl_mp);
8037
8038	cmn_err(CE_CONT, "\tms_manp: 0x%p\n", (void *)msp->ms_manp);
8039
8040	cmn_err(CE_CONT, "\tms_dests: 0x%p\n", (void *)msp->ms_dests);
8041
8042}
8043
8044static char *_md_state[] = {
8045	"NOTPRESENT",		/* 0x0 */
8046	"INITIALIZING",		/* 0x1 */
8047	"READY",		/* 0x2 */
8048	"PLUMBING",		/* 0x4 */
8049	"CLOSING"		/* 0x8 */
8050};
8051
8052static void
8053man_print_mdp(man_dest_t *mdp)
8054{
8055	uint_t		state;
8056	int		i;
8057	char		buf[64];
8058	char		prbuf[512];
8059
8060	buf[0] = '\0';
8061	prbuf[0] = '\0';
8062
8063	cmn_err(CE_CONT, "\tmdp(0x%p)\n", (void *)mdp);
8064
8065	if (mdp == NULL)
8066		return;
8067
8068	cmn_err(CE_CONT, "\tmd_pg_id: %d\n", mdp->md_pg_id);
8069	cmn_err(CE_CONT, "\tmd_dst_eaddr: %s\n",
8070	    ether_sprintf(&mdp->md_dst_eaddr));
8071	cmn_err(CE_CONT, "\tmd_src_eaddr: %s\n",
8072	    ether_sprintf(&mdp->md_src_eaddr));
8073	cmn_err(CE_CONT, "\tmd_dlpistate: %s", dss[mdp->md_dlpistate]);
	cmn_err(CE_CONT, "\tmd_muxid: 0x%x", mdp->md_muxid);
8075	cmn_err(CE_CONT, "\tmd_rcvcnt %lu md_lastrcvcnt %lu", mdp->md_rcvcnt,
8076	    mdp->md_lastrcvcnt);
8077
8078	/*
8079	 * Print out state as text.
8080	 */
8081	state = mdp->md_state;
8082
8083	if (state == 0) {
8084		strcat(prbuf, _md_state[0]);
8085	} else {
8086
8087		for (i = 0; i < A_CNT(_md_state); i++) {
8088			if ((state >> i) & 0x1)  {
8089				sprintf(buf, " %s |", _md_state[i+1]);
8090				strcat(prbuf, buf);
8091			}
8092		}
		prbuf[strlen(prbuf) - 1] = '\0';
8094	}
8095	cmn_err(CE_CONT, "\tmd_state: %s", prbuf);
8096
8097	cmn_err(CE_CONT, "\tmd_device:\n");
8098	man_print_dev(&mdp->md_device);
8099
8100}
8101
8102static void
8103man_print_man(man_t *manp)
8104{
8105	char	buf[512];
8106	char	prbuf[512];
8107
8108	buf[0] = '\0';
8109	prbuf[0] = '\0';
8110
8111	if (manp == NULL)
8112		return;
8113
8114	if (ddi_major_to_name(manp->man_meta_major)) {
8115		sprintf(buf, "\t man_device: %s%d\n",
8116		    ddi_major_to_name(manp->man_meta_major),
8117		    manp->man_meta_ppa);
8118	} else {
		sprintf(buf, "\t major: %d ppa: %d",
		    manp->man_meta_major, manp->man_meta_ppa);
8121	}
8122
8123	cmn_err(CE_CONT, "%s", buf);
8124
8125}
8126
8127static char *_mdev_state[] = {
8128	"UNASSIGNED  ",
8129	"ASSIGNED",
8130	"ACTIVE",
8131	"FAILED"
8132};
8133
8134static void
8135man_print_dev(man_dev_t *mdevp)
8136{
8137	char	buf[512];
8138	char	prbuf[512];
8139	int	i;
8140	uint_t	state;
8141
8142	buf[0] = '\0';
8143	prbuf[0] = '\0';
8144
8145	if (mdevp == NULL)
8146		return;
8147
8148	if (mdevp->mdev_major == 0) {
8149number:
8150		sprintf(buf, "\t mdev_major: %d\n", mdevp->mdev_major);
8151	} else if (ddi_major_to_name(mdevp->mdev_major)) {
8152		sprintf(buf, "\t mdev_device: %s%d\n",
8153		    ddi_major_to_name(mdevp->mdev_major),
8154		    mdevp->mdev_ppa);
8155	} else
8156		goto number;
8157
8158	cmn_err(CE_CONT, "%s", buf);
8159
8160	cmn_err(CE_CONT, "\t mdev_exp_id: %d\n", mdevp->mdev_exp_id);
8161
8162	buf[0] = '\0';
8163	prbuf[0] = '\0';
8164	state = mdevp->mdev_state;
8165
8166	if (state == 0) {
8167		strcat(prbuf, _mdev_state[0]);
8168	} else {
8169		for (i = 0; i < A_CNT(_mdev_state); i++) {
8170			if ((state >> i) & 0x1) {
8171				sprintf(buf, " %s |", _mdev_state[i+1]);
8172				strcat(prbuf, buf);
8173			}
8174		}
8175	}
8176
8177	prbuf[strlen(prbuf) - 2] = '\0';
8178
8179	cmn_err(CE_CONT, "\t mdev_state: %s\n", prbuf);
8180
8181}
8182
8183static char *_mip_cmd[] = {
8184	"MI_PATH_READ",
8185	"MI_PATH_ASSIGN",
8186	"MI_PATH_ACTIVATE",
8187	"MI_PATH_DEACTIVATE",
8188	"MI_PATH_UNASSIGN"
8189};
8190
8191static void
8192man_print_mtp(mi_time_t *mtp)
8193{
8194	cmn_err(CE_CONT, "\tmtp(0x%p)\n", (void *)mtp);
8195
8196	if (mtp == NULL)
8197		return;
8198
8199	cmn_err(CE_CONT, "\tmtp_instance: %d\n", mtp->mtp_man_ppa);
8200
8201	cmn_err(CE_CONT, "\tmtp_time: %d\n", mtp->mtp_time);
8202
8203}
8204
8205static void
8206man_print_mip(mi_path_t *mip)
8207{
8208	cmn_err(CE_CONT, "\tmip(0x%p)\n", (void *)mip);
8209
8210	if (mip == NULL)
8211		return;
8212
8213	cmn_err(CE_CONT, "\tmip_pg_id: %d\n", mip->mip_pg_id);
8214
8215	cmn_err(CE_CONT, "\tmip_cmd: %s\n", _mip_cmd[mip->mip_cmd]);
8216
8217	cmn_err(CE_CONT, "\tmip_eaddr: %s\n", ether_sprintf(&mip->mip_eaddr));
8218
8219	cmn_err(CE_CONT, "\tmip_devs: 0x%p\n", (void *)mip->mip_devs);
8220
8221	cmn_err(CE_CONT, "\tmip_ndevs: %d\n", mip->mip_ndevs);
8222
8223}
8224
8225static void
8226man_print_mpg(man_pg_t *mpg)
8227{
8228	cmn_err(CE_CONT, "\tmpg(0x%p)\n", (void *)mpg);
8229
8230	if (mpg == NULL)
8231		return;
8232
8233	cmn_err(CE_CONT, "\tmpg_next: 0x%p\n", (void *)mpg->mpg_next);
8234
8235	cmn_err(CE_CONT, "\tmpg_pg_id: %d\n", mpg->mpg_pg_id);
8236
8237	cmn_err(CE_CONT, "\tmpg_man_ppa: %d\n", mpg->mpg_man_ppa);
8238
8239	cmn_err(CE_CONT, "\tmpg_dst_eaddr: %s\n",
8240	    ether_sprintf(&mpg->mpg_dst_eaddr));
8241
8242	cmn_err(CE_CONT, "\tmpg_pathp: 0x%p\n", (void *)mpg->mpg_pathp);
8243
8244}
8245
8246static char *_mw_flags[] = {
8247	"NOWAITER",		/* 0x0 */
8248	"CVWAITER",		/* 0x1 */
8249	"QWAITER",		/* 0x2 */
	"DONE"		/* 0x4 */
8251};
8252
8253static void
8254man_print_work(man_work_t *wp)
8255{
8256	int 	i;
8257
8258	cmn_err(CE_CONT, "\twp(0x%p)\n\n", (void *)wp);
8259
8260	if (wp == NULL)
8261		return;
8262
8263	cmn_err(CE_CONT, "\tmw_type: %s\n", _mw_type[wp->mw_type]);
8264
8265	cmn_err(CE_CONT, "\tmw_flags: ");
	for (i = 0; i < A_CNT(_mw_flags) - 1; i++) {
		if ((wp->mw_flags >> i) & 0x1)
			cmn_err(CE_CONT, "%s", _mw_flags[i + 1]);
8269	}
8270	cmn_err(CE_CONT, "\n");
8271
8272	cmn_err(CE_CONT, "\twp_status: %d\n", wp->mw_status);
8273
8274	cmn_err(CE_CONT, "\twp_arg: 0x%p\n", (void *)&wp->mw_arg);
8275
8276	cmn_err(CE_CONT, "\tmw_next: 0x%p\n", (void *)wp->mw_next);
8277
8278	cmn_err(CE_CONT, "\twp_q: 0x%p", (void *)wp->mw_q);
8279
8280}
8281
8282static void
8283man_print_path(man_path_t *mp)
8284{
8285	cmn_err(CE_CONT, "\tmp(0x%p)\n\n", (void *)mp);
8286
8287	if (mp == NULL)
8288		return;
8289
8290	cmn_err(CE_CONT, "\tmp_device:");
8291	man_print_dev(&mp->mp_device);
8292
8293	cmn_err(CE_CONT, "\tmp_next: 0x%p\n", (void *)mp->mp_next);
8294
8295	cmn_err(CE_CONT, "\tmp_last_knp: 0x%p\n", (void *)mp->mp_last_knp);
8296
8297	cmn_err(CE_CONT, "\tmp_lru: 0x%lx", mp->mp_lru);
8298
8299}
8300
8301void *
8302man_dbg_kzalloc(int line, size_t size, int kmflags)
8303{
8304	void *tmp;
8305
8306	tmp = kmem_zalloc(size, kmflags);
8307	MAN_DBG(MAN_KMEM, ("0x%p %lu\tzalloc'd @ %d\n", (void *)tmp,
8308	    size, line));
8309
8310	return (tmp);
8311
8312}
8313
8314void
8315man_dbg_kfree(int line, void *buf, size_t size)
8316{
8317
8318	MAN_DBG(MAN_KMEM, ("0x%p %lu\tfree'd @ %d\n", (void *)buf, size, line));
8319
8320	kmem_free(buf, size);
8321
8322}
8323
8324#endif  /* DEBUG */
8325