1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27
28/*
29 * Starcat Management Network Driver
30 *
31 * ****** NOTICE **** This file also resides in the SSC gate as
32 * ****** NOTICE **** usr/src/uts/sun4u/scman/scman.c. Any changes
33 * ****** NOTICE **** made here must be propogated there as well.
34 *
35 */
36
37#include <sys/types.h>
38#include <sys/proc.h>
39#include <sys/disp.h>
40#include <sys/kmem.h>
41#include <sys/stat.h>
42#include <sys/kstat.h>
43#include <sys/ksynch.h>
44#include <sys/stream.h>
45#include <sys/dlpi.h>
46#include <sys/stropts.h>
47#include <sys/strsubr.h>
48#include <sys/debug.h>
49#include <sys/conf.h>
50#include <sys/kstr.h>
51#include <sys/errno.h>
52#include <sys/ethernet.h>
53#include <sys/byteorder.h>
54#include <sys/ddi.h>
55#include <sys/sunddi.h>
56#include <sys/sunldi.h>
57#include <sys/modctl.h>
58#include <sys/strsun.h>
59#include <sys/callb.h>
60#include <sys/pci.h>
61#include <netinet/in.h>
62#include <inet/common.h>
63#include <inet/mi.h>
64#include <inet/nd.h>
65#include <sys/socket.h>
66#include <netinet/igmp_var.h>
67#include <netinet/ip6.h>
68#include <netinet/icmp6.h>
69#include <inet/ip.h>
70#include <inet/ip6.h>
71#include <sys/file.h>
72#include <sys/dman.h>
73#include <sys/autoconf.h>
74#include <sys/zone.h>
75
76extern int ddi_create_internal_pathname(dev_info_t *, char *, int, minor_t);
77
78#define	MAN_IDNAME	"dman"
79#define	DMAN_INT_PATH	"/devices/pseudo/dman@0:dman"
80#define	DMAN_PATH	"/devices/pseudo/clone@0:dman"
81#define	ERI_IDNAME	"eri"
82#define	ERI_PATH	"/devices/pseudo/clone@0:eri"
83
84#if defined(DEBUG)
85
86static void man_print_msp(manstr_t *);
87static void man_print_man(man_t *);
88static void man_print_mdp(man_dest_t *);
89static void man_print_dev(man_dev_t *);
90static void man_print_mip(mi_path_t *);
91static void man_print_mtp(mi_time_t *);
92static void man_print_mpg(man_pg_t *);
93static void man_print_path(man_path_t *);
94static void man_print_work(man_work_t *);
95
96/*
97 * Set manstr_t dlpistate (upper half of multiplexor)
98 */
99#define	SETSTATE(msp, state) \
100	MAN_DBG(MAN_DLPI, ("msp=0x%p @ %d state %s=>%s\n",		\
101		    (void *)msp, __LINE__, dss[msp->ms_dlpistate],	\
102		    dss[(state)]));					\
103		    msp->ms_dlpistate = (state);
104/*
105 * Set man_dest_t dlpistate (lower half of multiplexor)
106 */
107#define	D_SETSTATE(mdp, state) \
108	MAN_DBG(MAN_DLPI, ("dst=0x%p @ %d state %s=>%s\n",	   \
109		    (void *)mdp, __LINE__, dss[mdp->md_dlpistate], \
110		    dss[(state)]));				   \
111		    mdp->md_dlpistate = (state);
112
113static char *promisc[] = {	/* DLPI promisc Strings */
114	"not used",		/* 0x00 */
115	"DL_PROMISC_PHYS",	/* 0x01 */
116	"DL_PROMISC_SAP",	/* 0x02 */
117	"DL_PROMISC_MULTI"	/* 0x03 */
118};
119
120static char *dps[] = {			/* DLPI Primitive Strings */
121	"DL_INFO_REQ",			/* 0x00 */
122	"DL_BIND_REQ",			/* 0x01 */
123	"DL_UNBIND_REQ",		/* 0x02 */
124	"DL_INFO_ACK",			/* 0x03 */
125	"DL_BIND_ACK",			/* 0x04 */
126	"DL_ERROR_ACK",			/* 0x05 */
127	"DL_OK_ACK",			/* 0x06 */
128	"DL_UNITDATA_REQ",		/* 0x07 */
129	"DL_UNITDATA_IND",		/* 0x08 */
130	"DL_UDERROR_IND",		/* 0x09 */
131	"DL_UDQOS_REQ",			/* 0x0a */
132	"DL_ATTACH_REQ",		/* 0x0b */
133	"DL_DETACH_REQ",		/* 0x0c */
134	"DL_CONNECT_REQ",		/* 0x0d */
135	"DL_CONNECT_IND",		/* 0x0e */
136	"DL_CONNECT_RES",		/* 0x0f */
137	"DL_CONNECT_CON",		/* 0x10 */
138	"DL_TOKEN_REQ",			/* 0x11 */
139	"DL_TOKEN_ACK",			/* 0x12 */
140	"DL_DISCONNECT_REQ",		/* 0x13 */
141	"DL_DISCONNECT_IND",		/* 0x14 */
142	"DL_SUBS_UNBIND_REQ",		/* 0x15 */
143	"DL_LIARLIARPANTSONFIRE",	/* 0x16 */
144	"DL_RESET_REQ",			/* 0x17 */
145	"DL_RESET_IND",			/* 0x18 */
146	"DL_RESET_RES",			/* 0x19 */
147	"DL_RESET_CON",			/* 0x1a */
148	"DL_SUBS_BIND_REQ",		/* 0x1b */
149	"DL_SUBS_BIND_ACK",		/* 0x1c */
150	"DL_ENABMULTI_REQ",		/* 0x1d */
151	"DL_DISABMULTI_REQ",		/* 0x1e */
152	"DL_PROMISCON_REQ",		/* 0x1f */
153	"DL_PROMISCOFF_REQ",		/* 0x20 */
154	"DL_DATA_ACK_REQ",		/* 0x21 */
155	"DL_DATA_ACK_IND",		/* 0x22 */
156	"DL_DATA_ACK_STATUS_IND",	/* 0x23 */
157	"DL_REPLY_REQ",			/* 0x24 */
158	"DL_REPLY_IND",			/* 0x25 */
159	"DL_REPLY_STATUS_IND",		/* 0x26 */
160	"DL_REPLY_UPDATE_REQ",		/* 0x27 */
161	"DL_REPLY_UPDATE_STATUS_IND",	/* 0x28 */
162	"DL_XID_REQ",			/* 0x29 */
163	"DL_XID_IND",			/* 0x2a */
164	"DL_XID_RES",			/* 0x2b */
165	"DL_XID_CON",			/* 0x2c */
166	"DL_TEST_REQ",			/* 0x2d */
167	"DL_TEST_IND",			/* 0x2e */
168	"DL_TEST_RES",			/* 0x2f */
169	"DL_TEST_CON",			/* 0x30 */
170	"DL_PHYS_ADDR_REQ",		/* 0x31 */
171	"DL_PHYS_ADDR_ACK",		/* 0x32 */
172	"DL_SET_PHYS_ADDR_REQ",		/* 0x33 */
173	"DL_GET_STATISTICS_REQ",	/* 0x34 */
174	"DL_GET_STATISTICS_ACK",	/* 0x35 */
175};
176
177#define	MAN_DLPI_MAX_PRIM	0x35
178
179static char *dss[] = {			/* DLPI State Strings */
180	"DL_UNBOUND",			/* 0x00	*/
181	"DL_BIND_PENDING",		/* 0x01	*/
182	"DL_UNBIND_PENDING",		/* 0x02	*/
183	"DL_IDLE",			/* 0x03	*/
184	"DL_UNATTACHED",		/* 0x04	*/
185	"DL_ATTACH_PENDING",		/* 0x05	*/
186	"DL_DETACH_PENDING",		/* 0x06	*/
187	"DL_UDQOS_PENDING",		/* 0x07	*/
188	"DL_OUTCON_PENDING",		/* 0x08	*/
189	"DL_INCON_PENDING",		/* 0x09	*/
190	"DL_CONN_RES_PENDING",		/* 0x0a	*/
191	"DL_DATAXFER",			/* 0x0b	*/
192	"DL_USER_RESET_PENDING",	/* 0x0c	*/
193	"DL_PROV_RESET_PENDING",	/* 0x0d	*/
194	"DL_RESET_RES_PENDING",		/* 0x0e	*/
195	"DL_DISCON8_PENDING",		/* 0x0f	*/
196	"DL_DISCON9_PENDING",		/* 0x10	*/
197	"DL_DISCON11_PENDING",		/* 0x11	*/
198	"DL_DISCON12_PENDING",		/* 0x12	*/
199	"DL_DISCON13_PENDING",		/* 0x13	*/
200	"DL_SUBS_BIND_PND",		/* 0x14	*/
201	"DL_SUBS_UNBIND_PND",		/* 0x15	*/
202};
203
204static const char *lss[] = {
205	"UNKNOWN",	/* 0x0 */
206	"INIT",		/* 0x1 */
207	"GOOD",		/* 0x2 */
208	"STALE",	/* 0x3 */
209	"FAIL",		/* 0x4 */
210};
211
212static char *_mw_type[] = {
213	"OPEN_CTL",		/* 0x0 */
214	"CLOSE_CTL",		/* 0x1 */
215	"SWITCH",		/* 0x2 */
216	"PATH_UPDATE",		/* 0x3 */
217	"CLOSE",		/* 0x4 */
218	"CLOSE_STREAM",	/* 0x5 */
219	"DRATTACH",		/* 0x6 */
220	"DRDETACH",		/* 0x7 */
221	"STOP",			/* 0x8 */
222	"DRSWITCH",		/* 0x9 */
223	"KSTAT_UPDATE"		/* 0xA */
224};
225
226uint32_t		man_debug = MAN_WARN;
227
228#define	man_kzalloc(a, b)	man_dbg_kzalloc(__LINE__, a, b)
229#define	man_kfree(a, b)		man_dbg_kfree(__LINE__, a, b)
230void	*man_dbg_kzalloc(int line, size_t size, int kmflags);
231void	man_dbg_kfree(int line, void *buf, size_t size);
232
233#else	/* DEBUG */
234
235uint32_t		man_debug = 0;
236/*
237 * Set manstr_t dlpistate (upper half of multiplexor)
238 */
239#define	SETSTATE(msp, state) msp->ms_dlpistate = (state);
240/*
241 * Set man_dest_t dlpistate (lower half of multiplexor)
242 */
243#define	D_SETSTATE(mdp, state) mdp->md_dlpistate = (state);
244
245#define	man_kzalloc(a, b)	kmem_zalloc(a, b)
246#define	man_kfree(a, b)		kmem_free(a, b)
247
248#endif	/* DEBUG */
249
250#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
251#define	DL_PROMISCON_TYPE(mp)	\
252		(((union DL_primitives *)(mp)->b_rptr)->promiscon_req.dl_level)
253#define	IOC_CMD(mp)	(((struct iocblk *)(mp)->b_rptr)->ioc_cmd)
254
255/*
256 * Start of kstat-related declarations
257 */
258#define	MK_NOT_COUNTER		(1<<0)	/* is it a counter? */
259#define	MK_ERROR		(1<<2)	/* for error statistics */
260#define	MK_NOT_PHYSICAL		(1<<3)	/* no matching physical stat */
261
262typedef struct man_kstat_info_s {
263	char		*mk_name;	/* e.g. align_errors */
264	char		*mk_physname;	/* e.g. framing (NULL for same) */
265	char		*mk_physalias;	/* e.g. framing (NULL for same) */
266	uchar_t		mk_type;	/* e.g. KSTAT_DATA_UINT32 */
267	int		mk_flags;
268} man_kstat_info_t;
269
270/*
271 * Master declaration macro, note that it uses token pasting
272 */
273#define	MK_DECLARE(name, pname, palias, bits, flags) \
274	{ name,		pname,	palias,	KSTAT_DATA_UINT ## bits, flags }
275
276/*
277 * Obsolete forms don't have the _sinceswitch forms, they are all errors
278 */
279#define	MK_OBSOLETE32(name, alias) MK_DECLARE(alias, name, alias, 32, MK_ERROR)
280#define	MK_OBSOLETE64(name, alias) MK_DECLARE(alias, name, alias, 64, MK_ERROR)
281
282/*
283 * The only non-counters don't have any other aliases
284 */
285#define	MK_NOTCOUNTER32(name) MK_DECLARE(name, name, NULL, 32, MK_NOT_COUNTER)
286#define	MK_NOTCOUNTER64(name) MK_DECLARE(name, name, NULL, 64, MK_NOT_COUNTER)
287
288/*
289 * Normal counter forms
290 */
291#define	MK_DECLARE32(name, alias) \
292	MK_DECLARE(name, name, alias, 32, 0)
293#define	MK_DECLARE64(name, alias) \
294	MK_DECLARE(name, name, alias, 64, 0)
295
296/*
297 * Error counters need special MK_ERROR flag only for the non-AP form
298 */
299#define	MK_ERROR32(name, alias) \
300	MK_DECLARE(name, name, alias, 32, MK_ERROR)
301#define	MK_ERROR64(name, alias) \
302	MK_DECLARE(name, name, alias, 64, MK_ERROR)
303
304/*
305 * These AP-specific stats are not backed by physical statistics
306 */
307#define	MK_NOTPHYS32(name) MK_DECLARE(name, NULL, NULL, 32, MK_NOT_PHYSICAL)
308#define	MK_NOTPHYS64(name) MK_DECLARE(name, NULL, NULL, 64, MK_NOT_PHYSICAL)
309
310/*
311 * START of the actual man_kstat_info declaration using above macros
312 */
313static man_kstat_info_t man_kstat_info[] = {
314	/*
315	 * Link Input/Output stats
316	 */
317	MK_DECLARE32("ipackets", NULL),
318	MK_ERROR32("ierrors", NULL),
319	MK_DECLARE32("opackets", NULL),
320	MK_ERROR32("oerrors", NULL),
321	MK_ERROR32("collisions", NULL),
322	MK_NOTCOUNTER64("ifspeed"),
323	/*
324	 * These are new MIB-II stats, per PSARC 1997/198
325	 */
326	MK_DECLARE32("rbytes", NULL),
327	MK_DECLARE32("obytes", NULL),
328	MK_DECLARE32("multircv", NULL),
329	MK_DECLARE32("multixmt", NULL),
330	MK_DECLARE32("brdcstrcv", NULL),
331	MK_DECLARE32("brdcstxmt", NULL),
332	/*
333	 * Error values
334	 */
335	MK_ERROR32("norcvbuf", NULL),
336	MK_ERROR32("noxmtbuf", NULL),
337	MK_ERROR32("unknowns", NULL),
338	/*
339	 * These are the 64-bit values, they fallback to 32-bit values
340	 */
341	MK_DECLARE64("ipackets64", "ipackets"),
342	MK_DECLARE64("opackets64", "opackets"),
343	MK_DECLARE64("rbytes64", "rbytes"),
344	MK_DECLARE64("obytes64", "obytes"),
345
346	/* New AP switching statistics */
347	MK_NOTPHYS64("man_switches"),
348	MK_NOTPHYS64("man_link_fails"),
349	MK_NOTPHYS64("man_link_stales"),
350	MK_NOTPHYS64("man_icmpv4_probes"),
351	MK_NOTPHYS64("man_icmpv6_probes"),
352
353	MK_ERROR32("align_errors", "framing"),
354	MK_ERROR32("fcs_errors", "crc"),
355	MK_ERROR32("first_collisions", NULL),
356	MK_ERROR32("multi_collisions", NULL),
357	MK_ERROR32("sqe_errors", "sqe"),
358
359	MK_ERROR32("tx_late_collisions", NULL),
360	MK_ERROR32("ex_collisions", "excollisions"),
361	MK_ERROR32("macxmt_errors", NULL),
362	MK_ERROR32("carrier_errors", "nocarrier"),
363	MK_ERROR32("toolong_errors", "buff"),
364	MK_ERROR32("macrcv_errors", NULL),
365
366	MK_OBSOLETE32("framing", "align_errors"),
367	MK_OBSOLETE32("crc", "fcs_errors"),
368	MK_OBSOLETE32("sqe", "sqe_errors"),
369	MK_OBSOLETE32("excollisions", "ex_collisions"),
370	MK_OBSOLETE32("nocarrier", "carrier_errors"),
371	MK_OBSOLETE32("buff", "toolong_errors"),
372};
373
374#define	MAN_NUMSTATS (sizeof (man_kstat_info) / sizeof (man_kstat_info_t))
375
376/*
377 * Miscellaneous ethernet stuff.
378 *
379 * MANs DL_INFO_ACK template.
380 */
381static	dl_info_ack_t man_infoack = {
382	DL_INFO_ACK,				/* dl_primitive */
383	ETHERMTU,				/* dl_max_sdu */
384	0,					/* dl_min_sdu */
385	MAN_ADDRL,				/* dl_addr_length */
386	DL_ETHER,				/* dl_mac_type */
387	0,					/* dl_reserved */
388	0,					/* dl_current_state */
389	-2,					/* dl_sap_length */
390	DL_CLDLS,				/* dl_service_mode */
391	0,					/* dl_qos_length */
392	0,					/* dl_qos_offset */
393	0,					/* dl_range_length */
394	0,					/* dl_range_offset */
395	DL_STYLE2,				/* dl_provider_style */
396	sizeof (dl_info_ack_t),			/* dl_addr_offset */
397	DL_VERSION_2,				/* dl_version */
398	ETHERADDRL,				/* dl_brdcst_addr_length */
399	sizeof (dl_info_ack_t) + MAN_ADDRL,	/* dl_brdcst_addr_offset */
400	0					/* dl_growth */
401};
402
403/*
404 * Ethernet broadcast address definition.
405 */
406static	struct ether_addr	etherbroadcast = {
407	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
408};
409
410static struct ether_addr zero_ether_addr = {
411	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
412};
413
414/*
415 * Set via MAN_SET_SC_IPADDRS ioctl.
416 */
417man_sc_ipaddrs_t	man_sc_ipaddrs = { 0xffffffffU, 0xffffffffU };
418
419/*
420 * Set via MAN_SET_SC_IP6ADDRS ioctl.
421 */
422man_sc_ip6addrs_t	man_sc_ip6addrs = { 0, 0, 0, 0, 0, 0, 0, 0 };
423
424/*
425 * IP & ICMP constants
426 */
427#ifndef	ETHERTYPE_IPV6
428#define	ETHERTYPE_IPV6 0x86DD
429#endif
430
431/*
432 * Function prototypes.
433 *
434 * Upper multiplexor functions.
435 */
436static int	man_attach(dev_info_t *, ddi_attach_cmd_t);
437static int	man_detach(dev_info_t *, ddi_detach_cmd_t);
438static int	man_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
439static int	man_open(register queue_t *, dev_t *, int, int, cred_t *);
440static int	man_configure(queue_t *);
441static int	man_deconfigure(void);
442static int	man_init_dests(man_t *, manstr_t *);
443static void	man_start_dest(man_dest_t *, manstr_t *, man_pg_t *);
444static void	man_set_optimized_dest(manstr_t *);
445static int	man_close(queue_t *);
446static void	man_cancel_timers(man_adest_t *);
447static int	man_uwput(queue_t *, mblk_t *);
448static int	man_start(queue_t *, mblk_t *, eaddr_t *);
449static void	man_ioctl(queue_t *, mblk_t *);
450static void	man_set_linkcheck_time(queue_t *, mblk_t *);
451static void	man_setpath(queue_t *, mblk_t *);
452static void	man_geteaddr(queue_t *, mblk_t *);
453static void	man_set_sc_ipaddrs(queue_t *, mblk_t *);
454static void	man_set_sc_ip6addrs(queue_t *, mblk_t *);
455static int	man_get_our_etheraddr(eaddr_t *eap);
456static void	man_nd_getset(queue_t *, mblk_t *);
457static void	man_dl_ioc_hdr_info(queue_t *, mblk_t *);
458static int	man_uwsrv(queue_t *);
459static int	man_proto(queue_t *, mblk_t *);
460static int	man_udreq(queue_t *, mblk_t *);
461static void	man_areq(queue_t *, mblk_t *);
462static mblk_t	*man_alloc_physreq_mp(eaddr_t *);
463static void	man_dreq(queue_t *, mblk_t *);
464static void	man_dodetach(manstr_t *, man_work_t *);
465static void	man_dl_clean(mblk_t **);
466static void	man_breq(queue_t *, mblk_t *);
467static void	man_ubreq(queue_t *, mblk_t *);
468static void	man_ireq(queue_t *, mblk_t *);
469static void	man_ponreq(queue_t *, mblk_t *);
470static void	man_poffreq(queue_t *, mblk_t *);
471static void	man_emreq(queue_t *, mblk_t *);
472static void	man_dmreq(queue_t *, mblk_t *);
473static void	man_pareq(queue_t *, mblk_t *);
474static void	man_spareq(queue_t *, mblk_t *);
475static int	man_dlpi(manstr_t *, mblk_t *);
476static int	man_dlioc(manstr_t *, mblk_t *);
477static int	man_dl_catch(mblk_t **, mblk_t *);
478static void	man_dl_release(mblk_t **, mblk_t *);
479static int	man_match_proto(mblk_t *, mblk_t *);
480static int	man_open_ctl();
481static void	man_close_ctl();
482/*
483 * upper/lower multiplexor functions.
484 */
485static int	man_dlpi_senddown(manstr_t *, mblk_t *);
486static int	man_start_lower(man_dest_t *, mblk_t *, queue_t *, int caller);
487static int	man_lrput(queue_t *, mblk_t *);
488/*
489 * Lower multiplexor functions.
490 */
491static int	man_lwsrv(queue_t *);
492static int	man_lrsrv(queue_t *);
493static void	man_dlpi_replay(man_dest_t *, mblk_t *);
494static int	man_dlioc_replay(man_dest_t *);
495/*
496 * Link failover routines.
497 */
498static int	man_gettimer(int, man_dest_t *);
499static void	man_linkcheck_timer(void *);
500static int	man_needs_linkcheck(man_dest_t *);
501static int	man_do_autoswitch(man_dest_t *);
502static int	man_autoswitch(man_pg_t *, man_dev_t *, man_work_t *);
503static int	man_prep_dests_for_switch(man_pg_t *, man_dest_t **, int *);
504static int	man_str_uses_pg(manstr_t *, man_pg_t *);
505static void	man_do_icmp_bcast(man_dest_t *, t_uscalar_t);
506static mblk_t	*man_alloc_udreq(int, man_dladdr_t *);
507static mblk_t	*man_pinger(t_uscalar_t);
508/*
509 * Functions normally executing outside of the STREAMs perimeter.
510 */
511/*
512 * Functions supporting/processing work requests.
513 */
514static void	man_bwork(void);
515static void	man_iwork(void);		/* inside perimeter */
516void		man_work_add(man_workq_t *, man_work_t *);
517man_work_t	*man_work_alloc(int, int);
518void		man_work_free(man_work_t *);
519/*
520 * Functions implementing/supporting failover.
521 *
522 * Executed inside perimeter.
523 */
524static int	man_do_dr_attach(man_work_t *);
525static int	man_do_dr_switch(man_work_t *);
526static void	man_do_dr_detach(man_work_t *);
527static int	man_iswitch(man_work_t *);
528static void	man_ifail_dest(man_dest_t *);
529static man_dest_t *man_switch_match(man_dest_t *, int, void *);
530static void	man_add_dests(man_pg_t *);
531static void	man_reset_dlpi(void *);
532static mblk_t	*man_dup_mplist(mblk_t *);
533static mblk_t	*man_alloc_ubreq_dreq();
534/*
535 * Executed outside perimeter (us man_lock for synchronization).
536 */
537static void	man_bclose(man_adest_t *);
538static void	man_bswitch(man_adest_t *, man_work_t *);
539static int	man_plumb(man_dest_t *);
540static void	man_unplumb(man_dest_t *);
541static void	man_plink(queue_t *, mblk_t *);
542static void	man_unplink(queue_t *, mblk_t *);
543static void	man_linkrec_insert(man_linkrec_t *);
544static queue_t	*man_linkrec_find(int);
545/*
546 * Functions supporting pathgroups
547 */
548int	man_pg_cmd(mi_path_t *, man_work_t *);
549static int	man_pg_assign(man_pg_t **, mi_path_t *, int);
550static int	man_pg_create(man_pg_t **, man_pg_t **, mi_path_t *);
551static int	man_pg_unassign(man_pg_t **, mi_path_t *);
552static int	man_pg_activate(man_t *, mi_path_t *, man_work_t *);
553static int	man_pg_read(man_pg_t *, mi_path_t *);
554static man_pg_t	*man_find_path_by_dev(man_pg_t *, man_dev_t *, man_path_t **);
555static man_pg_t	*man_find_pg_by_id(man_pg_t *, int);
556static man_path_t	*man_find_path_by_ppa(man_path_t *, int);
557static man_path_t	*man_find_active_path(man_path_t *);
558static man_path_t	*man_find_alternate_path(man_path_t *);
559static void	man_path_remove(man_path_t **, man_path_t *);
560static void	man_path_insert(man_path_t **, man_path_t *);
561static void	man_path_merge(man_path_t **, man_path_t *);
562static int	man_path_kstat_init(man_path_t *);
563static void	man_path_kstat_uninit(man_path_t *);
564/*
565 * Functions supporting kstat reporting.
566 */
567static int	man_kstat_update(kstat_t *, int);
568static void	man_do_kstats(man_work_t *);
569static void	man_update_path_kstats(man_t *);
570static void 	man_update_dev_kstats(kstat_named_t *, man_path_t *);
571static void	man_sum_dests_kstats(kstat_named_t *, man_pg_t *);
572static void	man_kstat_named_init(kstat_named_t *, int);
573static int	man_kstat_byname(kstat_t *, char *, kstat_named_t *);
574static void	man_sum_kstats(kstat_named_t *, kstat_t *, kstat_named_t *);
575/*
576 * Functions supporting ndd.
577 */
578static int	man_param_register(param_t *, int);
579static int	man_pathgroups_report(queue_t *, mblk_t *, caddr_t, cred_t *);
580static void	man_preport(man_path_t *, mblk_t *);
581static int	man_set_active_path(queue_t *, mblk_t *, char *, caddr_t,
582			cred_t *);
583static int	man_get_hostinfo(queue_t *, mblk_t *, caddr_t, cred_t *);
584static char	*man_inet_ntoa(in_addr_t);
585static int	man_param_get(queue_t *, mblk_t *, caddr_t, cred_t *);
586static int	man_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
587static  void    man_param_cleanup(void);
588static  void    man_nd_free(caddr_t *nd_pparam);
589/*
590 * MAN SSC/Domain specific externs.
591 */
592extern int	man_get_iosram(manc_t *);
593extern int	man_domain_configure(void);
594extern int	man_domain_deconfigure(void);
595extern int	man_dossc_switch(uint32_t);
596extern int	man_is_on_domain;
597
598/*
599 * Driver Globals protected by inner perimeter.
600 */
601static manstr_t	*man_strup = NULL;	/* list of MAN STREAMS */
602static caddr_t	man_ndlist = NULL;	/* head of ndd var list */
603void		*man_softstate = NULL;
604
605/*
606 * Driver globals protected by man_lock.
607 */
608kmutex_t		man_lock;		/* lock protecting vars below */
609static kthread_id_t	man_bwork_id = NULL;	/* background thread ID */
610man_workq_t		*man_bwork_q;		/* bgthread work q */
611man_workq_t		*man_iwork_q;		/* inner perim (uwsrv) work q */
612static man_linkrec_t	*man_linkrec_head = NULL;	/* list of linkblks */
613ldi_handle_t		man_ctl_lh = NULL;	/* MAN control handle */
614queue_t			*man_ctl_wq = NULL;	/* MAN control rq */
615static int		man_config_state = MAN_UNCONFIGURED;
616static int		man_config_error = ENODEV;
617
618/*
619 * These parameters are accessed via ndd to report the link configuration
620 * for the MAN driver. They can also be used to force configuration changes.
621 */
622#define	MAN_NOTUSR	0x0f000000
623
624/* ------------------------------------------------------------------------- */
625
626static  param_t	man_param_arr[] = {
627	/* min		max		value		name */
628	{  0,		0xFFFF,		0,		"man_debug_level"},
629};
630
631#define	MAN_NDD_GETABLE	1
632#define	MAN_NDD_SETABLE	2
633
634static  uint32_t	man_param_display[] = {
635/* DISPLAY */
636MAN_NDD_SETABLE,	/* man_debug_level */
637};
638
639/*
640 * STREAMs information.
641 */
642static struct module_info man_m_info = {
643	MAN_IDNUM,			/* mi_idnum */
644	MAN_IDNAME,			/* mi_idname */
645	MAN_MINPSZ,			/* mi_minpsz */
646	MAN_MAXPSZ,			/* mi_maxpsz */
647	MAN_HIWAT,			/* mi_hiwat */
648	MAN_LOWAT			/* mi_lowat */
649};
650
651/*
652 * Upper read queue does not do anything.
653 */
654static struct qinit man_urinit = {
655	NULL,				/* qi_putp */
656	NULL,				/* qi_srvp */
657	man_open,			/* qi_qopen */
658	man_close,			/* qi_qclose */
659	NULL,				/* qi_qadmin */
660	&man_m_info,			/* qi_minfo */
661	NULL				/* qi_mstat */
662};
663
664static struct qinit man_lrinit = {
665	man_lrput,			/* qi_putp */
666	man_lrsrv,			/* qi_srvp */
667	man_open,			/* qi_qopen */
668	man_close,			/* qi_qclose */
669	NULL,				/* qi_qadmin */
670	&man_m_info,			/* qi_minfo */
671	NULL				/* qi_mstat */
672};
673
674static struct qinit man_uwinit = {
675	man_uwput,			/* qi_putp */
676	man_uwsrv,			/* qi_srvp */
677	man_open,			/* qi_qopen */
678	man_close,			/* qi_qclose */
679	NULL,				/* qi_qadmin */
680	&man_m_info,			/* qi_minfo */
681	NULL				/* qi_mstat */
682};
683
684static struct qinit man_lwinit = {
685	NULL,				/* qi_putp */
686	man_lwsrv,			/* qi_srvp */
687	man_open,			/* qi_qopen */
688	man_close,			/* qi_qclose */
689	NULL,				/* qi_qadmin */
690	&man_m_info,			/* qi_minfo */
691	NULL				/* qi_mstat */
692};
693
694static struct streamtab man_maninfo = {
695	&man_urinit,			/* st_rdinit */
696	&man_uwinit,			/* st_wrinit */
697	&man_lrinit,			/* st_muxrinit */
698	&man_lwinit			/* st_muxwrinit */
699};
700
701
702/*
703 * Module linkage information for the kernel.
704 *
705 * Locking Theory:
706 * 	D_MTPERMOD -	Only an inner perimeter: All routines single
707 * 			threaded (except put, see below).
708 *	D_MTPUTSHARED -	Put routines enter inner perimeter shared (not
709 *			exclusive) for concurrency/performance reasons.
710 *
711 *	Anyone who needs exclusive outer perimeter permission (changing
712 *	global data structures) does so via qwriter() calls. The
713 *	background thread does all his work outside of perimeter and
714 *	submits work via qtimeout() when data structures need to be
715 *	modified.
716 */
717
718#define	MAN_MDEV_FLAGS	(D_MP|D_MTPERMOD|D_MTPUTSHARED)
719
720DDI_DEFINE_STREAM_OPS(man_ops, nulldev, nulldev, man_attach,
721    man_detach, nodev, man_info, MAN_MDEV_FLAGS, &man_maninfo,
722    ddi_quiesce_not_supported);
723
724extern int nodev(), nulldev();
725
726static struct modldrv modldrv = {
727	&mod_driverops, 	/* Module type.  This one is a pseudo driver */
728	"MAN MetaDriver",
729	&man_ops,		/* driver ops */
730};
731
732static struct modlinkage modlinkage = {
733	MODREV_1,
734	(void *) &modldrv,
735	NULL
736};
737
738
739/* Virtual Driver loader entry points */
740
741int
742_init(void)
743{
744	int		status = DDI_FAILURE;
745
746	MAN_DBG(MAN_INIT, ("_init:"));
747
748	status = mod_install(&modlinkage);
749	if (status != 0) {
750		cmn_err(CE_WARN, "man_init: mod_install failed"
751		    " error = %d", status);
752		return (status);
753	}
754
755	status = ddi_soft_state_init(&man_softstate, sizeof (man_t), 4);
756	if (status != 0) {
757		cmn_err(CE_WARN, "man_init: ddi_soft_state_init failed"
758		    " error = %d", status);
759		(void) mod_remove(&modlinkage);
760		return (status);
761	}
762
763	man_bwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
764	man_iwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
765
766	mutex_init(&man_lock, NULL, MUTEX_DRIVER, NULL);
767	cv_init(&man_bwork_q->q_cv, NULL, CV_DRIVER, NULL);
768	cv_init(&man_iwork_q->q_cv, NULL, CV_DRIVER, NULL);
769
770	return (0);
771}
772
773/*
774 * _info is called by modinfo().
775 */
776int
777_info(struct modinfo *modinfop)
778{
779	int	status;
780
781	MAN_DBG(MAN_INIT, ("_info:"));
782
783	status = mod_info(&modlinkage, modinfop);
784
785	MAN_DBG(MAN_INIT, ("_info: returns %d", status));
786
787	return (status);
788}
789
790/*
791 * _fini called by modunload() just before driver is unloaded from memory.
792 */
793int
794_fini(void)
795{
796	int status = 0;
797
798	MAN_DBG(MAN_INIT, ("_fini:"));
799
800
801	/*
802	 * The only upper stream left should be man_ctl_lh. Note that
803	 * man_close (upper stream) is synchronous (i.e. it waits for
804	 * all STREAMS framework associated with the upper stream to be
805	 * torn down). This guarantees that man_ctl_lh will never become
806	 * NULL until noone is around to notice. This assumption is made
807	 * in a few places like man_plumb, man_unplumb, etc.
808	 */
809	if (man_strup && (man_strup->ms_next != NULL))
810		return (EBUSY);
811
812	/*
813	 * Deconfigure the driver.
814	 */
815	status = man_deconfigure();
816	if (status)
817		goto exit;
818
819	/*
820	 * need to detach every instance of the driver
821	 */
822	status = mod_remove(&modlinkage);
823	if (status != 0)
824		goto exit;
825
826	ddi_soft_state_fini(&man_softstate);
827
828	/*
829	 * Free up locks.
830	 */
831	mutex_destroy(&man_lock);
832	cv_destroy(&man_bwork_q->q_cv);
833	cv_destroy(&man_iwork_q->q_cv);
834
835	man_kfree(man_bwork_q, sizeof (man_workq_t));
836	man_kfree(man_iwork_q, sizeof (man_workq_t));
837
838exit:
839
840	MAN_DBG(MAN_INIT, ("_fini: returns %d", status));
841
842	return (status);
843}
844
845/*
846 * Deconfigure the MAN driver.
847 */
848static int
849man_deconfigure()
850{
851	man_work_t	*wp;
852	int		status = 0;
853
854	MAN_DBG(MAN_CONFIG, ("man_deconfigure:\n"));
855
856	mutex_enter(&man_lock);
857
858	if (man_is_on_domain) {
859		status = man_domain_deconfigure();
860		if (status != 0)
861			goto exit;
862	}
863
864	man_param_cleanup();	/* Free up NDD resources */
865
866	/*
867	 * I may have to handle straggling work requests. Just qwait?
868	 * or cvwait? Called from _fini - TBD
869	 */
870	ASSERT(man_bwork_q->q_work == NULL);
871	ASSERT(man_iwork_q->q_work == NULL);
872
873	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting CLOSE_CTL\n"));
874
875	if (man_ctl_lh != NULL) {
876		wp = man_work_alloc(MAN_WORK_CLOSE_CTL, KM_SLEEP);
877		wp->mw_flags = MAN_WFLAGS_CVWAITER;
878		man_work_add(man_bwork_q, wp);
879
880		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
881			cv_wait(&wp->mw_cv, &man_lock);
882		}
883		man_work_free(wp);
884	}
885
886	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting STOP\n"));
887	if (man_bwork_id != NULL) {
888
889		wp = man_work_alloc(MAN_WORK_STOP, KM_SLEEP);
890		wp->mw_flags = MAN_WFLAGS_CVWAITER;
891		man_work_add(man_bwork_q, wp);
892
893		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
894			cv_wait(&wp->mw_cv, &man_lock);
895		}
896		man_work_free(wp);
897	}
898	man_config_state = MAN_UNCONFIGURED;
899
900exit:
901	mutex_exit(&man_lock);
902
903	MAN_DBG(MAN_CONFIG, ("man_deconfigure: returns %d\n", status));
904
905	return (status);
906}
907
908/*
909 * man_attach - allocate resources and attach an instance of the MAN driver
910 * The <man>.conf file controls how many instances of the MAN driver are
911 * available.
912 *
913 *	dip - devinfo of node
914 * 	cmd - one of DDI_ATTACH | DDI_RESUME
915 *
916 *	returns	- success - DDI_SUCCESS
917 *		- failure - DDI_FAILURE
918 */
919static int
920man_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
921{
922	man_t		*manp;		/* per instance data */
923	uchar_t		flag = KSTAT_FLAG_WRITABLE; /* support netstat -kc */
924	kstat_t		*ksp;
925	int		minor_node_created = 0;
926	int		instance;
927	eaddr_t		man_eaddr;
928
929	MAN_DBG(MAN_INIT, ("man_attach: \n"));
930
931	if (cmd != DDI_ATTACH) {
932		MAN_DBG(MAN_INIT, ("man_attach: bad command %d\n", cmd));
933		return (DDI_FAILURE);
934	}
935
936	if (man_get_our_etheraddr(&man_eaddr))
937		return (DDI_FAILURE);
938
939	instance = ddi_get_instance(dip);
940
941	/*
942	 * we assume that instance is always equal to zero.
943	 * and there will always only be one instance.
944	 * this is done because when dman opens itself via DMAN_INT_PATH,
945	 * the path assumes that the instance number is zero.
946	 * if we ever need to support multiple instances of the dman
947	 * driver or non-zero instances, this will have to change.
948	 */
949	ASSERT(instance == 0);
950
951	/*
952	 * Allocate per device info pointer and link in to global list of
953	 * MAN devices.
954	 */
955	if ((ddi_soft_state_zalloc(man_softstate, instance) != DDI_SUCCESS) ||
956	    ((manp = ddi_get_soft_state(man_softstate, instance)) == NULL)) {
957		cmn_err(CE_WARN, "man_attach: cannot zalloc soft state!");
958		return (DDI_FAILURE);
959	}
960
961	ddi_set_driver_private(dip, manp);
962	manp->man_dip = dip;
963	manp->man_meta_major = ddi_driver_major(dip);
964	manp->man_meta_ppa = instance;
965
966	/*
967	 * Set ethernet address. Note that this address is duplicated
968	 * at md_src_eaddr.
969	 */
970	ether_copy(&man_eaddr, &manp->man_eaddr);
971	manp->man_eaddr_v = 1;
972
973	MAN_DBG(MAN_INIT, ("man_attach: set ether to %s",
974	    ether_sprintf(&manp->man_eaddr)));
975
976	/*
977	 * Initialize failover-related fields (timers and such),
978	 * taking values from properties if present.
979	 */
980	manp->man_init_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
981	    "init_time", MAN_INIT_TIME);
982
983	manp->man_linkcheck_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
984	    "linkcheck_time", MAN_LINKCHECK_TIME);
985
986	manp->man_linkstale_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
987	    "man_linkstale_time", MAN_LINKSTALE_TIME);
988
989	manp->man_linkstale_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
990	    "man_linkstale_retries", MAN_LINKSTALE_RETRIES);
991
992	manp->man_dr_delay = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
993	    "man_dr_delay", MAN_DR_DELAY);
994
995	manp->man_dr_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
996	    "man_dr_retries", MAN_DR_RETRIES);
997
998	manp->man_kstat_waittime = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
999	    "man_kstat_waittime", MAN_KSTAT_WAITTIME);
1000
1001	manp->man_dlpireset_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
1002	    "man_dlpireset_time", MAN_DLPIRESET_TIME);
1003
1004	if (ddi_create_internal_pathname(dip, MAN_IDNAME, S_IFCHR,
1005	    ddi_get_instance(dip)) == DDI_SUCCESS) {
1006		minor_node_created = 1;
1007	} else {
1008		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1009		    ddi_get_instance(dip));
1010		goto exit;
1011	}
1012
1013	if (ddi_create_minor_node(dip, MAN_IDNAME, S_IFCHR,
1014	    ddi_get_instance(dip), DDI_NT_NET, CLONE_DEV) == DDI_SUCCESS) {
1015		minor_node_created = 1;
1016	} else {
1017		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1018		    ddi_get_instance(dip));
1019		goto exit;
1020	}
1021
1022	/*
1023	 * Allocate meta kstat_t for this instance of the driver.
1024	 * Note that each of man_path_t keeps track of the kstats
1025	 * for the real devices via mp_last_knp.
1026	 */
1027#ifdef	kstat
1028	flag |= KSTAT_FLAG_PERSISTENT;
1029#endif
1030	ksp = kstat_create(MAN_IDNAME, ddi_get_instance(dip), NULL, "net",
1031	    KSTAT_TYPE_NAMED, MAN_NUMSTATS, flag);
1032
1033	if (ksp == NULL) {
1034		cmn_err(CE_WARN, "man_attach(%d): kstat_create failed"
1035		    " - manp(0x%p)", manp->man_meta_ppa,
1036		    (void *)manp);
1037		goto exit;
1038	}
1039
1040	man_kstat_named_init(ksp->ks_data, MAN_NUMSTATS);
1041	ksp->ks_update = man_kstat_update;
1042	ksp->ks_private = (void *) manp;
1043	manp->man_ksp = ksp;
1044	kstat_install(manp->man_ksp);
1045
1046	ddi_report_dev(dip);
1047
1048	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_SUCCESS",
1049	    ddi_get_instance(dip)));
1050
1051	return (DDI_SUCCESS);
1052
1053exit:
1054	if (minor_node_created)
1055		ddi_remove_minor_node(dip, NULL);
1056	ddi_set_driver_private(dip, NULL);
1057	ddi_soft_state_free(man_softstate, instance);
1058
1059	MAN_DBG(MAN_INIT, ("man_attach(%d) eaddr returns DDI_FAILIRE",
1060	    ddi_get_instance(dip)));
1061
1062	return (DDI_FAILURE);
1063
1064}
1065
1066static int
1067man_get_our_etheraddr(eaddr_t *eap)
1068{
1069	manc_t	manc;
1070	int	status = 0;
1071
1072	if (man_is_on_domain) {
1073		if (status = man_get_iosram(&manc))
1074			return (status);
1075		ether_copy(&manc.manc_dom_eaddr, eap);
1076	} else {
1077		(void) localetheraddr((struct ether_addr *)NULL, eap);
1078	}
1079
1080	return (status);
1081}
1082
1083/*
1084 * man_detach - detach an instance of a driver
1085 *
1086 *	dip - devinfo of node
1087 * 	cmd - one of DDI_DETACH | DDI_SUSPEND
1088 *
1089 *	returns	- success - DDI_SUCCESS
1090 *		- failure - DDI_FAILURE
1091 */
1092static int
1093man_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1094{
1095	register man_t	*manp;		/* per instance data */
1096	int		instance;
1097
1098	MAN_DBG(MAN_INIT, ("man_detach(%d):\n", ddi_get_instance(dip)));
1099
1100	if (cmd != DDI_DETACH) {
1101		MAN_DBG(MAN_INIT, ("man_detach: bad command %d\n", cmd));
1102		return (DDI_FAILURE);
1103	}
1104
1105	if (dip == NULL) {
1106		MAN_DBG(MAN_INIT, ("man_detach: dip == NULL\n"));
1107		return (DDI_FAILURE);
1108	}
1109
1110	instance = ddi_get_instance(dip);
1111
1112	mutex_enter(&man_lock);
1113
1114	manp = (man_t *)ddi_get_soft_state(man_softstate, instance);
1115	if (manp == NULL) {
1116		mutex_exit(&man_lock);
1117
1118		cmn_err(CE_WARN, "man_detach: unable to get softstate"
1119		    " for instance = %d, dip = 0x%p!\n", instance,
1120		    (void *)dip);
1121		return (DDI_FAILURE);
1122	}
1123
1124	if (manp->man_refcnt != 0) {
1125		mutex_exit(&man_lock);
1126
1127		cmn_err(CE_WARN, "man_detach: %s%d refcnt %d", MAN_IDNAME,
1128		    instance, manp->man_refcnt);
1129		MAN_DBGCALL(MAN_INIT, man_print_man(manp));
1130
1131		return (DDI_FAILURE);
1132	}
1133
1134	ddi_remove_minor_node(dip, NULL);
1135
1136	mutex_exit(&man_lock);
1137
1138	kstat_delete(manp->man_ksp);
1139	ddi_soft_state_free(man_softstate, instance);
1140	ddi_set_driver_private(dip, NULL);
1141
1142	MAN_DBG(MAN_INIT, ("man_detach returns DDI_SUCCESS"));
1143
1144	return (DDI_SUCCESS);
1145}
1146
1147/*
1148 * man_info:
1149 *	As a standard DLPI style-2, man_info() should always return
1150 *	DDI_FAILURE.
1151 *
1152 *	However, man_open() has special treatment for a direct open
1153 *	via kstr_open() without going through the CLONE driver.
1154 *	To make this special kstr_open() work, we need to map
1155 *	minor of 0 to instance 0.
1156 */
1157/*ARGSUSED*/
1158static int
1159man_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1160{
1161	minor_t minor;
1162
1163	switch (infocmd) {
1164	case DDI_INFO_DEVT2DEVINFO:
1165		break;
1166
1167	case DDI_INFO_DEVT2INSTANCE:
1168		minor = getminor((dev_t)arg);
1169		if (minor == 0) {
1170			*result = (void *)(uintptr_t)minor;
1171			return (DDI_SUCCESS);
1172		}
1173		break;
1174	default:
1175		break;
1176	}
1177	return (DDI_FAILURE);
1178}
1179
1180/* Standard Device Driver entry points */
1181
1182/*
1183 * man_open - open the device
1184 *
1185 *	rq - upper read queue of the stream
1186 *	devp - pointer to a device number
1187 *	flag - information passed from the user program open(2) system call
1188 *	sflag - stream flags
1189 *	credp - pointer to the cred(9S) user credential structure
1190 *
1191 *	returns	- success - 0
1192 *		- failure - errno value for failure
1193 */
1194/*ARGSUSED*/
1195static int
1196man_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
1197{
1198	int			minordev = -1;
1199	manstr_t		*msp;
1200	manstr_t		*tsp;
1201	manstr_t		**prevmsp;
1202	int			status = 0;
1203
1204	MAN_DBG(MAN_OCLOSE, ("man_open: rq(0x%p) sflag(0x%x)\n",
1205	    (void *)rq, sflag));
1206
1207	ASSERT(rq);
1208	ASSERT(sflag != MODOPEN);
1209
1210	/*
1211	 * reopen; q_ptr set to msp at open completion.
1212	 */
1213	if (rq->q_ptr) {
1214		return (0);
1215	}
1216
1217	/*
1218	 * Allocate and initialize manstr_t for this device.
1219	 */
1220	msp = man_kzalloc(sizeof (manstr_t), KM_SLEEP);
1221	SETSTATE(msp, DL_UNATTACHED);
1222	msp->ms_meta_ppa = -1;
1223	msp->ms_rq = rq;
1224	rq->q_ptr = WR(rq)->q_ptr = msp;
1225
1226	/*
1227	 * Get the MAN driver configured on 1st open.  Note that the only way
1228	 * we get sflag != CLONEOPEN is via the call in man_plumbctl().  All
1229	 * CLONEOPEN calls to man_open will be via the file system
1230	 * device node /dev/man, a pseudo clone device.
1231	 */
1232
1233	qprocson(rq);
1234
1235	if (sflag == CLONEOPEN && man_config_state != MAN_CONFIGURED) {
1236		/*
1237		 * First open calls man_configure. Everyone qwaits until
1238		 * we get it open. See man_open_ctl() comments for mutex
1239		 * lock/synchronization info.
1240		 */
1241
1242		mutex_enter(&man_lock);
1243
1244		if (man_config_state == MAN_UNCONFIGURED) {
1245			man_config_state = MAN_CONFIGURING;
1246			mutex_exit(&man_lock);
1247			status = man_configure(rq);
1248			if (status != 0)
1249				goto exit;
1250		} else {
1251			while (man_config_state == MAN_CONFIGURING) {
1252
1253				mutex_exit(&man_lock);
1254				status = qwait_sig(rq);
1255
1256				if (status == 0) {
1257					status = EINTR;
1258					goto exit;
1259				}
1260
1261				mutex_enter(&man_lock);
1262			}
1263			mutex_exit(&man_lock);
1264
1265			if (man_config_error) {
1266				status = man_config_error;
1267				goto exit;
1268			}
1269		}
1270	}
1271
1272	/*
1273	 * Determine minor device number. man_open serialized by
1274	 * D_MTPERMOD.
1275	 */
1276	prevmsp = &man_strup;
1277	if (sflag == CLONEOPEN) {
1278
1279		minordev = 0;
1280		for (; (tsp = *prevmsp) != NULL; prevmsp = &tsp->ms_next) {
1281			if (minordev < tsp->ms_minor)
1282				break;
1283			minordev++;
1284		}
1285		*devp = makedevice(getmajor(*devp), minordev);
1286
1287	} else {
1288		/*
1289		 * Should only get here from man_plumbctl().
1290		 */
1291		/*LINTED E_ASSIGN_UINT_TO_SIGNED_INT*/
1292		minordev = getminor(*devp);
1293
1294		/*
1295		 * No need to protect this here as all opens are
1296		 * qwaiting, and the bgthread (who is doing this open)
1297		 * is the only one who mucks with this variable.
1298		 */
1299		man_ctl_wq = WR(rq);
1300
1301		ASSERT(minordev == 0);	/* TBD delete this */
1302	}
1303
1304	msp->ms_meta_maj = getmajor(*devp);
1305	msp->ms_minor = minordev;
1306	if (minordev == 0)
1307		msp->ms_flags = MAN_SFLAG_CONTROL;
1308
1309	/*
1310	 * Link new entry into global list of active entries.
1311	 */
1312	msp->ms_next = *prevmsp;
1313	*prevmsp = msp;
1314
1315
1316	/*
1317	 * Disable automatic enabling of our write service procedure.
1318	 * We control this explicitly.
1319	 */
1320	noenable(WR(rq));
1321
1322exit:
1323	MAN_DBG(MAN_OCLOSE, ("man_open: exit rq(0x%p) minor %d errno %d\n",
1324	    (void *)rq, minordev, status));
1325
1326	/*
1327	 * Clean up on error.
1328	 */
1329	if (status) {
1330		qprocsoff(rq);
1331		rq->q_ptr = WR(rq)->q_ptr = NULL;
1332		man_kfree((char *)msp, sizeof (manstr_t));
1333	} else
1334		(void) qassociate(rq, -1);
1335
1336	return (status);
1337}
1338
1339/*
1340 * Get the driver configured.  Called from first man_open with exclusive
1341 * inner perimeter.
1342 */
1343static int
1344man_configure(queue_t *rq)
1345{
1346	man_work_t	*wp;
1347	int		status = 0;
1348
1349	MAN_DBG(MAN_CONFIG, ("man_configure:"));
1350
1351	/*
1352	 * Initialize NDD parameters.
1353	 */
1354	if (!man_ndlist &&
1355	    !man_param_register(man_param_arr, A_CNT(man_param_arr))) {
1356		cmn_err(CE_WARN, "man_configure: man_param_register failed!");
1357		man_config_error = ENOMEM;
1358		goto exit;
1359	}
1360
1361	mutex_enter(&man_lock);
1362
1363	/*
1364	 * Start up background thread.
1365	 */
1366	if (man_bwork_id == NULL)
1367		man_bwork_id = thread_create(NULL, 2 * DEFAULTSTKSZ,
1368		    man_bwork, NULL, 0, &p0, TS_RUN, minclsyspri);
1369
1370	/*
1371	 * Submit work to get control stream opened. Qwait until its
1372	 * done. See man_open_ctl for mutex lock/synchronization info.
1373	 */
1374
1375	if (man_ctl_lh == NULL) {
1376		wp = man_work_alloc(MAN_WORK_OPEN_CTL, KM_SLEEP);
1377		wp->mw_flags |= MAN_WFLAGS_QWAITER;
1378		wp->mw_q = WR(rq);
1379
1380		/*
1381		 * Submit work and wait. When man_open_ctl exits
1382		 * man_open, it will cause qwait below to return.
1383		 */
1384		man_work_add(man_bwork_q, wp);
1385		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
1386			mutex_exit(&man_lock);
1387			qwait(rq);
1388			mutex_enter(&man_lock);
1389		}
1390		status = wp->mw_status;
1391		man_work_free(wp);
1392
1393	}
1394	mutex_exit(&man_lock);
1395
1396	/*
1397	 * If on domain, setup IOSRAM and build the pathgroups
1398	 * automatically.
1399	 */
1400	if ((status == 0) && man_is_on_domain)
1401		status = man_domain_configure();
1402
1403exit:
1404	mutex_enter(&man_lock);
1405
1406	man_config_error = status;
1407	if (status != 0)
1408		man_config_state = MAN_UNCONFIGURED;
1409	else
1410		man_config_state = MAN_CONFIGURED;
1411
1412	mutex_exit(&man_lock);
1413
1414	MAN_DBG(MAN_CONFIG, ("man_configure: returns %d\n", status));
1415
1416	return (status);
1417}
1418
1419/*
1420 * man_close - close the device
1421 *
1422 *	rq - upper read queue of the stream
1423 *
1424 *	returns	- success - 0
1425 *		- failure - errno value for failure
1426 */
1427static int
1428man_close(queue_t *rq)
1429{
1430	manstr_t		*close_msp;
1431	manstr_t		*msp;
1432
1433	MAN_DBG(MAN_OCLOSE, ("man_close: rq(0x%p)\n", (void *)rq));
1434
1435	qprocsoff(rq);
1436	close_msp = (manstr_t *)rq->q_ptr;
1437
1438	/*
1439	 * Unlink the per-Stream entry from the active list and free it.
1440	 */
1441	if (close_msp == man_strup)
1442		man_strup = close_msp->ms_next;
1443	else {
1444		for (msp = man_strup; msp && msp->ms_next != close_msp; )
1445			msp = msp->ms_next;
1446
1447		if (msp == NULL) {
1448			cmn_err(CE_WARN, "man_close: no stream!");
1449			return (ENODEV);
1450		}
1451
1452		msp->ms_next = close_msp->ms_next;
1453	}
1454
1455	if (close_msp->ms_dests != NULL) {
1456		/*
1457		 * Still DL_ATTACHED
1458		 */
1459		man_work_t *wp;
1460
1461		wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_SLEEP);
1462		man_dodetach(close_msp, wp);
1463	}
1464
1465	if (close_msp->ms_flags & MAN_SFLAG_CONTROL) {
1466		/*
1467		 * Driver about to unload.
1468		 */
1469		man_ctl_wq = NULL;
1470	}
1471
1472	rq->q_ptr = WR(rq)->q_ptr = NULL;
1473	man_kfree((char *)close_msp, sizeof (manstr_t));
1474	(void) qassociate(rq, -1);
1475
1476	MAN_DBG(MAN_OCLOSE, ("man_close: exit\n"));
1477
1478	return (0);
1479}
1480
1481/*
1482 * Ask bgthread to tear down lower stream and qwait
1483 * until its done.
1484 */
1485static void
1486man_dodetach(manstr_t *msp, man_work_t *wp)
1487{
1488	man_dest_t	*mdp;
1489	int		i;
1490	mblk_t		*mp;
1491
1492	mdp = msp->ms_dests;
1493	msp->ms_dests = NULL;
1494	msp->ms_destp = NULL;
1495
1496	/*
1497	 * Excise lower dests array, set it closing and hand it to
1498	 * background thread to dispose of.
1499	 */
1500	for (i = 0; i < MAN_MAX_DESTS; i++) {
1501
1502		mdp[i].md_state |= MAN_DSTATE_CLOSING;
1503		mdp[i].md_msp = NULL;
1504		mdp[i].md_rq = NULL;
1505
1506		if (mdp[i].md_lc_timer_id != 0) {
1507			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
1508			mdp[i].md_lc_timer_id = 0;
1509		}
1510		if (mdp[i].md_bc_id != 0) {
1511			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
1512			mdp[i].md_bc_id = 0;
1513		}
1514
1515		mutex_enter(&mdp[i].md_lock);
1516		while ((mp = mdp[i].md_dmp_head) != NULL) {
1517			mdp[i].md_dmp_head = mp->b_next;
1518			mp->b_next = NULL;
1519			freemsg(mp);
1520		}
1521		mdp[i].md_dmp_count = 0;
1522		mdp[i].md_dmp_tail = NULL;
1523		mutex_exit(&mdp[i].md_lock);
1524	}
1525
1526	/*
1527	 * Dump any DL type messages previously caught.
1528	 */
1529	man_dl_clean(&msp->ms_dl_mp);
1530	man_dl_clean(&msp->ms_dlioc_mp);
1531
1532	/*
1533	 * We need to clear fast path flag when dlioc messages are cleaned.
1534	 */
1535	msp->ms_flags &= ~MAN_SFLAG_FAST;
1536
1537	/*
1538	 * MAN_WORK_CLOSE_STREAM work request preallocated by caller.
1539	 */
1540	ASSERT(wp->mw_type == MAN_WORK_CLOSE_STREAM);
1541	ASSERT(mdp != NULL);
1542	wp->mw_arg.a_mdp = mdp;
1543	wp->mw_arg.a_ndests = MAN_MAX_DESTS;
1544	wp->mw_arg.a_pg_id = -1;	/* Don't care */
1545
1546	mutex_enter(&man_lock);
1547	man_work_add(man_bwork_q, wp);
1548	msp->ms_manp->man_refcnt--;
1549	mutex_exit(&man_lock);
1550
1551	msp->ms_manp = NULL;
1552
1553}
1554
1555
1556/*
1557 * man_uwput - handle DLPI messages issued from upstream, the write
1558 * side of the upper half of multiplexor. Called with shared access to
1559 * the inner perimeter.
1560 *
1561 *	wq - upper write queue of mxx
1562 *	mp - mblk ptr to DLPI request
1563 */
1564static int
1565man_uwput(register queue_t *wq, register mblk_t *mp)
1566{
1567	register manstr_t	*msp;		/* per stream data */
1568	register man_t		*manp;		/* per instance data */
1569
1570	msp = (manstr_t *)wq->q_ptr;
1571
1572	MAN_DBG(MAN_UWPUT, ("man_uwput: wq(0x%p) mp(0x%p) db_type(0x%x)"
1573	    " msp(0x%p)\n",
1574	    (void *)wq, (void *)mp, DB_TYPE(mp), (void *)msp));
1575#if DEBUG
1576	if (man_debug & MAN_UWPUT) {
1577		if (DB_TYPE(mp) == M_IOCTL) {
1578			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1579			MAN_DBG(MAN_UWPUT,
1580			    ("man_uwput: M_IOCTL ioc_cmd(0x%x)\n",
1581			    iocp->ioc_cmd));
1582		} else if (DB_TYPE(mp) == M_CTL) {
1583			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1584			MAN_DBG(MAN_UWPUT,
1585			    ("man_uwput: M_CTL ioc_cmd(0x%x)\n",
1586			    iocp->ioc_cmd));
1587		}
1588	}
1589#endif	/* DEBUG */
1590
1591
1592	switch (DB_TYPE(mp)) {
1593	case M_DATA:
1594		manp = msp->ms_manp;
1595
1596		if (((msp->ms_flags & (MAN_SFLAG_FAST | MAN_SFLAG_RAW)) == 0) ||
1597		    (msp->ms_dlpistate != DL_IDLE) ||
1598		    (manp == NULL)) {
1599
1600			merror(wq, mp, EPROTO);
1601			break;
1602		}
1603
1604		if (wq->q_first) {
1605			(void) putq(wq, mp);
1606			qenable(wq);
1607		} else {
1608			ehdr_t	*ep = (ehdr_t *)mp->b_rptr;
1609
1610			(void) man_start(wq, mp, &ep->ether_dhost);
1611		}
1612		break;
1613
1614	case M_PROTO:
1615	case M_PCPROTO:
1616		if ((DL_PRIM(mp) == DL_UNITDATA_IND) && !wq->q_first) {
1617			(void) man_udreq(wq, mp);
1618		} else {
1619			(void) putq(wq, mp);
1620			qenable(wq);
1621		}
1622		break;
1623
1624	case M_IOCTL:
1625	case M_IOCDATA:
1626		qwriter(wq, mp, man_ioctl, PERIM_INNER);
1627		break;
1628
1629	case M_CTL:
1630		freemsg(mp);
1631		break;
1632
1633	case M_FLUSH:
1634		MAN_DBG(MAN_UWPUT, ("man_wput: M_FLUSH\n"));
1635		if (*mp->b_rptr & FLUSHW)
1636			flushq(wq, FLUSHDATA);
1637		if (*mp->b_rptr & FLUSHR) {
1638			flushq(RD(wq), FLUSHDATA);
1639			*mp->b_rptr &= ~FLUSHW;
1640			qreply(wq, mp);
1641		} else {
1642			freemsg(mp);
1643		}
1644		break;
1645
1646	default:
1647		MAN_DBG(MAN_WARN,
1648		    ("man_uwput: illegal mblk(0x%p) type(0x%x)\n",
1649		    (void *)mp, DB_TYPE(mp)));
1650		freemsg(mp);
1651		break;
1652	} /* End switch */
1653
1654	MAN_DBG(MAN_UWPUT, ("man_uwput: exit wq(0x%p) mp(0x%p)\n",
1655	    (void *)wq, (void *)mp));
1656
1657	return (0);
1658}
1659
1660/*
1661 * man_start - handle data messages issued from upstream.  Send down
1662 * to particular man_dest based on ether_addr, otherwise send out to all
1663 * valid man_dests.
1664 *
1665 *	wq - upper write queue of mxx
1666 *	mp - mblk ptr to DLPI request
1667 * 	caller - Caller ID for decision making on canput failure
1668 *
1669 * Returns:
1670 *	0	- Data xmitted or No flow control situation detected.
1671 *	1	- Flow control situation detected.
1672 *
1673 * STREAMS Flow Control: can be used if there is only one destination
1674 * for a stream (1 to 1 multiplexor). In this case, we will use the upper
1675 * write queue to store mblks when in flow control. If there are multiple
1676 * destinations, we cannot use the STREAMs based flow control (1 to many
1677 * multiplexor). In this case, we will use the lower write queue to store
1678 * mblks when in flow control. Since destinations come and go, we may
1679 * transition between 1-to-1 and 1-to-m. So it may be the case that we have
1680 * some mblks stored on the upper queue, and some on the lower queue. However,
1681 * we will never send mblks out of order. See man_uwput and man_start_lower().
1682 *
1683 * A simple flow control mechanism is implemented for the deferred mblk list,
1684 * as this list is expected to be used temporarily for a very short
1685 * period required for switching paths. This flow control mechanism is
1686 * used only as a defensive approach to avoid infinite growth of this list.
1687 */
1688static int
1689man_start(register queue_t *wq, register mblk_t *mp, eaddr_t *eap)
1690{
1691	register manstr_t	*msp;		/* per stream data */
1692	register man_dest_t	*mdp = NULL;	/* destination */
1693	mblk_t			*tmp;
1694	int			i;
1695	int			status = 0;
1696
1697	msp = (manstr_t *)wq->q_ptr;
1698
1699	MAN_DBG(MAN_DATA, ("man_start: msp(0x%p) ether_addr(%s)\n",
1700	    (void *)msp, ether_sprintf(eap)));
1701
1702	if (msp->ms_dests == NULL) {
1703		cmn_err(CE_WARN, "man_start: no destinations");
1704		freemsg(mp);
1705		return (0);
1706	}
1707
1708	/*
1709	 * Optimization if only one valid destination.
1710	 */
1711	mdp = msp->ms_destp;
1712
1713	if (IS_UNICAST(eap)) {
1714		queue_t			*flow_wq = NULL;
1715
1716		if (mdp == NULL) {
1717			/*
1718			 * TDB - This needs to be optimized (some bits in
1719			 * ehp->dhost will act as an index.
1720			 */
1721			for (i = 0; i < MAN_MAX_DESTS; i++) {
1722
1723				mdp = &msp->ms_dests[i];
1724
1725				if ((mdp->md_state == MAN_DSTATE_READY) &&
1726				    (ether_cmp(eap, &mdp->md_dst_eaddr) == 0))
1727					break;
1728				mdp = NULL;
1729			}
1730		} else {
1731			/*
1732			 * 1 to 1 multiplexing, use upper wq for flow control.
1733			 */
1734			flow_wq = wq;
1735		}
1736
1737		if (mdp != NULL) {
1738			/*
1739			 * Its going somewhere specific
1740			 */
1741			status =  man_start_lower(mdp, mp, flow_wq, MAN_UPPER);
1742
1743		} else {
1744			MAN_DBG(MAN_DATA, ("man_start: no destination"
1745			    " for eaddr %s\n", ether_sprintf(eap)));
1746			freemsg(mp);
1747		}
1748	} else {
1749		/*
1750		 * Broadcast or multicast - send everone a copy.
1751		 */
1752		if (mdp == NULL) {
1753			for (i = 0; i < MAN_MAX_DESTS; i++) {
1754				mdp = &msp->ms_dests[i];
1755
1756				if (mdp->md_state != MAN_DSTATE_READY)
1757					continue;
1758
1759				if ((tmp = copymsg(mp)) != NULL) {
1760					(void) man_start_lower(mdp, tmp,
1761					    NULL, MAN_UPPER);
1762				} else {
1763					MAN_DBG(MAN_DATA, ("man_start: copymsg"
1764					    " failed!"));
1765				}
1766			}
1767			freemsg(mp);
1768		} else {
1769			if (mdp->md_state == MAN_DSTATE_READY)
1770				status =  man_start_lower(mdp, mp, wq,
1771				    MAN_UPPER);
1772			else
1773				freemsg(mp);
1774		}
1775	}
1776	return (status);
1777}
1778
1779/*
1780 * Send a DL_UNITDATA or M_DATA fastpath data mblk to a particular
1781 * destination. Others mblk types sent down via * man_dlpi_senddown().
1782 *
1783 * Returns:
1784 *	0	- Data xmitted
1785 *	1	- Data not xmitted due to flow control.
1786 */
1787static int
1788man_start_lower(man_dest_t *mdp, mblk_t *mp, queue_t *flow_wq, int caller)
1789{
1790	queue_t		*wq = mdp->md_wq;
1791	int		status = 0;
1792
1793	/*
1794	 * Lower stream ready for data transmit.
1795	 */
1796	if (mdp->md_state == MAN_DSTATE_READY &&
1797	    mdp->md_dlpistate == DL_IDLE) {
1798
1799		ASSERT(mdp->md_wq != NULL);
1800
1801		if (caller == MAN_UPPER) {
1802			/*
1803			 * Check for flow control conditions for lower
1804			 * stream.
1805			 */
1806			if (mdp->md_dmp_head == NULL &&
1807			    wq->q_first == NULL && canputnext(wq)) {
1808
1809				(void) putnext(wq, mp);
1810
1811			} else {
1812				mutex_enter(&mdp->md_lock);
1813				if (mdp->md_dmp_head != NULL) {
1814					/*
1815					 * A simple flow control mechanism.
1816					 */
1817					if (mdp->md_dmp_count >= MAN_HIWAT) {
1818						freemsg(mp);
1819					} else {
1820						/*
1821						 * Add 'mp' to the deferred
1822						 * msg list.
1823						 */
1824						mdp->md_dmp_tail->b_next = mp;
1825						mdp->md_dmp_tail = mp;
1826						mdp->md_dmp_count +=
1827						    msgsize(mp);
1828					}
1829					mutex_exit(&mdp->md_lock);
1830					/*
1831					 * Inform flow control situation
1832					 * to the caller.
1833					 */
1834					status = 1;
1835					qenable(wq);
1836					goto exit;
1837				}
1838				mutex_exit(&mdp->md_lock);
1839				/*
1840				 * If 1 to 1 mux, use upper write queue for
1841				 * flow control.
1842				 */
1843				if (flow_wq != NULL) {
1844					/*
1845					 * putbq() message and indicate
1846					 * flow control situation to the
1847					 * caller.
1848					 */
1849					(void) putbq(flow_wq, mp);
1850					qenable(flow_wq);
1851					status = 1;
1852					goto exit;
1853				}
1854				/*
1855				 * 1 to many mux, use lower write queue for
1856				 * flow control. Be mindful not to overflow
1857				 * the lower MAN STREAM q.
1858				 */
1859				if (canput(wq)) {
1860					(void) putq(wq, mp);
1861					qenable(wq);
1862				} else {
1863					MAN_DBG(MAN_DATA, ("man_start_lower:"
1864					    " lower q flow controlled -"
1865					    " discarding packet"));
1866					freemsg(mp);
1867					goto exit;
1868				}
1869			}
1870
1871		} else {
1872			/*
1873			 * man_lwsrv  is draining flow controlled mblks.
1874			 */
1875			if (canputnext(wq))
1876				(void) putnext(wq, mp);
1877			else
1878				status = 1;
1879		}
1880		goto exit;
1881	}
1882
1883	/*
1884	 * Lower stream in transition, do flow control.
1885	 */
1886	status = 1;
1887
1888	if (mdp->md_state == MAN_DSTATE_NOTPRESENT) {
1889nodest:
1890		cmn_err(CE_WARN,
1891		    "man_start_lower: no dest for mdp(0x%p), caller(%d)!",
1892		    (void *)mdp, caller);
1893		if (caller == MAN_UPPER)
1894			freemsg(mp);
1895		goto exit;
1896	}
1897
1898	if (mdp->md_state & MAN_DSTATE_CLOSING) {
1899		MAN_DBG(MAN_DATA, ("man_start_lower: mdp(0x%p) closing",
1900		    (void *)mdp));
1901		if (caller == MAN_UPPER)
1902			freemsg(mp);
1903		goto exit;
1904	}
1905
1906	if ((mdp->md_state & MAN_DSTATE_PLUMBING) ||
1907	    (mdp->md_state == MAN_DSTATE_INITIALIZING) ||
1908	    (mdp->md_dlpistate != DL_IDLE)) {
1909		/*
1910		 * Defer until PLUMBED and DL_IDLE. See man_lwsrv().
1911		 */
1912		if (caller == MAN_UPPER) {
1913			/*
1914			 * Upper stream sending data down, add to defered mblk
1915			 * list for stream.
1916			 */
1917			mutex_enter(&mdp->md_lock);
1918			if (mdp->md_dmp_count >= MAN_HIWAT) {
1919				freemsg(mp);
1920			} else {
1921				if (mdp->md_dmp_head == NULL) {
1922					ASSERT(mdp->md_dmp_tail == NULL);
1923					mdp->md_dmp_head = mp;
1924					mdp->md_dmp_tail = mp;
1925				} else {
1926					mdp->md_dmp_tail->b_next = mp;
1927					mdp->md_dmp_tail = mp;
1928				}
1929				mdp->md_dmp_count += msgsize(mp);
1930			}
1931			mutex_exit(&mdp->md_lock);
1932		}
1933
1934		goto exit;
1935	}
1936
1937exit:
1938	return (status);
1939}
1940
1941/*
1942 * man_ioctl - handle ioctl requests for this driver (I_PLINK/I_PUNLINK)
1943 * or pass thru to the physical driver below.  Note that most M_IOCTLs we
1944 * care about come down the control msp, but the IOC ones come down the IP.
1945 * Called with exclusive inner perimeter.
1946 *
1947 *	wq - upper write queue of mxx
1948 *	mp - mblk ptr to DLPI ioctl request
1949 */
1950static void
1951man_ioctl(register queue_t *wq, register mblk_t *mp)
1952{
1953	manstr_t		*msp;
1954	struct iocblk		*iocp;
1955
1956	iocp = (struct iocblk *)mp->b_rptr;
1957	msp = (manstr_t *)wq->q_ptr;
1958
1959#ifdef DEBUG
1960	{
1961		char			ioc_cmd[30];
1962
1963		(void) sprintf(ioc_cmd, "not handled IOCTL 0x%x",
1964		    iocp->ioc_cmd);
1965		MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI),
1966		    ("man_ioctl: wq(0x%p) mp(0x%p) cmd(%s)\n",
1967		    (void *)wq, (void *)mp,
1968		    (iocp->ioc_cmd == I_PLINK) ? "I_PLINK" :
1969		    (iocp->ioc_cmd == I_PUNLINK) ? "I_PUNLINK" :
1970		    (iocp->ioc_cmd == MAN_SETPATH) ? "MAN_SETPATH" :
1971		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
1972		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
1973	}
1974#endif /* DEBUG */
1975
1976
1977	/*
1978	 *  Handle the requests...
1979	 */
1980	switch ((unsigned int)iocp->ioc_cmd) {
1981
1982	case I_PLINK:
1983		man_plink(wq, mp);
1984		break;
1985
1986	case I_PUNLINK:
1987		man_unplink(wq, mp);
1988		break;
1989
1990	case MAN_SETPATH:
1991		man_setpath(wq, mp);
1992		break;
1993
1994	case MAN_GETEADDR:
1995		man_geteaddr(wq, mp);
1996		break;
1997
1998	case MAN_SET_LINKCHECK_TIME:
1999		man_set_linkcheck_time(wq, mp);
2000		break;
2001
2002	case MAN_SET_SC_IPADDRS:
2003		man_set_sc_ipaddrs(wq, mp);
2004		break;
2005
2006	case MAN_SET_SC_IP6ADDRS:
2007		man_set_sc_ip6addrs(wq, mp);
2008		break;
2009
2010	case DLIOCRAW:
2011		if (man_dlioc(msp, mp))
2012			miocnak(wq, mp, 0, ENOMEM);
2013		else {
2014			msp->ms_flags |= MAN_SFLAG_RAW;
2015			miocack(wq, mp, 0, 0);
2016		}
2017		break;
2018
2019	case DL_IOC_HDR_INFO:
2020		man_dl_ioc_hdr_info(wq, mp);
2021		break;
2022
2023	case MAN_ND_GET:
2024	case MAN_ND_SET:
2025		man_nd_getset(wq, mp);
2026		break;
2027
2028	default:
2029		MAN_DBG(MAN_DDI, ("man_ioctl: unknown ioc_cmd %d\n",
2030		    (unsigned int)iocp->ioc_cmd));
2031		miocnak(wq, mp, 0, EINVAL);
2032		break;
2033	}
2034exit:
2035	MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI), ("man_ioctl: exit\n"));
2036
2037}
2038
2039/*
2040 * man_plink: handle I_PLINK requests on the control stream
2041 */
2042void
2043man_plink(queue_t *wq, mblk_t *mp)
2044{
2045	struct linkblk	*linkp;
2046	man_linkrec_t	*lrp;
2047	int		status = 0;
2048
2049	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2050
2051	/*
2052	 * Create a record to hold lower stream info. man_plumb will
2053	 * retrieve it after calling ldi_ioctl(I_PLINK)
2054	 */
2055	lrp = man_kzalloc(sizeof (man_linkrec_t), KM_NOSLEEP);
2056	if (lrp == NULL) {
2057		status = ENOMEM;
2058		goto exit;
2059	}
2060
2061	lrp->l_muxid = linkp->l_index;
2062	lrp->l_wq = linkp->l_qbot;
2063	lrp->l_rq = RD(linkp->l_qbot);
2064
2065	man_linkrec_insert(lrp);
2066
2067exit:
2068	if (status)
2069		miocnak(wq, mp, 0, status);
2070	else
2071		miocack(wq, mp, 0, 0);
2072
2073}
2074
2075/*
2076 * man_unplink - handle I_PUNLINK requests on the control stream
2077 */
2078void
2079man_unplink(queue_t *wq, mblk_t *mp)
2080{
2081	struct linkblk	*linkp;
2082
2083	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2084	RD(linkp->l_qbot)->q_ptr = NULL;
2085	WR(linkp->l_qbot)->q_ptr = NULL;
2086	miocack(wq, mp, 0, 0);
2087}
2088
2089void
2090man_linkrec_insert(man_linkrec_t *lrp)
2091{
2092	mutex_enter(&man_lock);
2093
2094	lrp->l_next = man_linkrec_head;
2095	man_linkrec_head = lrp;
2096
2097	mutex_exit(&man_lock);
2098
2099}
2100
2101static queue_t *
2102man_linkrec_find(int muxid)
2103{
2104	man_linkrec_t	*lpp;
2105	man_linkrec_t	*lp;
2106	queue_t		*wq = NULL;
2107
2108	mutex_enter(&man_lock);
2109
2110	if (man_linkrec_head == NULL)
2111		goto exit;
2112
2113	lp = lpp = man_linkrec_head;
2114	if (lpp->l_muxid == muxid) {
2115		man_linkrec_head = lpp->l_next;
2116	} else {
2117		for (lp = lpp->l_next; lp; lp = lp->l_next) {
2118			if (lp->l_muxid == muxid)
2119				break;
2120			lpp = lp;
2121		}
2122	}
2123
2124	if (lp == NULL)
2125		goto exit;
2126
2127	wq = lp->l_wq;
2128	ASSERT(wq != NULL);
2129
2130	lpp->l_next = lp->l_next;
2131	man_kfree(lp, sizeof (man_linkrec_t));
2132
2133exit:
2134	mutex_exit(&man_lock);
2135
2136	return (wq);
2137}
2138
2139/*
2140 * Set instance linkcheck timer value.
2141 */
2142static void
2143man_set_linkcheck_time(queue_t *wq, mblk_t *mp)
2144{
2145	mi_time_t	*mtp;
2146	int		error;
2147	man_t		*manp;
2148
2149	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: enter"));
2150
2151	error = miocpullup(mp, sizeof (mi_time_t));
2152	if (error != 0)
2153		goto exit;
2154
2155	mtp = (mi_time_t *)mp->b_cont->b_rptr;
2156
2157	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: mtp"));
2158	MAN_DBGCALL(MAN_LINK, man_print_mtp(mtp));
2159
2160	manp = ddi_get_soft_state(man_softstate, mtp->mtp_man_ppa);
2161	if (manp == NULL) {
2162		error = ENODEV;
2163		goto exit;
2164	}
2165
2166	manp->man_linkcheck_time = mtp->mtp_time;
2167exit:
2168	if (error)
2169		miocnak(wq, mp, 0, error);
2170	else
2171		miocack(wq, mp, sizeof (mi_time_t), 0);
2172}
2173
2174/*
2175 * Man path ioctl processing. Should only happen on the SSC. Called
2176 * with exclusive inner perimeter.
2177 */
2178static void
2179man_setpath(queue_t *wq, mblk_t *mp)
2180{
2181	mi_path_t		*mip;
2182	int			error;
2183
2184	error = miocpullup(mp, sizeof (mi_path_t));
2185	if (error != 0)
2186		goto exit;
2187
2188	mip = (mi_path_t *)mp->b_cont->b_rptr;
2189	mutex_enter(&man_lock);
2190	error = man_pg_cmd(mip, NULL);
2191	mutex_exit(&man_lock);
2192
2193exit:
2194	if (error)
2195		miocnak(wq, mp, 0, error);
2196	else
2197		miocack(wq, mp, sizeof (mi_path_t), 0);
2198}
2199
2200/*
2201 * Get the local ethernet address of this machine.
2202 */
2203static void
2204man_geteaddr(queue_t *wq, mblk_t *mp)
2205{
2206	eaddr_t			*eap;
2207	int			error;
2208
2209	error = miocpullup(mp, sizeof (eaddr_t));
2210	if (error != 0) {
2211		miocnak(wq, mp, 0, error);
2212		return;
2213	}
2214
2215	eap = (eaddr_t *)mp->b_cont->b_rptr;
2216	(void) localetheraddr(NULL, eap);
2217	miocack(wq, mp, sizeof (eaddr_t), 0);
2218}
2219
2220/*
2221 * Set my SC and other SC IPv4 addresses for use in man_pinger routine.
2222 */
2223static void
2224man_set_sc_ipaddrs(queue_t *wq, mblk_t *mp)
2225{
2226	int			error;
2227
2228	error = miocpullup(mp, sizeof (man_sc_ipaddrs_t));
2229	if (error != 0)
2230		goto exit;
2231
2232	man_sc_ipaddrs = *(man_sc_ipaddrs_t *)mp->b_cont->b_rptr;
2233
2234#ifdef DEBUG
2235	{
2236		char	buf[INET_ADDRSTRLEN];
2237
2238		(void) inet_ntop(AF_INET,
2239		    (void *) &man_sc_ipaddrs.ip_other_sc_ipaddr,
2240		    buf, INET_ADDRSTRLEN);
2241		MAN_DBG(MAN_CONFIG, ("ip_other_sc_ipaddr = %s", buf));
2242		(void) inet_ntop(AF_INET,
2243		    (void *) &man_sc_ipaddrs.ip_my_sc_ipaddr,
2244		    buf, INET_ADDRSTRLEN);
2245		MAN_DBG(MAN_CONFIG, ("ip_my_sc_ipaddr = %s", buf));
2246	}
2247#endif /* DEBUG */
2248exit:
2249	if (error)
2250		miocnak(wq, mp, 0, error);
2251	else
2252		miocack(wq, mp, sizeof (man_sc_ipaddrs_t), 0);
2253}
2254
2255/*
2256 * Set my SC and other SC IPv6 addresses for use in man_pinger routine.
2257 */
2258static void
2259man_set_sc_ip6addrs(queue_t *wq, mblk_t *mp)
2260{
2261	int			error;
2262
2263	error = miocpullup(mp, sizeof (man_sc_ip6addrs_t));
2264	if (error != 0)
2265		goto exit;
2266
2267	man_sc_ip6addrs = *(man_sc_ip6addrs_t *)mp->b_cont->b_rptr;
2268
2269#ifdef DEBUG
2270	{
2271		char	buf[INET6_ADDRSTRLEN];
2272
2273		(void) inet_ntop(AF_INET6,
2274		    (void *) &man_sc_ip6addrs.ip6_other_sc_ipaddr,
2275		    buf, INET6_ADDRSTRLEN);
2276		MAN_DBG(MAN_CONFIG, ("ip6_other_sc_ipaddr = %s", buf));
2277		(void) inet_ntop(AF_INET6,
2278		    (void *) &man_sc_ip6addrs.ip6_my_sc_ipaddr,
2279		    buf, INET6_ADDRSTRLEN);
2280		MAN_DBG(MAN_CONFIG, ("ip6_my_sc_ipaddr = %s", buf));
2281	}
2282#endif /* DEBUG */
2283exit:
2284	if (error)
2285		miocnak(wq, mp, 0, error);
2286	else
2287		miocack(wq, mp, sizeof (man_sc_ip6addrs_t), 0);
2288}
2289
2290/*
2291 * M_DATA fastpath info request.
2292 */
2293static void
2294man_dl_ioc_hdr_info(queue_t *wq, mblk_t *mp)
2295{
2296	manstr_t		*msp;
2297	man_t			*manp;
2298	mblk_t			*nmp;
2299	man_dladdr_t		*dlap;
2300	dl_unitdata_req_t	*dludp;
2301	struct	ether_header	*headerp;
2302	t_uscalar_t		off, len;
2303	int			status = 0;
2304
2305	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: enter"));
2306
2307	msp = (manstr_t *)wq->q_ptr;
2308	manp = msp->ms_manp;
2309	if (manp == NULL) {
2310		status = EINVAL;
2311		goto exit;
2312	}
2313
2314	status = miocpullup(mp, sizeof (dl_unitdata_req_t) + MAN_ADDRL);
2315	if (status != 0)
2316		goto exit;
2317
2318	/*
2319	 * Sanity check the DL_UNITDATA_REQ destination address
2320	 * offset and length values.
2321	 */
2322	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
2323	off = dludp->dl_dest_addr_offset;
2324	len = dludp->dl_dest_addr_length;
2325	if (dludp->dl_primitive != DL_UNITDATA_REQ ||
2326	    !MBLKIN(mp->b_cont, off, len) || len != MAN_ADDRL) {
2327		status = EINVAL;
2328		goto exit;
2329	}
2330
2331	dlap = (man_dladdr_t  *)(mp->b_cont->b_rptr + off);
2332
2333	/*
2334	 * Allocate a new mblk to hold the ether header.
2335	 */
2336	if ((nmp = allocb(ETHERHEADER_SIZE, BPRI_MED)) == NULL) {
2337		status = ENOMEM;
2338		goto exit;
2339	}
2340
2341	/* We only need one dl_ioc_hdr mblk for replay */
2342	if (!(msp->ms_flags & MAN_SFLAG_FAST))
2343		status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2344
2345	/* Forward the packet to all lower destinations. */
2346	if ((status != 0) || ((status = man_dlpi_senddown(msp, mp)) != 0)) {
2347		freemsg(nmp);
2348		goto exit;
2349	}
2350
2351	nmp->b_wptr += ETHERHEADER_SIZE;
2352
2353	/*
2354	 * Fill in the ether header.
2355	 */
2356	headerp = (struct ether_header *)nmp->b_rptr;
2357	ether_copy(&dlap->dl_phys, &headerp->ether_dhost);
2358	ether_copy(&manp->man_eaddr, &headerp->ether_shost);
2359	put_ether_type(headerp, dlap->dl_sap);
2360
2361	/*
2362	 * Link new mblk in after the "request" mblks.
2363	 */
2364	linkb(mp, nmp);
2365
2366exit:
2367	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: returns, status = %d",
2368	    status));
2369
2370	if (status) {
2371		miocnak(wq, mp, 0, status);
2372	} else {
2373		msp = (manstr_t *)wq->q_ptr;
2374		msp->ms_flags |= MAN_SFLAG_FAST;
2375		miocack(wq, mp, msgsize(mp->b_cont), 0);
2376	}
2377
2378}
2379
2380/*
2381 * man_uwsrv - Upper write queue service routine to handle deferred
2382 * DLPI messages issued from upstream, the write side of the upper half
2383 * of multiplexor. It is also used by man_bwork to switch the lower
2384 * multiplexor.
2385 *
2386 *	wq - upper write queue of mxx
2387 */
2388static int
2389man_uwsrv(queue_t *wq)
2390{
2391	register mblk_t		*mp;
2392	manstr_t		*msp;		/* per stream data */
2393	man_t			*manp;		/* per instance data */
2394	ehdr_t			*ep;
2395	int			status;
2396
2397	msp = (manstr_t *)wq->q_ptr;
2398
2399	MAN_DBG(MAN_UWSRV, ("man_uwsrv: wq(0x%p) msp", (void *)wq));
2400	MAN_DBGCALL(MAN_UWSRV, man_print_msp(msp));
2401
2402	if (msp == NULL)
2403		goto done;
2404
2405	manp = msp->ms_manp;
2406
2407	while (mp = getq(wq)) {
2408
2409		switch (DB_TYPE(mp)) {
2410		/*
2411		 * Can probably remove this as I never put data messages
2412		 * here.
2413		 */
2414		case M_DATA:
2415			if (manp) {
2416				ep = (ehdr_t *)mp->b_rptr;
2417				status = man_start(wq, mp, &ep->ether_dhost);
2418				if (status) {
2419					/*
2420					 * man_start() indicated flow control
2421					 * situation, stop processing now.
2422					 */
2423					goto break_loop;
2424				}
2425			} else
2426				freemsg(mp);
2427			break;
2428
2429		case M_PROTO:
2430		case M_PCPROTO:
2431			status = man_proto(wq, mp);
2432			if (status) {
2433				/*
2434				 * man_proto() indicated flow control
2435				 * situation detected by man_start(),
2436				 * stop processing now.
2437				 */
2438				goto break_loop;
2439			}
2440			break;
2441
2442		default:
2443			MAN_DBG(MAN_UWSRV, ("man_uwsrv: discarding mp(0x%p)",
2444			    (void *)mp));
2445			freemsg(mp);
2446			break;
2447		}
2448	}
2449
2450break_loop:
2451	/*
2452	 * Check to see if bgthread wants us to do something inside the
2453	 * perimeter.
2454	 */
2455	if ((msp->ms_flags & MAN_SFLAG_CONTROL) &&
2456	    man_iwork_q->q_work != NULL) {
2457
2458		man_iwork();
2459	}
2460
2461done:
2462
2463	MAN_DBG(MAN_UWSRV, ("man_uwsrv: returns"));
2464
2465	return (0);
2466}
2467
2468
2469/*
2470 * man_proto - handle DLPI protocol requests issued from upstream.
2471 * Called by man_uwsrv().  We disassociate upper and lower multiplexor
2472 * DLPI state transitions. The upper stream here (manstr_t) transitions
2473 * appropriately, saves the DLPI requests via man_dlpi(), and then
2474 * arranges for the DLPI request to be sent down via man_dlpi_senddown() if
2475 * appropriate.
2476 *
2477 *	wq - upper write queue of mxx
2478 *	mp - mbl ptr to protocol request
2479 */
2480static int
2481man_proto(queue_t *wq, mblk_t *mp)
2482{
2483	union DL_primitives	*dlp;
2484	int			flow_status = 0;
2485
2486	dlp = (union DL_primitives *)mp->b_rptr;
2487
2488	MAN_DBG((MAN_UWSRV | MAN_DLPI),
2489	    ("man_proto: mp(0x%p) prim(%s)\n", (void *)mp,
2490	    dps[dlp->dl_primitive]));
2491
2492	switch (dlp->dl_primitive) {
2493	case DL_UNITDATA_REQ:
2494		flow_status = man_udreq(wq, mp);
2495		break;
2496
2497	case DL_ATTACH_REQ:
2498		man_areq(wq, mp);
2499		break;
2500
2501	case DL_DETACH_REQ:
2502		man_dreq(wq, mp);
2503		break;
2504
2505	case DL_BIND_REQ:
2506		man_breq(wq, mp);
2507		break;
2508
2509	case DL_UNBIND_REQ:
2510		man_ubreq(wq, mp);
2511		break;
2512
2513	case DL_INFO_REQ:
2514		man_ireq(wq, mp);
2515		break;
2516
2517	case DL_PROMISCON_REQ:
2518		man_ponreq(wq, mp);
2519		break;
2520
2521	case DL_PROMISCOFF_REQ:
2522		man_poffreq(wq, mp);
2523		break;
2524
2525	case DL_ENABMULTI_REQ:
2526		man_emreq(wq, mp);
2527		break;
2528
2529	case DL_DISABMULTI_REQ:
2530		man_dmreq(wq, mp);
2531		break;
2532
2533	case DL_PHYS_ADDR_REQ:
2534		man_pareq(wq, mp);
2535		break;
2536
2537	case DL_SET_PHYS_ADDR_REQ:
2538		man_spareq(wq, mp);
2539		break;
2540
2541	default:
2542		MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: prim(%d)\n",
2543		    dlp->dl_primitive));
2544		dlerrorack(wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
2545		break;
2546
2547	} /* End switch */
2548
2549	MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: exit\n"));
2550	return (flow_status);
2551
2552}
2553
2554static int
2555man_udreq(queue_t *wq, mblk_t *mp)
2556{
2557	manstr_t		*msp;
2558	dl_unitdata_req_t	*dludp;
2559	mblk_t	*nmp;
2560	man_dladdr_t		*dlap;
2561	t_uscalar_t 		off, len;
2562	int 			flow_status = 0;
2563
2564	msp = (manstr_t *)wq->q_ptr;
2565
2566
2567	if (msp->ms_dlpistate != DL_IDLE) {
2568		dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
2569		return (flow_status);
2570	}
2571	dludp = (dl_unitdata_req_t *)mp->b_rptr;
2572	off = dludp->dl_dest_addr_offset;
2573	len = dludp->dl_dest_addr_length;
2574
2575	/*
2576	 * Validate destination address format.
2577	 */
2578	if (!MBLKIN(mp, off, len) || (len != MAN_ADDRL)) {
2579		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADADDR, 0);
2580		return (flow_status);
2581	}
2582
2583	/*
2584	 * Error if no M_DATA follows.
2585	 */
2586	nmp = mp->b_cont;
2587	if (nmp == NULL) {
2588		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADDATA, 0);
2589		return (flow_status);
2590	}
2591
2592	dlap = (man_dladdr_t *)(mp->b_rptr + off);
2593
2594	flow_status = man_start(wq, mp, &dlap->dl_phys);
2595	return (flow_status);
2596}
2597
2598/*
2599 * Handle DL_ATTACH_REQ.
2600 */
2601static void
2602man_areq(queue_t *wq, mblk_t *mp)
2603{
2604	man_t			*manp;	/* per instance data */
2605	manstr_t		*msp;	/* per stream data */
2606	short			ppa;
2607	union DL_primitives	*dlp;
2608	mblk_t			*preq = NULL;
2609	int			did_refcnt = FALSE;
2610	int			dlerror = 0;
2611	int			status = 0;
2612
2613	msp = (manstr_t *)wq->q_ptr;
2614	dlp = (union DL_primitives *)mp->b_rptr;
2615
2616	/*
2617	 * Attach us to MAN PPA (device instance).
2618	 */
2619	if (MBLKL(mp) < DL_ATTACH_REQ_SIZE) {
2620		dlerror = DL_BADPRIM;
2621		goto exit;
2622	}
2623
2624	if (msp->ms_dlpistate != DL_UNATTACHED) {
2625		dlerror = DL_OUTSTATE;
2626		goto exit;
2627	}
2628
2629	ppa = dlp->attach_req.dl_ppa;
2630	if (ppa == -1 || qassociate(wq, ppa) != 0) {
2631		dlerror = DL_BADPPA;
2632		MAN_DBG(MAN_WARN, ("man_areq: bad PPA %d", ppa));
2633		goto exit;
2634	}
2635
2636	mutex_enter(&man_lock);
2637	manp = ddi_get_soft_state(man_softstate, ppa);
2638	ASSERT(manp != NULL);	/* qassociate() succeeded */
2639
2640	manp->man_refcnt++;
2641	did_refcnt = TRUE;
2642	mutex_exit(&man_lock);
2643
2644	/*
2645	 * Create a DL replay list for the lower stream. These wont
2646	 * actually be sent down until the lower streams are made active
2647	 * (sometime after the call to man_init_dests below).
2648	 */
2649	preq = man_alloc_physreq_mp(&manp->man_eaddr);
2650	if (preq == NULL) {
2651		dlerror = DL_SYSERR;
2652		status = ENOMEM;
2653		goto exit;
2654	}
2655
2656	/*
2657	 * Make copy for dlpi resync of upper and lower streams.
2658	 */
2659	if (man_dlpi(msp, mp)) {
2660		dlerror = DL_SYSERR;
2661		status = ENOMEM;
2662		goto exit;
2663	}
2664
2665	/* TBD - need to clean off ATTACH req on failure here. */
2666	if (man_dlpi(msp, preq)) {
2667		dlerror = DL_SYSERR;
2668		status = ENOMEM;
2669		goto exit;
2670	}
2671
2672	/*
2673	 * man_init_dests/man_start_dest needs these set before call.
2674	 */
2675	msp->ms_manp = manp;
2676	msp->ms_meta_ppa = ppa;
2677
2678	/*
2679	 *  Allocate and init lower destination structures.
2680	 */
2681	ASSERT(msp->ms_dests == NULL);
2682	if (man_init_dests(manp, msp)) {
2683		mblk_t	 *tmp;
2684
2685		/*
2686		 * If we cant get the lower streams ready, then
2687		 * remove the messages from the DL replay list and
2688		 * fail attach.
2689		 */
2690		while ((tmp = msp->ms_dl_mp) != NULL) {
2691			msp->ms_dl_mp = msp->ms_dl_mp->b_next;
2692			tmp->b_next = tmp->b_prev = NULL;
2693			freemsg(tmp);
2694		}
2695
2696		msp->ms_manp = NULL;
2697		msp->ms_meta_ppa = -1;
2698
2699		dlerror = DL_SYSERR;
2700		status = ENOMEM;
2701		goto exit;
2702	}
2703
2704	MAN_DBG(MAN_DLPI, ("man_areq: ppa 0x%x man_refcnt: %d\n",
2705	    ppa, manp->man_refcnt));
2706
2707	SETSTATE(msp, DL_UNBOUND);
2708
2709exit:
2710	if (dlerror == 0) {
2711		dlokack(wq, mp, DL_ATTACH_REQ);
2712	} else {
2713		if (did_refcnt) {
2714			mutex_enter(&man_lock);
2715			manp->man_refcnt--;
2716			mutex_exit(&man_lock);
2717		}
2718		dlerrorack(wq, mp, DL_ATTACH_REQ, dlerror, status);
2719		(void) qassociate(wq, -1);
2720	}
2721	if (preq != NULL)
2722		freemsg(preq);
2723
2724}
2725
2726/*
2727 * Called at DL_ATTACH time.
2728 * Man_lock is held to protect pathgroup list(man_pg).
2729 */
2730static int
2731man_init_dests(man_t *manp, manstr_t *msp)
2732{
2733	man_dest_t	*mdp;
2734	man_pg_t	*mpg;
2735	int		i;
2736
2737	mdp = man_kzalloc(MAN_DEST_ARRAY_SIZE, KM_NOSLEEP);
2738	if (mdp == NULL)
2739		return (ENOMEM);
2740
2741	msp->ms_dests = mdp;
2742
2743	mutex_enter(&man_lock);
2744	for (i = 0; i < MAN_MAX_DESTS; i++) {
2745
2746		mdp[i].md_muxid = -1;	/* muxid 0 is valid */
2747		mutex_init(&mdp->md_lock, NULL, MUTEX_DRIVER, NULL);
2748
2749		mpg = man_find_pg_by_id(manp->man_pg, i);
2750
2751		if (mpg && man_find_active_path(mpg->mpg_pathp))
2752			man_start_dest(&mdp[i], msp, mpg);
2753	}
2754	mutex_exit(&man_lock);
2755
2756	return (0);
2757}
2758
2759/*
2760 * Get a destination ready for use.
2761 */
2762static void
2763man_start_dest(man_dest_t *mdp, manstr_t *msp, man_pg_t *mpg)
2764{
2765	man_path_t	*ap;
2766
2767	mdp->md_muxid = -1;
2768	mdp->md_dlpistate = DL_UNATTACHED;
2769	mdp->md_msp = msp;
2770	mdp->md_rq = msp->ms_rq;
2771	mdp->md_pg_id = mpg->mpg_pg_id;
2772
2773	ASSERT(msp->ms_manp);
2774
2775	ether_copy(&msp->ms_manp->man_eaddr, &mdp->md_src_eaddr);
2776	ether_copy(&mpg->mpg_dst_eaddr, &mdp->md_dst_eaddr);
2777
2778	ap = man_find_active_path(mpg->mpg_pathp);
2779	ASSERT(ap);
2780	mdp->md_device = ap->mp_device;
2781
2782	/*
2783	 * Set up linktimers so that first time through, we will do
2784	 * a failover.
2785	 */
2786	mdp->md_linkstate = MAN_LINKFAIL;
2787	mdp->md_state = MAN_DSTATE_INITIALIZING;
2788	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
2789	    (void *)mdp, man_gettimer(MAN_TIMER_INIT, mdp));
2790
2791	/*
2792	 * As an optimization, if there is only one destination,
2793	 * remember the destination pointer. Used by man_start().
2794	 */
2795	man_set_optimized_dest(msp);
2796
2797	MAN_DBG(MAN_DEST, ("man_start_dest: mdp"));
2798	MAN_DBGCALL(MAN_DEST, man_print_mdp(mdp));
2799}
2800
2801static void
2802man_set_optimized_dest(manstr_t *msp)
2803{
2804	int		count = 0;
2805	int		i;
2806	man_dest_t	*mdp = NULL;
2807
2808	for (i = 0; i < MAN_MAX_DESTS; i++) {
2809		if (msp->ms_dests[i].md_msp != NULL) {
2810			count++;
2811			mdp = &msp->ms_dests[i];
2812		}
2813	}
2814
2815	if (count == 1)
2816		msp->ms_destp = mdp;
2817	else
2818		msp->ms_destp = NULL;
2819
2820}
2821
2822/*
2823 * Catch dlpi message for replaying, and arrange to send it down
2824 * to any destinations not PLUMBING. See man_dlpi_replay().
2825 */
2826static int
2827man_dlpi(manstr_t *msp, mblk_t *mp)
2828{
2829	int	status;
2830
2831	status = man_dl_catch(&msp->ms_dl_mp, mp);
2832	if (status == 0)
2833		status = man_dlpi_senddown(msp, mp);
2834
2835	return (status);
2836}
2837
2838/*
2839 * Catch IOCTL type DL_ messages.
2840 */
2841static int
2842man_dlioc(manstr_t *msp, mblk_t *mp)
2843{
2844	int status;
2845
2846	status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2847	if (status == 0)
2848		status = man_dlpi_senddown(msp, mp);
2849
2850	return (status);
2851}
2852
2853/*
2854 * We catch all DLPI messages that we have to resend to a new AP'ed
2855 * device to put him in the right state.  We link these messages together
2856 * w/ their b_next fields and hang it off of msp->ms_dl_mp.  We
2857 * must be careful to restore b_next fields before doing dupmsg/freemsg!
2858 *
2859 *	msp - pointer of stream struct to process
2860 *	mblk - pointer to DLPI request to catch
2861 */
2862static int
2863man_dl_catch(mblk_t **mplist, mblk_t *mp)
2864{
2865	mblk_t			*dupmp;
2866	mblk_t			*tmp;
2867	unsigned		prim;
2868	int			status = 0;
2869
2870	dupmp = copymsg(mp);
2871	if (dupmp == NULL) {
2872		status = ENOMEM;
2873		goto exit;
2874	}
2875
2876
2877	if (*mplist == NULL)
2878		*mplist = dupmp;
2879	else {
2880		for (tmp = *mplist; tmp->b_next; )
2881			tmp = tmp->b_next;
2882
2883		tmp->b_next = dupmp;
2884	}
2885
2886	prim = DL_PRIM(mp);
2887	MAN_DBG(MAN_DLPI,
2888	    ("man_dl_catch: adding %s\n",
2889	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
2890	    (prim == DLIOCRAW) ? "DLIOCRAW" :
2891	    (prim == DL_PROMISCON_REQ) ? promisc[DL_PROMISCON_TYPE(mp)] :
2892	    dps[prim]));
2893
2894exit:
2895
2896	return (status);
2897}
2898
2899/*
2900 * Send down a single DLPI M_[PC]PROTO to all currently valid dests.
2901 *
2902 *	msp - ptr to NDM stream structure DL_ messages was received on.
2903 *	mp - ptr to mblk containing DL_ request.
2904 */
2905static int
2906man_dlpi_senddown(manstr_t *msp, mblk_t *mp)
2907{
2908	man_dest_t	*mdp;
2909	int		i;
2910	mblk_t		*rmp[MAN_MAX_DESTS];	/* Copy to replay */
2911	int		dstate[MAN_MAX_DESTS];
2912	int		no_dests = TRUE;
2913	int		status = 0;
2914
2915	if (msp->ms_dests == NULL)
2916		goto exit;
2917
2918	for (i = 0; i < MAN_MAX_DESTS; i++) {
2919		mdp = &msp->ms_dests[i];
2920		if (mdp->md_state == MAN_DSTATE_READY) {
2921			dstate[i] = TRUE;
2922			no_dests = FALSE;
2923		} else {
2924			dstate[i] = FALSE;
2925		}
2926		rmp[i] = NULL;
2927	}
2928
2929	if (no_dests)
2930		goto exit;
2931
2932	/*
2933	 * Build replay and duplicate list for all possible destinations.
2934	 */
2935	for (i = 0; i < MAN_MAX_DESTS; i++) {
2936		if (dstate[i]) {
2937			rmp[i] = copymsg(mp);
2938			if (rmp[i] == NULL) {
2939				status = ENOMEM;
2940				break;
2941			}
2942		}
2943	}
2944
2945	if (status == 0) {
2946		for (i = 0; i < MAN_MAX_DESTS; i++)
2947			if (dstate[i]) {
2948				mdp = &msp->ms_dests[i];
2949
2950				ASSERT(mdp->md_wq != NULL);
2951				ASSERT(mp->b_next == NULL);
2952				ASSERT(mp->b_prev == NULL);
2953
2954				man_dlpi_replay(mdp, rmp[i]);
2955			}
2956	} else {
2957		for (; i >= 0; i--)
2958			if (dstate[i] && rmp[i])
2959				freemsg(rmp[i]);
2960	}
2961
2962exit:
2963	return (status);
2964}
2965
2966/*
2967 * man_dlpi_replay - traverse the list of DLPI requests and reapply them to
2968 * get the upper and lower streams into the same state. Called holding inner
2969 * perimeter lock exclusive. Note thet we defer M_IOCTL type dlpi messages
2970 * until we get an OK_ACK to our ATTACH (see man_lrsrv and
2971 * man_dlioc_replay).
2972 *
2973 * 	mdp - pointer to lower queue (destination)
2974 *	rmp - list of mblks to send down stream.
2975 */
2976static void
2977man_dlpi_replay(man_dest_t *mdp, mblk_t *rmp)
2978{
2979	mblk_t			*mp;
2980	union DL_primitives	*dlp = NULL;
2981
2982	MAN_DBG(MAN_DLPI, ("man_dlpi_replay: mdp(0x%p)", (void *)mdp));
2983
2984	while (rmp) {
2985		mp = rmp;
2986		rmp = rmp->b_next;
2987		mp->b_prev = mp->b_next = NULL;
2988
2989		dlp = (union DL_primitives *)mp->b_rptr;
2990		MAN_DBG(MAN_DLPI,
2991		    ("man_dlpi_replay: mdp(0x%p) sending %s\n",
2992		    (void *)mdp,
2993		    (dlp->dl_primitive == DL_IOC_HDR_INFO) ?
2994		    "DL_IOC_HDR_INFO" : (dlp->dl_primitive == DLIOCRAW) ?
2995		    "DLIOCRAW" : dps[(unsigned)(dlp->dl_primitive)]));
2996
2997		if (dlp->dl_primitive == DL_ATTACH_REQ) {
2998			/*
2999			 * insert the lower devices ppa.
3000			 */
3001			dlp->attach_req.dl_ppa = mdp->md_device.mdev_ppa;
3002		}
3003
3004		(void) putnext(mdp->md_wq, mp);
3005	}
3006
3007}
3008
3009static void
3010man_dreq(queue_t *wq, mblk_t *mp)
3011{
3012	manstr_t	*msp;	/* per stream data */
3013	man_work_t	*wp;
3014
3015	msp = (manstr_t *)wq->q_ptr;
3016
3017	if (MBLKL(mp) < DL_DETACH_REQ_SIZE) {
3018		dlerrorack(wq, mp, DL_DETACH_REQ, DL_BADPRIM, 0);
3019		return;
3020	}
3021
3022	if (msp->ms_dlpistate != DL_UNBOUND) {
3023		dlerrorack(wq, mp, DL_DETACH_REQ, DL_OUTSTATE, 0);
3024		return;
3025	}
3026
3027	ASSERT(msp->ms_dests != NULL);
3028
3029	wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_NOSLEEP);
3030	if (wp == NULL) {
3031		dlerrorack(wq, mp, DL_DETACH_REQ, DL_SYSERR, ENOMEM);
3032		return;
3033	}
3034	man_dodetach(msp, wp);
3035	(void) qassociate(wq, -1);
3036
3037	SETSTATE(msp, DL_UNATTACHED);
3038
3039	dlokack(wq, mp, DL_DETACH_REQ);
3040}
3041
3042static void
3043man_dl_clean(mblk_t **mplist)
3044{
3045	mblk_t	*tmp;
3046
3047	/*
3048	 * Toss everything.
3049	 */
3050	while ((tmp = *mplist) != NULL) {
3051		*mplist = (*mplist)->b_next;
3052		tmp->b_next = tmp->b_prev = NULL;
3053		freemsg(tmp);
3054	}
3055
3056}
3057
3058/*
3059 * man_dl_release - Remove the corresponding DLPI request from the
3060 * catch list. Walk thru the catch list looking for the other half of
3061 * the pair and delete it.  If we are detaching, delete the entire list.
3062 *
3063 *	msp - pointer of stream struct to process
3064 *	mp  - pointer to mblk to first half of pair.  We will delete other
3065 * 		half of pair based on this.
3066 */
3067static void
3068man_dl_release(mblk_t **mplist, mblk_t *mp)
3069{
3070	uchar_t			match_dbtype;
3071	mblk_t			*tmp;
3072	mblk_t			*tmpp;
3073	int			matched = FALSE;
3074
3075	if (*mplist == NULL)
3076		goto exit;
3077
3078	match_dbtype = DB_TYPE(mp);
3079
3080	/*
3081	 * Currently we only clean DL_ PROTO type messages. There is
3082	 * no way to turn off M_CTL or DL_IOC stuff other than sending
3083	 * down a DL_DETACH, which resets everything.
3084	 */
3085	if (match_dbtype != M_PROTO && match_dbtype != M_PCPROTO) {
3086		goto exit;
3087	}
3088
3089	/*
3090	 * Selectively find a caught mblk that matches this one and
3091	 * remove it from the list
3092	 */
3093	tmp = tmpp = *mplist;
3094	matched = man_match_proto(mp, tmp);
3095	if (matched) {
3096		*mplist = tmp->b_next;
3097		tmp->b_next = tmp->b_prev = NULL;
3098	} else {
3099		for (tmp = tmp->b_next; tmp != NULL; tmp = tmp->b_next) {
3100			if (matched = man_match_proto(mp, tmp))
3101				break;
3102			tmpp = tmp;
3103		}
3104
3105		if (matched) {
3106			tmpp->b_next = tmp->b_next;
3107			tmp->b_next = tmp->b_prev = NULL;
3108		}
3109	}
3110
3111exit:
3112	if (matched) {
3113
3114		MAN_DBG(MAN_DLPI, ("man_dl_release: release %s",
3115		    (DL_PRIM(mp) == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3116		    (DL_PRIM(mp) == DLIOCRAW) ? "DLIOCRAW" :
3117		    dps[(int)DL_PRIM(mp)]));
3118
3119		freemsg(tmp);
3120	}
3121	MAN_DBG(MAN_DLPI, ("man_dl_release: returns"));
3122
3123}
3124
3125/*
3126 * Compare two DL_ messages. If they are complimentary (e.g. DL_UNBIND
3127 * compliments DL_BIND), return true.
3128 */
3129static int
3130man_match_proto(mblk_t *mp1, mblk_t *mp2)
3131{
3132	t_uscalar_t	prim1;
3133	t_uscalar_t	prim2;
3134	int		matched = FALSE;
3135
3136	/*
3137	 * Primitive to clean off list.
3138	 */
3139	prim1 = DL_PRIM(mp1);
3140	prim2 = DL_PRIM(mp2);
3141
3142	switch (prim1) {
3143	case DL_UNBIND_REQ:
3144		if (prim2 == DL_BIND_REQ)
3145			matched = TRUE;
3146		break;
3147
3148	case DL_PROMISCOFF_REQ:
3149		if (prim2 == DL_PROMISCON_REQ) {
3150			dl_promiscoff_req_t	*poff1;
3151			dl_promiscoff_req_t	*poff2;
3152
3153			poff1 = (dl_promiscoff_req_t *)mp1->b_rptr;
3154			poff2 = (dl_promiscoff_req_t *)mp2->b_rptr;
3155
3156			if (poff1->dl_level == poff2->dl_level)
3157				matched = TRUE;
3158		}
3159		break;
3160
3161	case DL_DISABMULTI_REQ:
3162		if (prim2 == DL_ENABMULTI_REQ) {
3163			union DL_primitives	*dlp;
3164			t_uscalar_t		off;
3165			eaddr_t			*addrp1;
3166			eaddr_t			*addrp2;
3167
3168			dlp = (union DL_primitives *)mp1->b_rptr;
3169			off = dlp->disabmulti_req.dl_addr_offset;
3170			addrp1 = (eaddr_t *)(mp1->b_rptr + off);
3171
3172			dlp = (union DL_primitives *)mp2->b_rptr;
3173			off = dlp->disabmulti_req.dl_addr_offset;
3174			addrp2 = (eaddr_t *)(mp2->b_rptr + off);
3175
3176			if (ether_cmp(addrp1, addrp2) == 0)
3177				matched = 1;
3178		}
3179		break;
3180
3181	default:
3182		break;
3183	}
3184
3185	MAN_DBG(MAN_DLPI, ("man_match_proto returns %d", matched));
3186
3187	return (matched);
3188}
3189
3190/*
3191 * Bind upper stream to a particular SAP. Called with exclusive innerperim
3192 * QPAIR, shared outerperim.
3193 */
3194static void
3195man_breq(queue_t *wq, mblk_t *mp)
3196{
3197	man_t			*manp;	/* per instance data */
3198	manstr_t		*msp;	/* per stream data */
3199	union DL_primitives	*dlp;
3200	man_dladdr_t		man_addr;
3201	t_uscalar_t		sap;
3202	t_uscalar_t		xidtest;
3203
3204	msp = (manstr_t *)wq->q_ptr;
3205
3206	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
3207		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADPRIM, 0);
3208		return;
3209	}
3210
3211	if (msp->ms_dlpistate != DL_UNBOUND) {
3212		dlerrorack(wq, mp, DL_BIND_REQ, DL_OUTSTATE, 0);
3213		return;
3214	}
3215
3216	dlp = (union DL_primitives *)mp->b_rptr;
3217	manp = msp->ms_manp;			/* valid after attach */
3218	sap = dlp->bind_req.dl_sap;
3219	xidtest = dlp->bind_req.dl_xidtest_flg;
3220
3221	ASSERT(manp);
3222
3223	if (xidtest) {
3224		dlerrorack(wq, mp, DL_BIND_REQ, DL_NOAUTO, 0);
3225		return;
3226	}
3227
3228	if (sap > ETHERTYPE_MAX) {
3229		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADSAP, 0);
3230		return;
3231	}
3232
3233	if (man_dlpi(msp, mp)) {
3234		dlerrorack(wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
3235		return;
3236	}
3237
3238	msp->ms_sap = sap;
3239
3240	SETSTATE(msp, DL_IDLE);
3241
3242	man_addr.dl_sap = msp->ms_sap;
3243	ether_copy(&msp->ms_manp->man_eaddr, &man_addr.dl_phys);
3244
3245	dlbindack(wq, mp, msp->ms_sap, &man_addr, MAN_ADDRL, 0, 0);
3246
3247}
3248
3249static void
3250man_ubreq(queue_t *wq, mblk_t *mp)
3251{
3252	manstr_t		*msp;	/* per stream data */
3253
3254	msp = (manstr_t *)wq->q_ptr;
3255
3256	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
3257		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
3258		return;
3259	}
3260
3261	if (msp->ms_dlpistate != DL_IDLE) {
3262		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
3263		return;
3264	}
3265
3266	if (man_dlpi_senddown(msp, mp)) {
3267		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
3268		return;
3269	}
3270
3271	man_dl_release(&msp->ms_dl_mp, mp);
3272
3273	SETSTATE(msp, DL_UNBOUND);
3274
3275	dlokack(wq, mp, DL_UNBIND_REQ);
3276
3277}
3278
3279static void
3280man_ireq(queue_t *wq, mblk_t *mp)
3281{
3282	manstr_t	*msp;
3283	dl_info_ack_t	*dlip;
3284	man_dladdr_t	*dlap;
3285	eaddr_t		*ep;
3286	size_t	size;
3287
3288	msp = (manstr_t *)wq->q_ptr;
3289
3290	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
3291		dlerrorack(wq, mp, DL_INFO_REQ, DL_BADPRIM, 0);
3292		return;
3293	}
3294
3295	/* Exchange current msg for a DL_INFO_ACK. */
3296	size = sizeof (dl_info_ack_t) + MAN_ADDRL + ETHERADDRL;
3297	mp = mexchange(wq, mp, size, M_PCPROTO, DL_INFO_ACK);
3298	if (mp == NULL) {
3299		MAN_DBG(MAN_DLPI, ("man_ireq: man_ireq: mp == NULL."));
3300		return;
3301	}
3302
3303	/* Fill in the DL_INFO_ACK fields and reply. */
3304	dlip = (dl_info_ack_t *)mp->b_rptr;
3305	*dlip = man_infoack;
3306	dlip->dl_current_state = msp->ms_dlpistate;
3307	dlap = (man_dladdr_t *)(mp->b_rptr + dlip->dl_addr_offset);
3308	dlap->dl_sap = msp->ms_sap;
3309
3310	/*
3311	 * If attached, return physical address.
3312	 */
3313	if (msp->ms_manp != NULL) {
3314		ether_copy(&msp->ms_manp->man_eaddr, &dlap->dl_phys);
3315	} else {
3316		bzero((caddr_t)&dlap->dl_phys, ETHERADDRL);
3317	}
3318
3319	ep = (struct ether_addr *)(mp->b_rptr + dlip->dl_brdcst_addr_offset);
3320	ether_copy(&etherbroadcast, ep);
3321
3322	qreply(wq, mp);
3323
3324}
3325
3326
3327static void
3328man_ponreq(queue_t *wq, mblk_t *mp)
3329{
3330	manstr_t	*msp;
3331	int		flag;
3332
3333	msp = (manstr_t *)wq->q_ptr;
3334
3335	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
3336		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
3337		return;
3338	}
3339
3340	switch (((dl_promiscon_req_t *)mp->b_rptr)->dl_level) {
3341	case DL_PROMISC_PHYS:
3342		flag = MAN_SFLAG_ALLPHYS;
3343		break;
3344
3345	case DL_PROMISC_SAP:
3346		flag = MAN_SFLAG_ALLSAP;
3347		break;
3348
3349	case DL_PROMISC_MULTI:
3350		flag = MAN_SFLAG_ALLMULTI;
3351		break;
3352
3353	default:
3354		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_NOTSUPPORTED, 0);
3355		return;
3356	}
3357
3358	/*
3359	 * Catch request for replay, and forward down to any lower
3360	 * lower stream.
3361	 */
3362	if (man_dlpi(msp, mp)) {
3363		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_SYSERR, ENOMEM);
3364		return;
3365	}
3366
3367	msp->ms_flags |= flag;
3368
3369	dlokack(wq, mp, DL_PROMISCON_REQ);
3370
3371}
3372
3373static void
3374man_poffreq(queue_t *wq, mblk_t *mp)
3375{
3376	manstr_t		*msp;
3377	int			flag;
3378
3379	msp = (manstr_t *)wq->q_ptr;
3380
3381	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
3382		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
3383		return;
3384	}
3385
3386	switch (((dl_promiscoff_req_t *)mp->b_rptr)->dl_level) {
3387	case DL_PROMISC_PHYS:
3388		flag = MAN_SFLAG_ALLPHYS;
3389		break;
3390
3391	case DL_PROMISC_SAP:
3392		flag = MAN_SFLAG_ALLSAP;
3393		break;
3394
3395	case DL_PROMISC_MULTI:
3396		flag = MAN_SFLAG_ALLMULTI;
3397		break;
3398
3399	default:
3400		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTSUPPORTED, 0);
3401		return;
3402	}
3403
3404	if ((msp->ms_flags & flag) == 0) {
3405		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
3406		return;
3407	}
3408
3409	if (man_dlpi_senddown(msp, mp)) {
3410		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_SYSERR, ENOMEM);
3411		return;
3412	}
3413
3414	man_dl_release(&msp->ms_dl_mp, mp);
3415
3416	msp->ms_flags &= ~flag;
3417
3418	dlokack(wq, mp, DL_PROMISCOFF_REQ);
3419
3420}
3421
3422/*
3423 * Enable multicast requests. We might need to track addresses instead of
3424 * just passing things through (see eri_dmreq) - TBD.
3425 */
3426static void
3427man_emreq(queue_t *wq, mblk_t *mp)
3428{
3429	manstr_t		*msp;
3430	union DL_primitives	*dlp;
3431	eaddr_t			*addrp;
3432	t_uscalar_t		off;
3433	t_uscalar_t		len;
3434
3435	msp = (manstr_t *)wq->q_ptr;
3436
3437	if (MBLKL(mp) < DL_ENABMULTI_REQ_SIZE) {
3438		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADPRIM, 0);
3439		return;
3440	}
3441
3442	if (msp->ms_dlpistate == DL_UNATTACHED) {
3443		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3444		return;
3445	}
3446
3447	dlp = (union DL_primitives *)mp->b_rptr;
3448	len = dlp->enabmulti_req.dl_addr_length;
3449	off = dlp->enabmulti_req.dl_addr_offset;
3450	addrp = (struct ether_addr *)(mp->b_rptr + off);
3451
3452	if ((len != ETHERADDRL) ||
3453	    !MBLKIN(mp, off, len) ||
3454	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3455		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3456		return;
3457	}
3458
3459	/*
3460	 * Catch request for replay, and forward down to any lower
3461	 * lower stream.
3462	 */
3463	if (man_dlpi(msp, mp)) {
3464		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3465		return;
3466	}
3467
3468	dlokack(wq, mp, DL_ENABMULTI_REQ);
3469
3470}
3471
3472static void
3473man_dmreq(queue_t *wq, mblk_t *mp)
3474{
3475	manstr_t		*msp;
3476	union DL_primitives	*dlp;
3477	eaddr_t			*addrp;
3478	t_uscalar_t		off;
3479	t_uscalar_t		len;
3480
3481	msp = (manstr_t *)wq->q_ptr;
3482
3483	if (MBLKL(mp) < DL_DISABMULTI_REQ_SIZE) {
3484		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADPRIM, 0);
3485		return;
3486	}
3487
3488	if (msp->ms_dlpistate == DL_UNATTACHED) {
3489		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3490		return;
3491	}
3492
3493	dlp = (union DL_primitives *)mp->b_rptr;
3494	len = dlp->enabmulti_req.dl_addr_length;
3495	off = dlp->enabmulti_req.dl_addr_offset;
3496	addrp = (struct ether_addr *)(mp->b_rptr + off);
3497
3498	if ((len != ETHERADDRL) ||
3499	    !MBLKIN(mp, off, len) ||
3500	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3501		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3502		return;
3503	}
3504
3505	if (man_dlpi_senddown(msp, mp)) {
3506		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3507		return;
3508	}
3509
3510	man_dl_release(&msp->ms_dl_mp, mp);
3511
3512	dlokack(wq, mp, DL_DISABMULTI_REQ);
3513
3514}
3515
3516static void
3517man_pareq(queue_t *wq, mblk_t *mp)
3518{
3519	manstr_t		*msp;
3520	union	DL_primitives	*dlp;
3521	uint32_t		type;
3522	struct	ether_addr	addr;
3523
3524	msp = (manstr_t *)wq->q_ptr;
3525
3526	if (MBLKL(mp) < DL_PHYS_ADDR_REQ_SIZE) {
3527		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3528		return;
3529	}
3530
3531	dlp = (union DL_primitives *)mp->b_rptr;
3532	type = dlp->physaddr_req.dl_addr_type;
3533	if (msp->ms_manp == NULL) {
3534		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3535		return;
3536	}
3537
3538	switch (type) {
3539	case	DL_FACT_PHYS_ADDR:
3540		(void) localetheraddr((struct ether_addr *)NULL, &addr);
3541		break;
3542
3543	case	DL_CURR_PHYS_ADDR:
3544		ether_bcopy(&msp->ms_manp->man_eaddr, &addr);
3545		break;
3546
3547	default:
3548		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_NOTSUPPORTED, 0);
3549		return;
3550	}
3551
3552	dlphysaddrack(wq, mp, &addr, ETHERADDRL);
3553}
3554
3555/*
3556 * TBD - this routine probably should be protected w/ an ndd
3557 * tuneable, or a man.conf parameter.
3558 */
3559static void
3560man_spareq(queue_t *wq, mblk_t *mp)
3561{
3562	manstr_t		*msp;
3563	union DL_primitives	*dlp;
3564	t_uscalar_t		off;
3565	t_uscalar_t		len;
3566	eaddr_t			*addrp;
3567
3568	msp = (manstr_t *)wq->q_ptr;
3569
3570	if (MBLKL(mp) < DL_SET_PHYS_ADDR_REQ_SIZE) {
3571		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3572		return;
3573	}
3574
3575	dlp = (union DL_primitives *)mp->b_rptr;
3576	len = dlp->set_physaddr_req.dl_addr_length;
3577	off = dlp->set_physaddr_req.dl_addr_offset;
3578
3579	if (!MBLKIN(mp, off, len)) {
3580		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3581		return;
3582	}
3583
3584	addrp = (struct ether_addr *)(mp->b_rptr + off);
3585
3586	/*
3587	 * Error if length of address isn't right or the address
3588	 * specified is a multicast or broadcast address.
3589	 */
3590	if ((len != ETHERADDRL) ||
3591	    ((addrp->ether_addr_octet[0] & 01) == 1) ||
3592	    (ether_cmp(addrp, &etherbroadcast) == 0)) {
3593		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADADDR, 0);
3594		return;
3595	}
3596	/*
3597	 * Error if this stream is not attached to a device.
3598	 */
3599	if (msp->ms_manp == NULL) {
3600		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3601		return;
3602	}
3603
3604	/*
3605	 * We will also resend DL_SET_PHYS_ADDR_REQ for each dest
3606	 * when it is linked under us.
3607	 */
3608	if (man_dlpi_senddown(msp, mp)) {
3609		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_SYSERR, ENOMEM);
3610		return;
3611	}
3612
3613	ether_copy(addrp, msp->ms_manp->man_eaddr.ether_addr_octet);
3614
3615	MAN_DBG(MAN_DLPI, ("man_sareq: snagged %s\n",
3616	    ether_sprintf(&msp->ms_manp->man_eaddr)));
3617
3618	dlokack(wq, mp, DL_SET_PHYS_ADDR_REQ);
3619
3620}
3621
3622/*
3623 * These routines make up the lower part of the MAN streams framework.
3624 */
3625
3626/*
3627 * man_lwsrv - Deferred mblks for down stream. We end up here when
3628 * the destination is not DL_IDLE when traffic comes downstream.
3629 *
3630 *	wq - lower write queue of mxx
3631 */
3632static int
3633man_lwsrv(queue_t *wq)
3634{
3635	mblk_t		*mp;
3636	mblk_t		*mlistp;
3637	man_dest_t	*mdp;
3638	size_t		count;
3639
3640	mdp = (man_dest_t *)wq->q_ptr;
3641
3642	MAN_DBG(MAN_LWSRV, ("man_lwsrv: wq(0x%p) mdp(0x%p)"
3643	    " md_rq(0x%p)\n", (void *)wq, (void *)mdp,
3644	    mdp ? (void *)mdp->md_rq : NULL));
3645
3646	if (mdp == NULL)
3647		goto exit;
3648
3649	if (mdp->md_state & MAN_DSTATE_CLOSING) {
3650			flushq(wq, FLUSHDATA);
3651			flushq(RD(wq), FLUSHDATA);
3652			goto exit;
3653	}
3654
3655	/*
3656	 * Arrange to send deferred mp's first, then mblks on the
3657	 * service queue. Since we are exclusive in the inner perimeter,
3658	 * we dont have to worry about md_lock, like the put procedures,
3659	 * which are MTPUTSHARED.
3660	 */
3661	mutex_enter(&mdp->md_lock);
3662	mlistp = mdp->md_dmp_head;
3663	mdp->md_dmp_head = NULL;
3664	count = mdp->md_dmp_count;
3665	mdp->md_dmp_count = 0;
3666	mutex_exit(&mdp->md_lock);
3667
3668	while (mlistp != NULL) {
3669		mp = mlistp;
3670		mlistp = mp->b_next;
3671		mp->b_next = NULL;
3672		count -= msgsize(mp);
3673		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3674
3675			mutex_enter(&mdp->md_lock);
3676			mdp->md_dmp_count += count + msgsize(mp);
3677			mp->b_next = mlistp;
3678			mdp->md_dmp_head = mp;
3679			mutex_exit(&mdp->md_lock);
3680			goto exit;
3681		}
3682	}
3683	mdp->md_dmp_tail = NULL;
3684
3685	while (mp = getq(wq)) {
3686		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3687			/*
3688			 * Put it back on queue, making sure to avoid
3689			 * infinite loop mentioned in putbq(9F)
3690			 */
3691			noenable(wq);
3692			(void) putbq(wq, mp);
3693			enableok(wq);
3694
3695			break;
3696		}
3697	}
3698
3699exit:
3700
3701	return (0);
3702}
3703
3704/*
3705 * man_lrput - handle DLPI messages issued from downstream.
3706 *
3707 *	rq - lower read queue of mxx
3708 *	mp - mblk ptr to DLPI request
3709 *
3710 *	returns 0
3711 */
3712static int
3713man_lrput(queue_t *rq, mblk_t *mp)
3714{
3715	man_dest_t	*mdp;
3716	manstr_t	*msp;
3717
3718#if defined(DEBUG)
3719	union DL_primitives	*dlp;
3720	t_uscalar_t		prim = MAN_DLPI_MAX_PRIM + 1;
3721	char			*prim_str;
3722#endif  /* DEBUG */
3723
3724	mdp = (man_dest_t *)rq->q_ptr;
3725
3726#if defined(DEBUG)
3727	if (DB_TYPE(mp) == M_PROTO) {
3728		dlp = (union DL_primitives *)mp->b_rptr;
3729		prim = dlp->dl_primitive;
3730	}
3731
3732	prim_str = (prim > MAN_DLPI_MAX_PRIM) ? "NON DLPI" :
3733	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3734	    (prim == DLIOCRAW) ? "DLIOCRAW" :
3735	    dps[(unsigned int)prim];
3736	MAN_DBG(MAN_LRPUT, ("man_lrput: rq(0x%p) mp(0x%p) mdp(0x%p)"
3737	    " db_type(0x%x) dl_prim %s", (void *)rq,
3738	    (void *)mp, (void *)mdp, DB_TYPE(mp), prim_str));
3739	MAN_DBGCALL(MAN_LRPUT2, man_print_mdp(mdp));
3740#endif  /* DEBUG */
3741
3742	if (DB_TYPE(mp) == M_FLUSH) {
3743		/* Turn around */
3744		if (*mp->b_rptr & FLUSHW) {
3745			*mp->b_rptr &= ~FLUSHR;
3746			qreply(rq, mp);
3747		} else
3748			freemsg(mp);
3749		return (0);
3750	}
3751
3752	if (mdp == NULL || mdp->md_state != MAN_DSTATE_READY) {
3753
3754		MAN_DBG(MAN_LRPUT, ("man_lrput: not ready mdp(0x%p),"
3755		    " state(%d)", (void *)mdp, mdp ? mdp->md_state : -1));
3756		freemsg(mp);
3757		return (0);
3758	}
3759
3760	/*
3761	 * If we have a destination in the right state, forward on datagrams.
3762	 */
3763	if (MAN_IS_DATA(mp)) {
3764		if (mdp->md_dlpistate == DL_IDLE && canputnext(mdp->md_rq)) {
3765
3766			msp = mdp->md_msp;
3767			if (!(msp->ms_flags & MAN_SFLAG_PROMISC))
3768				mdp->md_rcvcnt++; /* Count for failover */
3769			/*
3770			 * go put mblk_t directly up to next queue.
3771			 */
3772			MAN_DBG(MAN_LRPUT, ("man_lrput: putnext to rq(0x%p)",
3773			    (void *)mdp->md_rq));
3774			(void) putnext(mdp->md_rq, mp);
3775		} else {
3776			freemsg(mp);
3777		}
3778	} else {
3779		/*
3780		 * Handle in man_lrsrv with exclusive inner perimeter lock.
3781		 */
3782		(void) putq(rq, mp);
3783	}
3784
3785	return (0);
3786}
3787
3788/*
3789 * Either this is a response from our attempt to sync the upper and lower
3790 * stream states, or its data. If its not data. Do DL_* response processing
3791 * and transition md_dlpistate accordingly. If its data, toss it.
3792 */
3793static int
3794man_lrsrv(queue_t *rq)
3795{
3796	man_dest_t		*mdp;
3797	mblk_t			*mp;
3798	union DL_primitives	*dlp;
3799	ulong_t			prim;
3800	ulong_t			cprim;
3801	int			need_dl_reset = FALSE;
3802
3803#if defined(DEBUG)
3804		struct iocblk	*iocp;
3805		char		ioc_cmd[256];
3806#endif  /* DEBUG */
3807
3808	MAN_DBG(MAN_LRSRV, ("man_lrsrv: rq(0x%p)", (void *)rq));
3809
3810	mdp = (man_dest_t *)rq->q_ptr;
3811
3812	if ((mdp == NULL) || (mdp->md_state & MAN_DSTATE_CLOSING)) {
3813			flushq(rq, FLUSHDATA);
3814			flushq(WR(rq), FLUSHDATA);
3815			goto exit;
3816	}
3817
3818	while (mp = getq(rq)) {
3819
3820
3821	/*
3822	 * If we're not connected, or its a datagram, toss it.
3823	 */
3824	if (MAN_IS_DATA(mp) || mdp->md_state != MAN_DSTATE_READY) {
3825
3826		MAN_DBG(MAN_LRSRV, ("man_lrsrv: dropping mblk mdp(0x%p)"
3827		    " is_data(%d)", (void *)mdp, MAN_IS_DATA(mp)));
3828		freemsg(mp);
3829		continue;
3830	}
3831
3832	/*
3833	 * Should be response to man_dlpi_replay. Discard unless there
3834	 * is a failure we care about.
3835	 */
3836
3837	switch (DB_TYPE(mp)) {
3838	case M_PROTO:
3839	case M_PCPROTO:
3840		/* Do proto processing below. */
3841		break;
3842
3843	case M_IOCNAK:
3844		/*
3845		 * DL_IOC* failed for some reason.
3846		 */
3847		need_dl_reset = TRUE;
3848
3849#if defined(DEBUG)
3850		iocp = (struct iocblk *)mp->b_rptr;
3851
3852		(void) sprintf(ioc_cmd, "0x%x", iocp->ioc_cmd);
3853		MAN_DBG(MAN_LRSRV, ("man_lrsrv: M_IOCNAK err %d for cmd(%s)\n",
3854		    iocp->ioc_error,
3855		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3856		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
3857#endif  /* DEBUG */
3858
3859		/* FALLTHRU */
3860
3861	case M_IOCACK:
3862	case M_CTL:
3863		/*
3864		 * OK response from DL_IOC*, ignore.
3865		 */
3866		goto dl_reset;
3867	}
3868
3869	dlp = (union DL_primitives *)mp->b_rptr;
3870	prim = dlp->dl_primitive;
3871
3872	MAN_DBG(MAN_LRSRV, ("man_lrsrv: prim %s", dps[(int)prim]));
3873
3874	/*
3875	 * DLPI state processing big theory: We do not rigorously check
3876	 * DLPI states (e.g. PENDING stuff). Simple rules:
3877	 *
3878	 * 	1) If we see an OK_ACK to an ATTACH_REQ, dlpistate = DL_UNBOUND.
3879	 *	2) If we see an BIND_ACK to a BIND_REQ, dlpistate = DL_IDLE.
3880	 *	3) If we see a OK_ACK response to an UNBIND_REQ
3881	 *	   dlpistate = DL_UNBOUND.
3882	 *	4) If we see a OK_ACK response to a DETACH_REQ,
3883	 *	   dlpistate = DL_UNATTACHED.
3884	 *
3885	 * Everything that isn't handle by 1-4 above is handled by 5)
3886	 *
3887	 *	5) A NAK to any DL_* messages we care about causes
3888	 *	   dlpistate = DL_UNATTACHED and man_reset_dlpi to run
3889	 *
3890	 * TBD - need a reset counter so we can try a switch if it gets
3891	 * too high.
3892	 */
3893
3894	switch (prim) {
3895	case DL_OK_ACK:
3896		cprim = dlp->ok_ack.dl_correct_primitive;
3897
3898		switch (cprim) {
3899		case DL_ATTACH_REQ:
3900			if (man_dlioc_replay(mdp)) {
3901				D_SETSTATE(mdp, DL_UNBOUND);
3902			} else {
3903				need_dl_reset = TRUE;
3904				break;
3905			}
3906			break;
3907
3908		case DL_DETACH_REQ:
3909			D_SETSTATE(mdp, DL_UNATTACHED);
3910			break;
3911
3912		case DL_UNBIND_REQ:
3913			/*
3914			 * Cancel timer and set md_dlpistate.
3915			 */
3916			D_SETSTATE(mdp, DL_UNBOUND);
3917
3918			ASSERT(mdp->md_bc_id == 0);
3919			if (mdp->md_lc_timer_id != 0) {
3920				(void) quntimeout(man_ctl_wq,
3921				    mdp->md_lc_timer_id);
3922				mdp->md_lc_timer_id = 0;
3923			}
3924		}
3925		MAN_DBG(MAN_DLPI,
3926		    ("		cprim %s", dps[(int)cprim]));
3927		break;
3928
3929	case DL_BIND_ACK:
3930		/*
3931		 * We're ready for data. Get man_lwsrv to run to
3932		 * process any defered data and start linkcheck timer.
3933		 */
3934		D_SETSTATE(mdp, DL_IDLE);
3935		qenable(mdp->md_wq);
3936		mdp->md_linkstate = MAN_LINKGOOD;
3937		if (man_needs_linkcheck(mdp)) {
3938			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
3939			    man_linkcheck_timer, (void *)mdp,
3940			    man_gettimer(MAN_TIMER_LINKCHECK, mdp));
3941		}
3942
3943		break;
3944
3945	case DL_ERROR_ACK:
3946		cprim = dlp->error_ack.dl_error_primitive;
3947		switch (cprim) {
3948		case DL_ATTACH_REQ:
3949		case DL_BIND_REQ:
3950		case DL_DISABMULTI_REQ:
3951		case DL_ENABMULTI_REQ:
3952		case DL_PROMISCON_REQ:
3953		case DL_PROMISCOFF_REQ:
3954		case DL_SET_PHYS_ADDR_REQ:
3955			need_dl_reset = TRUE;
3956			break;
3957
3958		/*
3959		 * ignore error TBD (better comment)
3960		 */
3961		case DL_UNBIND_REQ:
3962		case DL_DETACH_REQ:
3963			break;
3964		}
3965
3966		MAN_DBG(MAN_DLPI,
3967		    ("\tdl_errno %d dl_unix_errno %d cprim %s",
3968		    dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno,
3969		    dps[(int)cprim]));
3970		break;
3971
3972	case DL_UDERROR_IND:
3973		MAN_DBG(MAN_DLPI,
3974		    ("\tdl_errno %d unix_errno %d",
3975		    dlp->uderror_ind.dl_errno,
3976		    dlp->uderror_ind.dl_unix_errno));
3977		break;
3978
3979	case DL_INFO_ACK:
3980		break;
3981
3982	default:
3983		/*
3984		 * We should not get here.
3985		 */
3986		cmn_err(CE_WARN, "man_lrsrv: unexpected DL prim 0x%lx!",
3987		    prim);
3988		need_dl_reset = TRUE;
3989		break;
3990	}
3991
3992dl_reset:
3993	freemsg(mp);
3994
3995	if (need_dl_reset) {
3996		man_pg_t	*mpg;
3997		man_path_t	*mp;
3998
3999		if (qsize(rq)) {	/* Dump all messages. */
4000			flushq(rq, FLUSHDATA);
4001			flushq(WR(rq), FLUSHDATA);
4002		}
4003
4004		mdp->md_dlpierrors++;
4005		D_SETSTATE(mdp, DL_UNATTACHED);
4006		if (mdp->md_lc_timer_id != 0) {
4007			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4008			mdp->md_lc_timer_id = 0;
4009		}
4010
4011		mutex_enter(&man_lock);
4012		ASSERT(mdp->md_msp != NULL);
4013		ASSERT(mdp->md_msp->ms_manp != NULL);
4014		mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg,
4015		    mdp->md_pg_id);
4016		ASSERT(mpg != NULL);
4017		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4018		    mdp->md_device.mdev_ppa);
4019		ASSERT(mp != NULL);
4020		mp->mp_device.mdev_state |= MDEV_FAILED;
4021		if ((mdp->md_dlpierrors >= MAN_MAX_DLPIERRORS) &&
4022		    (man_is_on_domain ||
4023		    mdp->md_msp->ms_manp->man_meta_ppa == 1)) {
4024			/*
4025			 * Autoswitching is disabled for instance 0
4026			 * on the SC as we expect the domain to
4027			 * initiate the path switching.
4028			 */
4029			(void) man_do_autoswitch((man_dest_t *)mdp);
4030			MAN_DBG(MAN_WARN, ("man_lrsrv: dlpi failure(%d,%d),"
4031			    " switching path", mdp->md_device.mdev_major,
4032			    mdp->md_device.mdev_ppa));
4033		} else {
4034			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
4035			    man_reset_dlpi, (void *)mdp,
4036			    man_gettimer(MAN_TIMER_DLPIRESET, mdp));
4037		}
4038		mutex_exit(&man_lock);
4039	}
4040
4041
4042	} /* End while (getq()) */
4043
4044exit:
4045	MAN_DBG(MAN_DLPI, ("man_lrsrv: returns"));
4046
4047	return (0);
4048}
4049
4050static int
4051man_needs_linkcheck(man_dest_t *mdp)
4052{
4053	/*
4054	 * Not ready for linkcheck.
4055	 */
4056	if (mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
4057		return (0);
4058
4059	/*
4060	 * Linkchecking needs to be done on IP streams. For domain, all
4061	 * driver instances need checking, for SC only instance 1 needs it.
4062	 */
4063	if ((man_is_on_domain || mdp->md_msp->ms_manp->man_meta_ppa == 1) &&
4064	    (mdp->md_msp->ms_sap == ETHERTYPE_IP ||
4065	    mdp->md_msp->ms_sap == ETHERTYPE_IPV6))
4066
4067		return (1);
4068
4069	/*
4070	 * Linkcheck not need on this link.
4071	 */
4072	return (0);
4073}
4074
4075/*
4076 * The following routines process work requests posted to man_iwork_q
4077 * from the non-STREAMS half of the driver (see man_bwork.c). The work
4078 * requires access to the inner perimeter lock of the driver. This
4079 * lock is acquired by man_uwsrv, who calls man_iwork to process the
4080 * man_iwork_q->
4081 */
4082
4083/*
4084 * The man_bwork has posted some work for us to do inside the
4085 * perimeter. This mainly involves updating lower multiplexor data
4086 * structures (non-blocking type stuff). So, we can hold the man_lock
4087 * until we are done processing all work items. Note that some of these
4088 * routines in turn submit work back to the bgthread, which they can do
4089 * since we hold the man_lock.
4090 */
4091static void
4092man_iwork()
4093{
4094	man_work_t	*wp;
4095	int		wp_finished;
4096
4097	MAN_DBG(MAN_SWITCH, ("man_iwork: q_work(0x%p)",
4098	    (void *)man_iwork_q->q_work));
4099
4100	mutex_enter(&man_lock);
4101
4102	while (man_iwork_q->q_work) {
4103
4104		wp = man_iwork_q->q_work;
4105		man_iwork_q->q_work = wp->mw_next;
4106		wp->mw_next = NULL;
4107
4108		mutex_exit(&man_lock);
4109
4110		MAN_DBG(MAN_SWITCH, ("man_iwork: type %s",
4111		    _mw_type[wp->mw_type]));
4112
4113		wp_finished = TRUE;
4114
4115		switch (wp->mw_type) {
4116		case MAN_WORK_DRATTACH:
4117			(void) man_do_dr_attach(wp);
4118			break;
4119
4120		case MAN_WORK_DRSWITCH:
4121			/*
4122			 * Return status to man_dr_detach immediately. If
4123			 * no error submitting SWITCH request, man_iswitch
4124			 * or man_bclose will cv_signal man_dr_detach on
4125			 * completion of SWITCH work request.
4126			 */
4127			if (man_do_dr_switch(wp) == 0)
4128				wp_finished = FALSE;
4129			break;
4130
4131		case MAN_WORK_DRDETACH:
4132			man_do_dr_detach(wp);
4133			break;
4134
4135		case MAN_WORK_SWITCH:
4136			if (man_iswitch(wp))
4137				wp_finished = FALSE;
4138			break;
4139
4140		case MAN_WORK_KSTAT_UPDATE:
4141			man_do_kstats(wp);
4142			break;
4143
4144		default:
4145			cmn_err(CE_WARN, "man_iwork: "
4146			    "illegal work type(%d)", wp->mw_type);
4147			break;
4148		}
4149
4150		mutex_enter(&man_lock);
4151
4152		/*
4153		 * If we've completed the work request, delete, or
4154		 * cv_signal waiter.
4155		 */
4156		if (wp_finished) {
4157			wp->mw_flags |= MAN_WFLAGS_DONE;
4158
4159			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
4160				cv_signal(&wp->mw_cv);
4161			else
4162				man_work_free(wp);
4163		}
4164	}
4165
4166	mutex_exit(&man_lock);
4167}
4168
4169/*
4170 * man_dr_detach has submitted a request to DRSWITCH a path.
4171 * He is in cv_wait_sig(wp->mw_cv). We forward the work request on to
4172 * man_bwork as a switch request. It should end up back at
4173 * man_iwork, who will cv_signal(wp->mw_cv) man_dr_detach.
4174 *
4175 * Called holding inner perimeter lock.
4176 * man_lock is held to synchronize access to pathgroup list(man_pg).
4177 */
4178static int
4179man_do_dr_switch(man_work_t *wp)
4180{
4181	man_t		*manp;
4182	man_pg_t	*mpg;
4183	man_path_t	*mp;
4184	man_path_t	*ap;
4185	man_adest_t	*adp;
4186	mi_path_t	mpath;
4187	int		status = 0;
4188
4189	adp = &wp->mw_arg;
4190
4191	MAN_DBG(MAN_SWITCH, ("man_do_dr_switch: pg_id %d work:", adp->a_pg_id));
4192	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4193
4194	mutex_enter(&man_lock);
4195	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4196	if (manp == NULL || manp->man_pg == NULL) {
4197		status = ENODEV;
4198		goto exit;
4199	}
4200
4201	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4202	if (mpg == NULL) {
4203		status = ENODEV;
4204		goto exit;
4205	}
4206
4207	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4208		status = EAGAIN;
4209		goto exit;
4210	}
4211
4212	/*
4213	 * Check to see if detaching device is active. If so, activate
4214	 * an alternate.
4215	 */
4216	mp = man_find_active_path(mpg->mpg_pathp);
4217	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4218
4219		ap = man_find_alternate_path(mpg->mpg_pathp);
4220		if (ap == NULL) {
4221			status = EBUSY;
4222			goto exit;
4223		}
4224
4225		bzero((char *)&mpath, sizeof (mi_path_t));
4226
4227		mpath.mip_cmd = MI_PATH_ACTIVATE;
4228		mpath.mip_man_ppa = 0;
4229		mpath.mip_pg_id = 0;
4230		mpath.mip_devs[0] = ap->mp_device;
4231		mpath.mip_ndevs = 1;
4232		ether_copy(&manp->man_eaddr, &mpath.mip_eaddr);
4233
4234		/*
4235		 * DR thread is sleeping on wp->mw_cv. We change the work
4236		 * request from DRSWITCH to SWITCH and submit it to
4237		 * for processing by man_bwork (via man_pg_cmd). At
4238		 * completion the SWITCH work request is processed by
4239		 * man_iswitch() or man_bclose and the DR thread will
4240		 * be cv_signal'd.
4241		 */
4242		wp->mw_type = MAN_WORK_SWITCH;
4243		if (status = man_pg_cmd(&mpath, wp))
4244			goto exit;
4245
4246	} else {
4247		/*
4248		 * Tell man_dr_detach that detaching device is not currently
4249		 * in use.
4250		 */
4251		status = ENODEV;
4252	}
4253
4254exit:
4255	if (status) {
4256		/*
4257		 * ENODEV is a noop, not really an error.
4258		 */
4259		if (status != ENODEV)
4260			wp->mw_status = status;
4261	}
4262	mutex_exit(&man_lock);
4263
4264	return (status);
4265}
4266
4267/*
4268 * man_dr_attach has submitted a request to DRATTACH a path,
4269 * add that path to the path list.
4270 *
4271 * Called holding perimeter lock.
4272 */
4273static int
4274man_do_dr_attach(man_work_t *wp)
4275{
4276	man_t		*manp;
4277	man_adest_t	*adp;
4278	mi_path_t	mpath;
4279	manc_t		manc;
4280	int		status = 0;
4281
4282	adp = &wp->mw_arg;
4283
4284	MAN_DBG(MAN_SWITCH, ("man_do_dr_attach: pg_id %d work:", adp->a_pg_id));
4285	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4286
4287	mutex_enter(&man_lock);
4288	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4289	if (manp == NULL || manp->man_pg == NULL) {
4290		status = ENODEV;
4291		goto exit;
4292	}
4293
4294	if (status = man_get_iosram(&manc)) {
4295		goto exit;
4296	}
4297	/*
4298	 * Extract SC ethernet address from IOSRAM.
4299	 */
4300	ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4301
4302	mpath.mip_pg_id = adp->a_pg_id;
4303	mpath.mip_man_ppa = adp->a_man_ppa;
4304	/*
4305	 * man_dr_attach passes the new device info in a_sf_dev.
4306	 */
4307	MAN_DBG(MAN_DR, ("man_do_dr_attach: "));
4308	MAN_DBGCALL(MAN_DR, man_print_dev(&adp->a_sf_dev));
4309	mpath.mip_devs[0] = adp->a_sf_dev;
4310	mpath.mip_ndevs = 1;
4311	mpath.mip_cmd = MI_PATH_ADD;
4312	status = man_pg_cmd(&mpath, NULL);
4313
4314exit:
4315	mutex_exit(&man_lock);
4316	return (status);
4317}
4318
4319/*
4320 * man_dr_detach has submitted a request to DRDETACH a path.
4321 * He is in cv_wait_sig(wp->mw_cv). We remove the path and
4322 * cv_signal(wp->mw_cv) man_dr_detach.
4323 *
4324 * Called holding perimeter lock.
4325 */
4326static void
4327man_do_dr_detach(man_work_t *wp)
4328{
4329	man_t		*manp;
4330	man_pg_t	*mpg;
4331	man_path_t	*mp;
4332	man_adest_t	*adp;
4333	manc_t		manc;
4334	mi_path_t	mpath;
4335	int		i;
4336	int		found;
4337	int		status = 0;
4338
4339	adp = &wp->mw_arg;
4340
4341	MAN_DBG(MAN_SWITCH, ("man_do_dr_detach: pg_id %d work:", adp->a_pg_id));
4342	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4343
4344	mutex_enter(&man_lock);
4345	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4346	if (manp == NULL || manp->man_pg == NULL) {
4347		status = ENODEV;
4348		goto exit;
4349	}
4350
4351	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4352	if (mpg == NULL) {
4353		status = ENODEV;
4354		goto exit;
4355	}
4356
4357	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4358		status = EAGAIN;
4359		goto exit;
4360	}
4361
4362	/*
4363	 * We should have switched detaching path if it was active.
4364	 */
4365	mp = man_find_active_path(mpg->mpg_pathp);
4366	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4367		status = EAGAIN;
4368		goto exit;
4369	}
4370
4371	/*
4372	 * Submit an ASSIGN command, minus the detaching device.
4373	 */
4374	bzero((char *)&mpath, sizeof (mi_path_t));
4375
4376	if (status = man_get_iosram(&manc)) {
4377		goto exit;
4378	}
4379
4380	mpath.mip_cmd = MI_PATH_ASSIGN;
4381	mpath.mip_man_ppa = 0;
4382	mpath.mip_pg_id = 0;
4383
4384	mp = mpg->mpg_pathp;
4385	i = 0;
4386	found = FALSE;
4387	while (mp != NULL) {
4388		if (mp->mp_device.mdev_ppa != adp->a_sf_dev.mdev_ppa) {
4389			mpath.mip_devs[i] = mp->mp_device;
4390			i++;
4391		} else {
4392			found = TRUE;
4393		}
4394		mp = mp->mp_next;
4395	}
4396
4397	if (found) {
4398		/*
4399		 * Need to include SCs ethernet address in command.
4400		 */
4401		mpath.mip_ndevs = i;
4402		ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4403
4404		status = man_pg_cmd(&mpath, NULL);
4405	}
4406
4407	/*
4408	 * Hand back status to man_dr_detach request.
4409	 */
4410exit:
4411	if (status != ENODEV)
4412		wp->mw_status = status;
4413
4414	mutex_exit(&man_lock);
4415
4416}
4417
4418
4419/*
4420 * The background thread has configured new lower multiplexor streams for
4421 * the given destinations. Update the appropriate destination data structures
4422 * inside the inner perimeter. We must take care to deal with destinations
4423 * whose upper stream has closed or detached from lower streams.
4424 *
4425 * Returns
4426 *	0		Done with work request.
4427 *	1		Reused work request.
4428 */
4429static int
4430man_iswitch(man_work_t *wp)
4431{
4432	man_adest_t	*adp;
4433	man_t		*manp;
4434	man_pg_t	*mpg;
4435	man_path_t	*mp = NULL;
4436	man_dest_t	*mdp;
4437	man_dest_t	*tdp;
4438	int		i;
4439	int		switch_ok = TRUE;
4440
4441	adp = &wp->mw_arg;
4442
4443	if (wp->mw_status != 0) {
4444		switch_ok = FALSE;	/* Never got things opened */
4445	}
4446
4447	/*
4448	 * Update destination structures as appropriate.
4449	 */
4450	for (i = 0; i < adp->a_ndests; i++) {
4451		man_dest_t	tmp;
4452
4453		/*
4454		 * Check to see if lower stream we just switch is still
4455		 * around.
4456		 */
4457		tdp = &adp->a_mdp[i];
4458		mdp = man_switch_match(tdp, adp->a_pg_id, tdp->md_switch_id);
4459
4460		if (mdp == NULL)
4461			continue;
4462
4463		if (switch_ok == FALSE) {
4464			/*
4465			 * Switch failed for some reason.  Clear
4466			 * PLUMBING flag and retry switch again later.
4467			 */
4468			man_ifail_dest(mdp);
4469			continue;
4470		}
4471
4472		/*
4473		 * Swap new info, for old. We return the old info to
4474		 * man_bwork to close things up below.
4475		 */
4476		bcopy((char *)mdp, (char *)&tmp, sizeof (man_dest_t));
4477
4478		ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4479		ASSERT(mdp->md_state == tdp->md_state);
4480
4481		mdp->md_state = tdp->md_state;
4482
4483		/*
4484		 * save the wq from the destination passed(tdp).
4485		 */
4486		mdp->md_wq = tdp->md_wq;
4487		RD(mdp->md_wq)->q_ptr = (void *)(mdp);
4488		WR(mdp->md_wq)->q_ptr = (void *)(mdp);
4489
4490		mdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4491		mdp->md_state |= MAN_DSTATE_READY;
4492
4493		ASSERT(mdp->md_device.mdev_major == adp->a_sf_dev.mdev_major);
4494
4495		ASSERT(tdp->md_device.mdev_ppa == adp->a_st_dev.mdev_ppa);
4496		ASSERT(tdp->md_device.mdev_major == adp->a_st_dev.mdev_major);
4497
4498		mdp->md_device = tdp->md_device;
4499		mdp->md_muxid = tdp->md_muxid;
4500		mdp->md_linkstate = MAN_LINKUNKNOWN;
4501		(void) drv_getparm(TIME, &mdp->md_lastswitch);
4502		mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4503		mdp->md_switch_id = 0;
4504		mdp->md_switches++;
4505		mdp->md_dlpierrors = 0;
4506		D_SETSTATE(mdp, DL_UNATTACHED);
4507
4508		/*
4509		 * Resync lower w/ upper dlpi state. This will start link
4510		 * timer if/when lower stream goes to DL_IDLE (see man_lrsrv).
4511		 */
4512		man_reset_dlpi((void *)mdp);
4513
4514		bcopy((char *)&tmp, (char *)tdp, sizeof (man_dest_t));
4515	}
4516
4517	if (switch_ok) {
4518		for (i = 0; i < adp->a_ndests; i++) {
4519			tdp = &adp->a_mdp[i];
4520
4521			tdp->md_state &= ~MAN_DSTATE_PLUMBING;
4522			tdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4523			tdp->md_state |= MAN_DSTATE_READY;
4524		}
4525	} else {
4526		/*
4527		 * Never got switch-to destinations open, free them.
4528		 */
4529		man_kfree(adp->a_mdp,
4530		    sizeof (man_dest_t) * adp->a_ndests);
4531	}
4532
4533	/*
4534	 * Clear pathgroup switching flag and update path flags.
4535	 */
4536	mutex_enter(&man_lock);
4537	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4538
4539	ASSERT(manp != NULL);
4540	ASSERT(manp->man_pg != NULL);
4541
4542	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4543	ASSERT(mpg != NULL);
4544	ASSERT(mpg->mpg_flags & MAN_PG_SWITCHING);
4545	mpg->mpg_flags &= ~MAN_PG_SWITCHING;
4546
4547	/*
4548	 * Switch succeeded, mark path we switched from as failed, and
4549	 * device we switch to as active and clear its failed flag (if set).
4550	 * Sync up kstats.
4551	 */
4552	if (switch_ok) {
4553		mp = man_find_active_path(mpg->mpg_pathp);
4554		if (mp != NULL) {
4555
4556			ASSERT(adp->a_sf_dev.mdev_major != 0);
4557
4558			MAN_DBG(MAN_SWITCH, ("man_iswitch: switch from dev:"));
4559			MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_sf_dev));
4560
4561			mp->mp_device.mdev_state &= ~MDEV_ACTIVE;
4562		} else
4563			ASSERT(adp->a_sf_dev.mdev_major == 0);
4564
4565		MAN_DBG(MAN_SWITCH, ("man_iswitch: switch to dev:"));
4566		MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_st_dev));
4567
4568		ASSERT(adp->a_st_dev.mdev_major != 0);
4569
4570		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4571		    adp->a_st_dev.mdev_ppa);
4572
4573		ASSERT(mp != NULL);
4574
4575		mp->mp_device.mdev_state |= MDEV_ACTIVE;
4576	}
4577
4578	/*
4579	 * Decrement manp reference count and hand back work request if
4580	 * needed.
4581	 */
4582	manp->man_refcnt--;
4583
4584	if (switch_ok) {
4585		wp->mw_type = MAN_WORK_CLOSE;
4586		man_work_add(man_bwork_q, wp);
4587	}
4588
4589	mutex_exit(&man_lock);
4590
4591	return (switch_ok);
4592}
4593
4594/*
4595 * Find the destination in the upper stream that we just switched.
4596 */
4597man_dest_t *
4598man_switch_match(man_dest_t *sdp, int pg_id, void *sid)
4599{
4600	man_dest_t	*mdp = NULL;
4601	manstr_t	*msp;
4602
4603	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4604		/*
4605		 * Check if upper stream closed, or detached.
4606		 */
4607		if (msp != sdp->md_msp)
4608			continue;
4609
4610		if (msp->ms_dests == NULL)
4611			break;
4612
4613		mdp = &msp->ms_dests[pg_id];
4614
4615		/*
4616		 * Upper stream detached and reattached while we were
4617		 * switching.
4618		 */
4619		if (mdp->md_switch_id != sid) {
4620			mdp = NULL;
4621			break;
4622		}
4623	}
4624
4625	return (mdp);
4626}
4627
4628/*
4629 * bg_thread cant complete the switch for some reason. (Re)start the
4630 * linkcheck timer again.
4631 */
4632static void
4633man_ifail_dest(man_dest_t *mdp)
4634{
4635	ASSERT(mdp->md_lc_timer_id == 0);
4636	ASSERT(mdp->md_bc_id == 0);
4637	ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4638
4639	MAN_DBG(MAN_SWITCH, ("man_ifail_dest"));
4640	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
4641
4642	mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4643	mdp->md_linkstate = MAN_LINKFAIL;
4644
4645	/*
4646	 * If we have not yet initialized link, or the upper stream is
4647	 * DL_IDLE, restart the linktimer.
4648	 */
4649	if ((mdp->md_state & MAN_DSTATE_INITIALIZING) ||
4650	    ((mdp->md_msp->ms_sap == ETHERTYPE_IPV6 ||
4651	    mdp->md_msp->ms_sap == ETHERTYPE_IP) &&
4652	    mdp->md_msp->ms_dlpistate == DL_IDLE)) {
4653
4654		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
4655		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4656	}
4657
4658}
4659
4660/*
4661 * Arrange to replay all of ms_dl_mp on the new lower stream to get it
4662 * in sync with the upper stream. Note that this includes setting the
4663 * physical address.
4664 *
4665 * Called from qtimeout with inner perimeter lock.
4666 */
4667static void
4668man_reset_dlpi(void *argp)
4669{
4670	man_dest_t	*mdp = (man_dest_t *)argp;
4671	manstr_t	*msp;
4672	mblk_t		*mp;
4673	mblk_t		*rmp = NULL;
4674	mblk_t		*tmp;
4675
4676	mdp->md_lc_timer_id = 0;
4677
4678	if (mdp->md_state != MAN_DSTATE_READY) {
4679		MAN_DBG(MAN_DLPI, ("man_reset_dlpi: not ready!"));
4680		return;
4681	}
4682
4683	msp = mdp->md_msp;
4684
4685	rmp = man_dup_mplist(msp->ms_dl_mp);
4686	if (rmp == NULL)
4687		goto fail;
4688
4689	/*
4690	 * Send down an unbind and detach request, just to clean things
4691	 * out, we ignore ERROR_ACKs for unbind and detach in man_lrsrv.
4692	 */
4693	tmp = man_alloc_ubreq_dreq();
4694	if (tmp == NULL) {
4695		goto fail;
4696	}
4697	mp = tmp;
4698	while (mp->b_next != NULL)
4699		mp = mp->b_next;
4700	mp->b_next = rmp;
4701	rmp = tmp;
4702
4703	man_dlpi_replay(mdp, rmp);
4704
4705	return;
4706
4707fail:
4708
4709	while (rmp) {
4710		mp = rmp;
4711		rmp = rmp->b_next;
4712		mp->b_next = mp->b_prev = NULL;
4713		freemsg(mp);
4714	}
4715
4716	ASSERT(mdp->md_lc_timer_id == 0);
4717	ASSERT(mdp->md_bc_id == 0);
4718
4719	/*
4720	 * If low on memory, try again later. I Could use qbufcall, but that
4721	 * could fail and I would have to try and recover from that w/
4722	 * qtimeout anyway.
4723	 */
4724	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_reset_dlpi,
4725	    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4726}
4727
4728/*
4729 * Once we receive acknowledgement that DL_ATTACH_REQ was successful,
4730 * we can send down the DL_* related IOCTLs (e.g. DL_IOC_HDR). If we
4731 * try and send them downsteam w/o waiting, the ioctl's get processed before
4732 * the ATTACH_REQ and they are rejected. TBD - could just do the lower
4733 * dlpi state change in lock step. TBD
4734 */
4735static int
4736man_dlioc_replay(man_dest_t *mdp)
4737{
4738	mblk_t		*rmp;
4739	int		status = 1;
4740
4741	if (mdp->md_msp->ms_dlioc_mp == NULL)
4742		goto exit;
4743
4744	rmp = man_dup_mplist(mdp->md_msp->ms_dlioc_mp);
4745	if (rmp == NULL) {
4746		status = 0;
4747		goto exit;
4748	}
4749
4750	man_dlpi_replay(mdp, rmp);
4751exit:
4752	return (status);
4753}
4754
4755static mblk_t *
4756man_alloc_ubreq_dreq()
4757{
4758	mblk_t			*dreq;
4759	mblk_t			*ubreq = NULL;
4760	union DL_primitives	*dlp;
4761
4762	dreq = allocb(DL_DETACH_REQ_SIZE, BPRI_MED);
4763	if (dreq == NULL)
4764		goto exit;
4765
4766	dreq->b_datap->db_type = M_PROTO;
4767	dlp = (union DL_primitives *)dreq->b_rptr;
4768	dlp->dl_primitive = DL_DETACH_REQ;
4769	dreq->b_wptr += DL_DETACH_REQ_SIZE;
4770
4771	ubreq = allocb(DL_UNBIND_REQ_SIZE, BPRI_MED);
4772	if (ubreq == NULL) {
4773		freemsg(dreq);
4774		goto exit;
4775	}
4776
4777	ubreq->b_datap->db_type = M_PROTO;
4778	dlp = (union DL_primitives *)ubreq->b_rptr;
4779	dlp->dl_primitive = DL_UNBIND_REQ;
4780	ubreq->b_wptr += DL_UNBIND_REQ_SIZE;
4781
4782	ubreq->b_next = dreq;
4783
4784exit:
4785
4786	return (ubreq);
4787}
4788
4789static mblk_t *
4790man_dup_mplist(mblk_t *mp)
4791{
4792	mblk_t	*listp = NULL;
4793	mblk_t	*tailp = NULL;
4794
4795	for (; mp != NULL; mp = mp->b_next) {
4796
4797		mblk_t	*nmp;
4798		mblk_t	*prev;
4799		mblk_t	*next;
4800
4801		prev = mp->b_prev;
4802		next = mp->b_next;
4803		mp->b_prev = mp->b_next = NULL;
4804
4805		nmp = copymsg(mp);
4806
4807		mp->b_prev = prev;
4808		mp->b_next = next;
4809
4810		if (nmp == NULL)
4811			goto nomem;
4812
4813		if (listp == NULL) {
4814			listp = tailp = nmp;
4815		} else {
4816			tailp->b_next = nmp;
4817			tailp = nmp;
4818		}
4819	}
4820
4821	return (listp);
4822nomem:
4823
4824	while (listp) {
4825		mp = listp;
4826		listp = mp->b_next;
4827		mp->b_next = mp->b_prev = NULL;
4828		freemsg(mp);
4829	}
4830
4831	return (NULL);
4832
4833}
4834
4835static mblk_t *
4836man_alloc_physreq_mp(eaddr_t *man_eap)
4837{
4838
4839	mblk_t			*mp;
4840	union DL_primitives	*dlp;
4841	t_uscalar_t		off;
4842	eaddr_t			*eap;
4843
4844	mp = allocb(DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL, BPRI_MED);
4845	if (mp == NULL)
4846		goto exit;
4847
4848	mp->b_datap->db_type = M_PROTO;
4849	dlp = (union DL_primitives *)mp->b_wptr;
4850	dlp->set_physaddr_req.dl_primitive = DL_SET_PHYS_ADDR_REQ;
4851	dlp->set_physaddr_req.dl_addr_length = ETHERADDRL;
4852	off = DL_SET_PHYS_ADDR_REQ_SIZE;
4853	dlp->set_physaddr_req.dl_addr_offset =  off;
4854	mp->b_wptr += DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL;
4855
4856	eap = (eaddr_t *)(mp->b_rptr + off);
4857	ether_copy(man_eap, eap);
4858
4859exit:
4860	MAN_DBG(MAN_DLPI, ("man_alloc_physreq: physaddr %s\n",
4861	    ether_sprintf(eap)));
4862
4863	return (mp);
4864}
4865
4866/*
4867 * A new path in a pathgroup has become active for the first time. Setup
4868 * the lower destinations in prepartion for man_pg_activate to call
4869 * man_autoswitch.
4870 */
4871static void
4872man_add_dests(man_pg_t *mpg)
4873{
4874	manstr_t	*msp;
4875	man_dest_t	*mdp;
4876
4877	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4878
4879		if (!man_str_uses_pg(msp, mpg))
4880			continue;
4881
4882		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4883
4884/*
4885 * TBD - Take out
4886 *		ASSERT(mdp->md_device.mdev_state == MDEV_UNASSIGNED);
4887 *		ASSERT(mdp->md_state == MAN_DSTATE_NOTPRESENT);
4888 */
4889		if (mdp->md_device.mdev_state != MDEV_UNASSIGNED) {
4890			cmn_err(CE_NOTE, "man_add_dests mdev !unassigned");
4891			MAN_DBGCALL(MAN_PATH, man_print_mdp(mdp));
4892		}
4893
4894		man_start_dest(mdp, msp, mpg);
4895	}
4896
4897}
4898
4899static int
4900man_remove_dests(man_pg_t *mpg)
4901{
4902	manstr_t	*msp;
4903	int		close_cnt = 0;
4904	man_dest_t	*cdp;
4905	man_dest_t	*mdp;
4906	man_dest_t	*tdp;
4907	man_work_t	*wp;
4908	mblk_t		*mp;
4909	int		status = 0;
4910
4911	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
4912	if (wp == NULL) {
4913		status = ENOMEM;
4914		goto exit;
4915	}
4916
4917	/*
4918	 * Count up number of destinations we need to close.
4919	 */
4920	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4921		if (!man_str_uses_pg(msp, mpg))
4922			continue;
4923
4924		close_cnt++;
4925	}
4926
4927	if (close_cnt == 0)
4928		goto exit;
4929
4930	cdp = man_kzalloc(sizeof (man_dest_t) * close_cnt, KM_NOSLEEP);
4931	if (cdp == NULL) {
4932		status = ENOMEM;
4933		man_work_free(wp);
4934		goto exit;
4935	}
4936
4937	tdp = cdp;
4938	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4939		if (!man_str_uses_pg(msp, mpg))
4940			continue;
4941
4942		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4943
4944		mdp->md_state |= MAN_DSTATE_CLOSING;
4945		mdp->md_device.mdev_state = MDEV_UNASSIGNED;
4946		mdp->md_msp = NULL;
4947		mdp->md_rq = NULL;
4948
4949		/*
4950		 * Clean up optimized destination pointer if we are
4951		 * closing it.
4952		 */
4953		man_set_optimized_dest(msp);
4954
4955		if (mdp->md_lc_timer_id != 0) {
4956			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4957			mdp->md_lc_timer_id = 0;
4958		}
4959		if (mdp->md_bc_id != 0) {
4960			qunbufcall(man_ctl_wq, mdp->md_bc_id);
4961			mdp->md_bc_id = 0;
4962		}
4963
4964		mutex_enter(&mdp->md_lock);
4965		while ((mp = mdp->md_dmp_head) != NULL) {
4966			mdp->md_dmp_head = mp->b_next;
4967			mp->b_next = NULL;
4968			freemsg(mp);
4969		}
4970		mdp->md_dmp_count = 0;
4971		mdp->md_dmp_tail = NULL;
4972		mutex_exit(&mdp->md_lock);
4973
4974		*tdp++ = *mdp;
4975
4976		mdp->md_state = MAN_DSTATE_NOTPRESENT;
4977		mdp->md_muxid = -1;
4978	}
4979
4980	wp->mw_arg.a_mdp = cdp;
4981	wp->mw_arg.a_ndests = close_cnt;
4982	man_work_add(man_bwork_q, wp);
4983
4984exit:
4985	return (status);
4986
4987}
4988
4989/*
4990 * Returns TRUE if stream uses pathgroup, FALSE otherwise.
4991 */
4992static int
4993man_str_uses_pg(manstr_t *msp, man_pg_t *mpg)
4994{
4995	int	status;
4996
4997	status = ((msp->ms_flags & MAN_SFLAG_CONTROL)	||
4998	    (msp->ms_dests == NULL)	||
4999	    (msp->ms_manp == NULL)	||
5000	    (msp->ms_manp->man_meta_ppa != mpg->mpg_man_ppa));
5001
5002	return (!status);
5003}
5004
5005static int
5006man_gettimer(int timer, man_dest_t *mdp)
5007{
5008
5009	int attached = TRUE;
5010	int time = 0;
5011
5012	if (mdp == NULL || mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
5013		attached = FALSE;
5014
5015	switch (timer) {
5016	case MAN_TIMER_INIT:
5017		if (attached)
5018			time = mdp->md_msp->ms_manp->man_init_time;
5019		else
5020			time = MAN_INIT_TIME;
5021		break;
5022
5023	case MAN_TIMER_LINKCHECK:
5024		if (attached) {
5025			if (mdp->md_linkstate == MAN_LINKSTALE)
5026				time = mdp->md_msp->ms_manp->man_linkstale_time;
5027			else
5028				time = mdp->md_msp->ms_manp->man_linkcheck_time;
5029		} else
5030			time = MAN_LINKCHECK_TIME;
5031		break;
5032
5033	case MAN_TIMER_DLPIRESET:
5034		if (attached)
5035			time = mdp->md_msp->ms_manp->man_dlpireset_time;
5036		else
5037			time = MAN_DLPIRESET_TIME;
5038		break;
5039
5040	default:
5041		MAN_DBG(MAN_LINK, ("man_gettimer: unknown timer %d", timer));
5042		time = MAN_LINKCHECK_TIME;
5043		break;
5044	}
5045
5046	return (drv_usectohz(time));
5047}
5048
5049/*
5050 * Check the links for each active destination. Called inside inner
5051 * perimeter via qtimeout. This timer only runs on the domain side of the
5052 * driver. It should never run on the SC side.
5053 *
5054 * On a MAN_LINKGOOD link, we check/probe the link health every
5055 * MAN_LINKCHECK_TIME seconds. If the link goes MAN_LINKSTALE, the we probe
5056 * the link every MAN_LINKSTALE_TIME seconds, and fail the link after probing
5057 * the link MAN_LINKSTALE_RETRIES times.
5058 * The man_lock is held to synchronize access pathgroup list(man_pg).
5059 */
5060void
5061man_linkcheck_timer(void *argp)
5062{
5063	man_dest_t		*mdp = (man_dest_t *)argp;
5064	int			restart_timer = TRUE;
5065	int			send_ping = TRUE;
5066	int			newstate;
5067	int			oldstate;
5068	man_pg_t		*mpg;
5069	man_path_t		*mp;
5070
5071	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: mdp"));
5072	MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5073
5074	/*
5075	 * Clear timeout id and check if someones waiting on us to
5076	 * complete a close.
5077	 */
5078	mdp->md_lc_timer_id = 0;
5079
5080	if (mdp->md_state == MAN_DSTATE_NOTPRESENT ||
5081	    mdp->md_state & MAN_DSTATE_BUSY) {
5082
5083		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: not ready mdp"));
5084		MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5085		goto exit;
5086	}
5087
5088	mutex_enter(&man_lock);
5089	/*
5090	 * If the lower stream needs initializing, just go straight to
5091	 * switch code. As the linkcheck timer is started for all
5092	 * SAPs, do not send ping packets during the initialization.
5093	 */
5094	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5095		send_ping = FALSE;
5096		goto do_switch;
5097	}
5098
5099	newstate = oldstate = mdp->md_linkstate;
5100
5101	if (!man_needs_linkcheck(mdp)) {
5102		cmn_err(CE_NOTE,
5103		    "man_linkcheck_timer: unneeded linkcheck on mdp(0x%p)",
5104		    (void *)mdp);
5105		mutex_exit(&man_lock);
5106		return;
5107	}
5108
5109	/*
5110	 * The above call to  man_needs_linkcheck() validates
5111	 * mdp->md_msp and mdp->md_msp->ms_manp pointers.
5112	 */
5113	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5114	ASSERT(mpg != NULL);
5115	mp = man_find_path_by_ppa(mpg->mpg_pathp, mdp->md_device.mdev_ppa);
5116	ASSERT(mp != NULL);
5117
5118	/*
5119	 * This is the most common case, when traffic is flowing.
5120	 */
5121	if (mdp->md_rcvcnt != mdp->md_lastrcvcnt) {
5122
5123		newstate = MAN_LINKGOOD;
5124		mdp->md_lastrcvcnt = mdp->md_rcvcnt;
5125		send_ping = FALSE;
5126
5127		/*
5128		 * Clear the FAILED flag and update lru.
5129		 */
5130		mp->mp_device.mdev_state &= ~MDEV_FAILED;
5131		(void) drv_getparm(TIME, &mp->mp_lru);
5132
5133		if (mdp->md_link_updown_msg == MAN_LINK_DOWN_MSG) {
5134			man_t *manp = mdp->md_msp->ms_manp;
5135
5136			cmn_err(CE_NOTE, "%s%d Link up",
5137			    ddi_major_to_name(manp->man_meta_major),
5138			    manp->man_meta_ppa);
5139
5140			mdp->md_link_updown_msg = MAN_LINK_UP_MSG;
5141		}
5142
5143		goto done;
5144	}
5145
5146	/*
5147	 * If we're here, it means we have not seen any traffic
5148	 */
5149	switch (oldstate) {
5150	case MAN_LINKINIT:
5151	case MAN_LINKGOOD:
5152		newstate = MAN_LINKSTALE;
5153		mdp->md_linkstales++;
5154		mdp->md_linkstale_retries =
5155		    mdp->md_msp->ms_manp->man_linkstale_retries;
5156		break;
5157
5158	case MAN_LINKSTALE:
5159	case MAN_LINKFAIL:
5160		mdp->md_linkstales++;
5161		mdp->md_linkstale_retries--;
5162		if (mdp->md_linkstale_retries < 0) {
5163			newstate = MAN_LINKFAIL;
5164			mdp->md_linkfails++;
5165			mdp->md_linkstale_retries =
5166			    mdp->md_msp->ms_manp->man_linkstale_retries;
5167			/*
5168			 * Mark the destination as FAILED and
5169			 * update lru.
5170			 */
5171			if (oldstate != MAN_LINKFAIL) {
5172				mp->mp_device.mdev_state |= MDEV_FAILED;
5173				(void) drv_getparm(TIME, &mp->mp_lru);
5174			}
5175		}
5176		break;
5177
5178	default:
5179		cmn_err(CE_WARN, "man_linkcheck_timer: illegal link"
5180		    " state %d", oldstate);
5181		break;
5182	}
5183done:
5184
5185	if (oldstate != newstate) {
5186
5187		MAN_DBG(MAN_LINK, ("man_linkcheck_timer"
5188		    " link state %s -> %s", lss[oldstate],
5189		    lss[newstate]));
5190
5191		mdp->md_linkstate = newstate;
5192	}
5193
5194	/*
5195	 * Do any work required from state transitions above.
5196	 */
5197	if (newstate == MAN_LINKFAIL) {
5198do_switch:
5199		if (!man_do_autoswitch(mdp)) {
5200			/*
5201			 * Stop linkcheck timer until switch completes.
5202			 */
5203			restart_timer = FALSE;
5204			send_ping = FALSE;
5205		}
5206	}
5207
5208	mutex_exit(&man_lock);
5209	if (send_ping)
5210		man_do_icmp_bcast(mdp, mdp->md_msp->ms_sap);
5211
5212	if (restart_timer)
5213		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
5214		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
5215
5216exit:
5217	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: returns"));
5218
5219}
5220
5221/*
5222 * Handle linkcheck initiated autoswitching.
5223 * Called with man_lock held.
5224 */
5225static int
5226man_do_autoswitch(man_dest_t *mdp)
5227{
5228	man_pg_t	*mpg;
5229	man_path_t	*ap;
5230	int		status = 0;
5231
5232	ASSERT(MUTEX_HELD(&man_lock));
5233	/*
5234	 * Set flags and refcnt. Cleared in man_iswitch when SWITCH completes.
5235	 */
5236	mdp->md_msp->ms_manp->man_refcnt++;
5237
5238	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5239	ASSERT(mpg);
5240
5241	if (mpg->mpg_flags & MAN_PG_SWITCHING)
5242		return (EBUSY);
5243
5244	mpg->mpg_flags |= MAN_PG_SWITCHING;
5245
5246	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5247		/*
5248		 * We're initializing, ask for a switch to our currently
5249		 * active device.
5250		 */
5251		status = man_autoswitch(mpg, &mdp->md_device, NULL);
5252	} else {
5253
5254		if (mdp->md_msp != NULL && mdp->md_msp->ms_manp != NULL &&
5255		    mdp->md_link_updown_msg == MAN_LINK_UP_MSG) {
5256
5257			man_t *manp = mdp->md_msp->ms_manp;
5258
5259			cmn_err(CE_NOTE, "%s%d Link down",
5260			    ddi_major_to_name(manp->man_meta_major),
5261			    manp->man_meta_ppa);
5262		}
5263		mdp->md_link_updown_msg = MAN_LINK_DOWN_MSG;
5264
5265		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: link failure on %s%d",
5266		    ddi_major_to_name(mdp->md_device.mdev_major),
5267		    mdp->md_device.mdev_ppa));
5268
5269		ap = man_find_alternate_path(mpg->mpg_pathp);
5270
5271		if (ap == NULL) {
5272			status = ENODEV;
5273			goto exit;
5274		}
5275		status = man_autoswitch(mpg, &ap->mp_device, NULL);
5276	}
5277exit:
5278	if (status != 0) {
5279		/*
5280		 * man_iswitch not going to run, clean up.
5281		 */
5282		mpg->mpg_flags &= ~MAN_PG_SWITCHING;
5283		mdp->md_msp->ms_manp->man_refcnt--;
5284	}
5285
5286	return (status);
5287}
5288
5289/*
5290 * Gather up all lower multiplexor streams that have this link open and
5291 * try to switch them. Called from inner perimeter and holding man_lock.
5292 *
5293 *	pg_id		- Pathgroup to do switch for.
5294 *	st_devp		- New device to switch to.
5295 *	wait_for_switch	- whether or not to qwait for completion.
5296 */
5297static int
5298man_autoswitch(man_pg_t *mpg, man_dev_t *st_devp, man_work_t *waiter_wp)
5299{
5300	man_work_t	*wp;
5301	int		sdp_cnt = 0;
5302	man_dest_t	*sdp;
5303	int		status = 0;
5304
5305	ASSERT(MUTEX_HELD(&man_lock));
5306	if (waiter_wp == NULL) {
5307		wp = man_work_alloc(MAN_WORK_SWITCH, KM_NOSLEEP);
5308		if (wp == NULL) {
5309			status = ENOMEM;
5310			goto exit;
5311		}
5312	} else {
5313		ASSERT(waiter_wp->mw_type == MAN_WORK_SWITCH);
5314		wp = waiter_wp;
5315	}
5316
5317	/*
5318	 * Set dests as PLUMBING, cancel timers and return array of dests
5319	 * that need a switch.
5320	 */
5321	status = man_prep_dests_for_switch(mpg, &sdp, &sdp_cnt);
5322	if (status) {
5323		if (waiter_wp == NULL)
5324			man_work_free(wp);
5325		goto exit;
5326	}
5327
5328	/*
5329	 * If no streams are active, there are no streams to switch.
5330	 * Return ENODEV (see man_pg_activate).
5331	 */
5332	if (sdp_cnt == 0) {
5333		if (waiter_wp == NULL)
5334			man_work_free(wp);
5335		status = ENODEV;
5336		goto exit;
5337	}
5338
5339	/*
5340	 * Ask the bgthread to switch. See man_bwork.
5341	 */
5342	wp->mw_arg.a_sf_dev = sdp->md_device;
5343	wp->mw_arg.a_st_dev = *st_devp;
5344	wp->mw_arg.a_pg_id = mpg->mpg_pg_id;
5345	wp->mw_arg.a_man_ppa = mpg->mpg_man_ppa;
5346
5347	wp->mw_arg.a_mdp = sdp;
5348	wp->mw_arg.a_ndests = sdp_cnt;
5349	man_work_add(man_bwork_q, wp);
5350
5351exit:
5352
5353	return (status);
5354}
5355
5356/*
5357 * If an alternate path exists for pathgroup, arrange for switch to
5358 * happen. Note that we need to switch each of msp->dests[pg_id], for
5359 * all on man_strup. We must:
5360 *
5361 *		Cancel any timers
5362 *		Mark dests as PLUMBING
5363 *		Submit switch request to man_bwork_q->
5364 */
5365static int
5366man_prep_dests_for_switch(man_pg_t *mpg, man_dest_t **mdpp, int *cntp)
5367{
5368	manstr_t	*msp;
5369	man_dest_t	*mdp;
5370	int		sdp_cnt = 0;
5371	man_dest_t	*sdp = NULL;
5372	man_dest_t	*tdp;
5373	int		status = 0;
5374
5375	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: pg_id %d",
5376	    mpg->mpg_pg_id));
5377
5378	/*
5379	 * Count up number of streams, there is one destination that needs
5380	 * switching per stream.
5381	 */
5382	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5383		if (man_str_uses_pg(msp, mpg))
5384			sdp_cnt++;
5385	}
5386
5387	if (sdp_cnt == 0)
5388		goto exit;
5389
5390	sdp = man_kzalloc(sizeof (man_dest_t) * sdp_cnt, KM_NOSLEEP);
5391	if (sdp == NULL) {
5392		status = ENOMEM;
5393		goto exit;
5394	}
5395	tdp = sdp;
5396	/*
5397	 * Mark each destination as unusable.
5398	 */
5399	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5400		if (man_str_uses_pg(msp, mpg)) {
5401
5402			/*
5403			 * Mark destination as plumbing and store the
5404			 * address of sdp as a way to identify the
5405			 * SWITCH request when it comes back (see man_iswitch).
5406			 */
5407			mdp = &msp->ms_dests[mpg->mpg_pg_id];
5408			mdp->md_state |= MAN_DSTATE_PLUMBING;
5409			mdp->md_switch_id = sdp;
5410
5411			/*
5412			 * Copy destination info.
5413			 */
5414			bcopy(mdp, tdp, sizeof (man_dest_t));
5415			tdp++;
5416
5417			/*
5418			 * Cancel timers.
5419			 */
5420			if (mdp->md_lc_timer_id) {
5421				(void) quntimeout(man_ctl_wq,
5422				    mdp->md_lc_timer_id);
5423				mdp->md_lc_timer_id = 0;
5424			}
5425			if (mdp->md_bc_id) {
5426				qunbufcall(man_ctl_wq, mdp->md_bc_id);
5427				mdp->md_bc_id = 0;
5428			}
5429		}
5430	}
5431
5432	*mdpp = sdp;
5433	*cntp = sdp_cnt;
5434	status = 0;
5435exit:
5436
5437	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: returns %d"
5438	    " sdp(0x%p) sdp_cnt(%d)", status, (void *)sdp, sdp_cnt));
5439
5440	return (status);
5441
5442}
5443
5444/*
5445 * The code below generates an ICMP echo packet and sends it to the
5446 * broadcast address in the hopes that the other end will respond
5447 * and the man_linkcheck_timer logic will see the traffic.
5448 *
5449 * This assumes ethernet-like media.
5450 */
5451/*
5452 * Generate an ICMP packet. Called exclusive inner perimeter.
5453 *
5454 *	mdp - destination to send packet to.
5455 *	sap - either ETHERTYPE_ARP or ETHERTYPE_IPV6
5456 */
5457static void
5458man_do_icmp_bcast(man_dest_t *mdp, t_uscalar_t sap)
5459{
5460	mblk_t			*mp = NULL;
5461
5462	/* TBD - merge pinger and this routine. */
5463
5464	ASSERT(sap == ETHERTYPE_IPV6 || sap == ETHERTYPE_IP);
5465
5466	if (sap == ETHERTYPE_IPV6) {
5467		mdp->md_icmpv6probes++;
5468	} else {
5469		mdp->md_icmpv4probes++;
5470	}
5471	/*
5472	 * Send the ICMP message
5473	 */
5474	mp = man_pinger(sap);
5475
5476	MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: sap=0x%x mp=0x%p",
5477	    sap, (void *)mp));
5478	if (mp == NULL)
5479		return;
5480
5481	/*
5482	 * Send it out.
5483	 */
5484	if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
5485
5486		MAN_DBG(MAN_LINK, ("man_do_icmp_broadcast: xmit failed"));
5487
5488		freemsg(mp);
5489	}
5490
5491}
5492
5493static mblk_t *
5494man_pinger(t_uscalar_t sap)
5495{
5496	mblk_t		*mp = NULL;
5497	man_dladdr_t	dlsap;
5498	icmph_t		*icmph;
5499	int		ipver;
5500	ipha_t		*ipha;
5501	ip6_t		*ip6h;
5502	int		iph_hdr_len;
5503	int		datalen = 64;
5504	uchar_t		*datap;
5505	uint16_t	size;
5506	uchar_t		i;
5507
5508	dlsap.dl_sap = htons(sap);
5509	bcopy(&etherbroadcast, &dlsap.dl_phys, sizeof (dlsap.dl_phys));
5510
5511	if (sap == ETHERTYPE_IPV6) {
5512		ipver = IPV6_VERSION;
5513		iph_hdr_len = sizeof (ip6_t);
5514		size = ICMP6_MINLEN;
5515	} else {
5516		ipver = IPV4_VERSION;
5517		iph_hdr_len = sizeof (ipha_t);
5518		size = ICMPH_SIZE;
5519	}
5520	size += (uint16_t)iph_hdr_len;
5521	size += datalen;
5522
5523	mp = man_alloc_udreq(size, &dlsap);
5524	if (mp == NULL)
5525		goto exit;
5526
5527	/*
5528	 * fill out the ICMP echo packet headers
5529	 */
5530	mp->b_cont->b_wptr += iph_hdr_len;
5531	if (ipver == IPV4_VERSION) {
5532		ipha = (ipha_t *)mp->b_cont->b_rptr;
5533		ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
5534		    | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
5535		ipha->ipha_type_of_service = 0;
5536		ipha->ipha_length = size;
5537		ipha->ipha_fragment_offset_and_flags = IPH_DF;
5538		ipha->ipha_ttl = 1;
5539		ipha->ipha_protocol = IPPROTO_ICMP;
5540		if (man_is_on_domain) {
5541			manc_t		manc;
5542
5543			if (man_get_iosram(&manc)) {
5544				freemsg(mp);
5545				mp = NULL;
5546				goto exit;
5547			}
5548
5549			/*
5550			 * Domain generates ping packets for domain to
5551			 * SC network (dman0 <--> scman0).
5552			 */
5553			ipha->ipha_dst = manc.manc_sc_ipaddr;
5554			ipha->ipha_src = manc.manc_dom_ipaddr;
5555		} else {
5556			/*
5557			 * Note that ping packets are only generated
5558			 * by the SC across scman1 (SC to SC network).
5559			 */
5560			ipha->ipha_dst = man_sc_ipaddrs.ip_other_sc_ipaddr;
5561			ipha->ipha_src = man_sc_ipaddrs.ip_my_sc_ipaddr;
5562		}
5563
5564		ipha->ipha_ident = 0;
5565
5566		ipha->ipha_hdr_checksum = 0;
5567		ipha->ipha_hdr_checksum = IP_CSUM(mp->b_cont, 0, 0);
5568
5569	} else {
5570		ip6h = (ip6_t *)mp->b_cont->b_rptr;
5571		/*
5572		 * IP version = 6, priority = 0, flow = 0
5573		 */
5574		ip6h->ip6_flow = (IPV6_VERSION << 28);
5575		ip6h->ip6_plen =
5576		    htons((short)(size - iph_hdr_len));
5577		ip6h->ip6_nxt = IPPROTO_ICMPV6;
5578		ip6h->ip6_hlim = 1;	/* stay on link */
5579
5580		if (man_is_on_domain) {
5581			manc_t		manc;
5582
5583			if (man_get_iosram(&manc)) {
5584				freemsg(mp);
5585				mp = NULL;
5586				goto exit;
5587			}
5588
5589			/*
5590			 * Domain generates ping packets for domain to
5591			 * SC network (dman0 <--> scman0).
5592			 */
5593			ip6h->ip6_src = manc.manc_dom_ipv6addr;
5594			ip6h->ip6_dst = manc.manc_sc_ipv6addr;
5595		} else {
5596			/*
5597			 * Note that ping packets are only generated
5598			 * by the SC across scman1 (SC to SC network).
5599			 */
5600			ip6h->ip6_src = man_sc_ip6addrs.ip6_my_sc_ipaddr;
5601			ip6h->ip6_dst = man_sc_ip6addrs.ip6_other_sc_ipaddr;
5602		}
5603	}
5604
5605	/*
5606	 * IPv6 and IP are the same for ICMP as far as I'm concerned.
5607	 */
5608	icmph = (icmph_t *)mp->b_cont->b_wptr;
5609	if (ipver == IPV4_VERSION) {
5610		mp->b_cont->b_wptr += ICMPH_SIZE;
5611		icmph->icmph_type = ICMP_ECHO_REQUEST;
5612		icmph->icmph_code = 0;
5613	} else {
5614		mp->b_cont->b_wptr += ICMP6_MINLEN;
5615		icmph->icmph_type = ICMP6_ECHO_REQUEST;
5616		icmph->icmph_code = 0;
5617	}
5618
5619	datap = mp->b_cont->b_wptr;
5620	mp->b_cont->b_wptr += datalen;
5621
5622	for (i = 0; i < datalen; i++)
5623		*datap++ = i;
5624
5625	if (ipver == IPV4_VERSION) {
5626		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len, 0);
5627	} else {
5628		uint32_t	sum;
5629
5630		sum = htons(IPPROTO_ICMPV6) + ip6h->ip6_plen;
5631		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len - 32,
5632		    (sum & 0xffff) + (sum >> 16));
5633	}
5634
5635/*
5636 * TBD
5637 *	icp->icmp_time =  ???;
5638 */
5639
5640exit:
5641	return (mp);
5642}
5643
5644static mblk_t *
5645man_alloc_udreq(int size, man_dladdr_t *dlsap)
5646{
5647	dl_unitdata_req_t	*udreq;
5648	mblk_t			*bp;
5649	mblk_t			*mp;
5650
5651	mp = allocb(sizeof (dl_unitdata_req_t) + sizeof (*dlsap), BPRI_MED);
5652
5653	if (mp == NULL) {
5654		cmn_err(CE_NOTE, "man_preparepkt: allocb failed");
5655		return (NULL);
5656	}
5657
5658	if ((bp = allocb(size, BPRI_MED)) == NULL) {
5659		freemsg(mp);
5660		cmn_err(CE_NOTE, "man_preparepkts: allocb failed");
5661		return (NULL);
5662	}
5663	bzero(bp->b_rptr, size);
5664
5665	mp->b_cont = bp;
5666	mp->b_datap->db_type = M_PROTO;
5667	udreq = (dl_unitdata_req_t *)mp->b_wptr;
5668	mp->b_wptr += sizeof (dl_unitdata_req_t);
5669
5670	/*
5671	 * phys addr first - TBD
5672	 */
5673	bcopy((char *)dlsap, mp->b_wptr, sizeof (*dlsap));
5674	mp->b_wptr += sizeof (*dlsap);
5675
5676	udreq->dl_primitive = DL_UNITDATA_REQ;
5677	udreq->dl_dest_addr_length = sizeof (*dlsap);
5678	udreq->dl_dest_addr_offset = sizeof (*udreq);
5679	udreq->dl_priority.dl_min = 0;
5680	udreq->dl_priority.dl_max = 0;
5681
5682	return (mp);
5683}
5684
5685
5686/*
5687 * The routines in this file are executed by the MAN background thread,
5688 * which executes outside of the STREAMS framework (see man_str.c). It is
5689 * allowed to do the things required to modify the STREAMS driver (things
5690 * that are normally done from a user process). These routines do things like
5691 * open and close drivers, PLINK and PUNLINK streams to/from the multiplexor,
5692 * etc.
5693 *
5694 * The mechanism of communication between the STREAMS portion of the driver
5695 * and the background thread portion are two work queues, man_bwork_q
5696 * and man_iwork_q (background work q and streams work q).  Work
5697 * requests are placed on those queues when one half of the driver wants
5698 * the other half to do some work for it.
5699 *
5700 * The MAN background thread executes the man_bwork routine. Its sole
5701 * job is to process work requests placed on this work q. The MAN upper
5702 * write service routine is responsible for processing work requests posted
5703 * to the man_iwork_q->
5704 *
5705 * Both work queues are protected by the global mutex man_lock. The
5706 * man_bwork is signalged via the condvarman_bwork_q->q_cv. The man_uwsrv
5707 * routine is signaled by calling qenable (forcing man_uwsrv to run).
5708 */
5709
5710/*
5711 * man_bwork - Work thread for this device.  It is responsible for
5712 * performing operations which can't occur within the STREAMS framework.
5713 *
5714 * Locking:
5715 *	- Called holding no locks
5716 *	- Obtains the global mutex man_lock to remove work from
5717 *	  man_bwork_q, and post work to man_iwork_q->
5718 *	- Note that we do not want to hold any locks when making
5719 *	  any ldi_ calls.
5720 */
5721void
5722man_bwork()
5723{
5724	man_work_t	*wp;
5725	int		done = 0;
5726	callb_cpr_t	cprinfo;
5727	int		wp_finished;
5728
5729	CALLB_CPR_INIT(&cprinfo, &man_lock, callb_generic_cpr,
5730	    "mn_work_thrd");
5731
5732	MAN_DBG(MAN_CONFIG, ("man_bwork: enter"));
5733
5734	while (done == 0) {
5735
5736		mutex_enter(&man_lock);
5737		/*
5738		 * While there is nothing to do, sit in cv_wait.  If work
5739		 * request is made, requester will signal.
5740		 */
5741		while (man_bwork_q->q_work == NULL) {
5742
5743			CALLB_CPR_SAFE_BEGIN(&cprinfo);
5744
5745			cv_wait(&man_bwork_q->q_cv, &man_lock);
5746
5747			CALLB_CPR_SAFE_END(&cprinfo, &man_lock);
5748		}
5749
5750		wp = man_bwork_q->q_work;
5751		man_bwork_q->q_work = wp->mw_next;
5752		wp->mw_next = NULL;
5753		mutex_exit(&man_lock);
5754
5755		wp_finished = TRUE;
5756
5757		MAN_DBG(MAN_SWITCH, ("man_bwork: type %s",
5758		    _mw_type[wp->mw_type]));
5759
5760		switch (wp->mw_type) {
5761		case MAN_WORK_OPEN_CTL:
5762			wp->mw_status = man_open_ctl();
5763			break;
5764
5765		case MAN_WORK_CLOSE_CTL:
5766			man_close_ctl();
5767			break;
5768
5769		case MAN_WORK_CLOSE:
5770		case MAN_WORK_CLOSE_STREAM:
5771			man_bclose(&wp->mw_arg);
5772			break;
5773
5774		case MAN_WORK_SWITCH:
5775			man_bswitch(&wp->mw_arg, wp);
5776			wp_finished = FALSE;
5777			break;
5778
5779		case MAN_WORK_STOP:		/* man_bwork_stop() */
5780			done = 1;
5781			mutex_enter(&man_lock);
5782			CALLB_CPR_EXIT(&cprinfo); /* Unlocks man_lock */
5783			break;
5784
5785		default:
5786			cmn_err(CE_WARN, "man_bwork: "
5787			    "illegal work type(%d)", wp->mw_type);
5788			break;
5789		}
5790
5791		mutex_enter(&man_lock);
5792
5793		if (wp_finished) {
5794			wp->mw_flags |= MAN_WFLAGS_DONE;
5795			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
5796				cv_signal(&wp->mw_cv);
5797			else if (wp->mw_flags & MAN_WFLAGS_QWAITER)
5798				qenable(wp->mw_q);
5799			else
5800				man_work_free(wp);
5801		}
5802
5803		mutex_exit(&man_lock);
5804	}
5805
5806	MAN_DBG(MAN_CONFIG, ("man_bwork: thread_exit"));
5807
5808	mutex_enter(&man_lock);
5809	man_bwork_id = NULL;
5810	mutex_exit(&man_lock);
5811
5812	thread_exit();
5813}
5814
5815/*
5816 * man_open_ctl - Open the control stream.
5817 *
5818 *	returns	- success - 0
5819 *		- failure - errno code
5820 *
5821 * Mutex Locking Notes:
5822 *	We need a way to keep the CLONE_OPEN qwaiters in man_open from
5823 *	checking the man_config variables after the ldi_open call below
5824 *	returns from man_open, leaving the inner perimeter. So, we use the
5825 *	man_lock to synchronize the threads in man_open_ctl and man_open.  We
5826 *	hold man_lock across this call into man_open, which in general is a
5827 *	no-no. But, the STREAMs portion of the driver (other than open)
5828 *	doesn't use it. So, if ldi_open gets hijacked to run any part of
5829 *	the MAN streams driver, it wont end up recursively trying to acquire
5830 *	man_lock. Note that the non-CLONE_OPEN portion of man_open doesnt
5831 *	acquire it either, so again no recursive mutex.
5832 */
5833static int
5834man_open_ctl()
5835{
5836	int		status = 0;
5837	ldi_handle_t	ctl_lh = NULL;
5838	ldi_ident_t	li = NULL;
5839
5840	MAN_DBG(MAN_CONFIG, ("man_open_ctl: plumbing control stream\n"));
5841
5842	/*
5843	 * Get eri driver loaded and kstats initialized. Is there a better
5844	 * way to do this? - TBD.
5845	 */
5846	status = ldi_ident_from_mod(&modlinkage, &li);
5847	if (status) {
5848		cmn_err(CE_WARN,
5849		    "man_open_ctl: ident alloc failed, error %d", status);
5850		goto exit;
5851	}
5852
5853	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
5854	    kcred, &ctl_lh, li);
5855	if (status) {
5856		cmn_err(CE_WARN,
5857		    "man_open_ctl: eri open failed, error %d", status);
5858		ctl_lh = NULL;
5859		goto exit;
5860	}
5861	(void) ldi_close(ctl_lh, NULL, kcred);
5862	ctl_lh = NULL;
5863
5864	mutex_enter(&man_lock);
5865
5866	if (man_ctl_lh != NULL) {
5867		mutex_exit(&man_lock);
5868		goto exit;
5869	}
5870
5871	ASSERT(man_ctl_wq == NULL);
5872	mutex_exit(&man_lock);
5873
5874	status = ldi_open_by_name(DMAN_INT_PATH, FREAD | FWRITE | FNOCTTY,
5875	    kcred, &ctl_lh, li);
5876	if (status) {
5877		cmn_err(CE_WARN,
5878		    "man_open_ctl: man control dev open failed, "
5879		    "error %d", status);
5880		goto exit;
5881	}
5882
5883	/*
5884	 * Update global config state. TBD - dont need lock here, since
5885	 * everyone is stuck in open until we finish. Only other modifier
5886	 * is man_deconfigure via _fini, which returns EBUSY if there is
5887	 * any open streams (other than control). Do need to signal qwaiters
5888	 * on error.
5889	 */
5890	mutex_enter(&man_lock);
5891	ASSERT(man_config_state == MAN_CONFIGURING);
5892	ASSERT(man_ctl_lh == NULL);
5893	man_ctl_lh = ctl_lh;
5894	mutex_exit(&man_lock);
5895
5896exit:
5897	if (li)
5898		ldi_ident_release(li);
5899
5900	MAN_DBG(MAN_CONFIG, ("man_open_ctl: man_ctl_lh(0x%p) errno = %d\n",
5901	    (void *)man_ctl_lh, status));
5902
5903	return (status);
5904}
5905
5906/*
5907 * man_close_ctl - Close control stream, we are about to unload driver.
5908 *
5909 * Locking:
5910 *	- Called holding no locks.
5911 */
5912static void
5913man_close_ctl()
5914{
5915	ldi_handle_t tlh;
5916
5917	MAN_DBG(MAN_CONFIG, ("man_close_ctl: unplumbing control stream\n"));
5918
5919	mutex_enter(&man_lock);
5920	if ((tlh = man_ctl_lh) != NULL)
5921		man_ctl_lh = NULL;
5922	mutex_exit(&man_lock);
5923
5924	if (tlh != NULL) {
5925		(void) ldi_close(tlh, NULL, kcred);
5926	}
5927
5928}
5929
5930/*
5931 * Close the lower streams. Get all the timers canceled, close the lower
5932 * stream and delete the dest array.
5933 *
5934 * Returns:
5935 *	0	Closed all streams.
5936 *	1	Couldn't close one or more streams, timers still running.
5937 *
5938 * Locking:
5939 *	- Called holding no locks.
5940 */
5941static void
5942man_bclose(man_adest_t *adp)
5943{
5944	int		i;
5945	man_dest_t	*mdp;
5946
5947	man_cancel_timers(adp);
5948
5949	for (i = 0; i < adp->a_ndests; i++) {
5950		mdp = &adp->a_mdp[i];
5951
5952		if (mdp->md_muxid != -1)
5953			man_unplumb(mdp);
5954	}
5955
5956	mutex_destroy(&mdp->md_lock);
5957	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
5958	adp->a_mdp = NULL;
5959}
5960
5961/*
5962 * We want to close down all lower streams. Need to wait until all
5963 * timers and work related to these lower streams is quiesced.
5964 *
5965 * Returns 1 if lower streams are quiesced, 0 if we need to wait
5966 * a bit longer.
5967 */
5968static void
5969man_cancel_timers(man_adest_t *adp)
5970{
5971	man_dest_t	*mdp;
5972	int		cnt;
5973	int		i;
5974
5975	mdp = adp->a_mdp;
5976	cnt = adp->a_ndests;
5977
5978	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: mdp(0x%p) cnt %d",
5979	    (void *)mdp, cnt));
5980
5981	for (i = 0; i < cnt; i++) {
5982
5983		if (mdp[i].md_lc_timer_id != 0) {
5984			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
5985			mdp[i].md_lc_timer_id = 0;
5986		}
5987
5988		if (mdp[i].md_bc_id != 0) {
5989			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
5990			mdp[i].md_bc_id = 0;
5991		}
5992	}
5993
5994	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: returns"));
5995}
5996
5997/*
5998 * A failover is started at start of day, when the driver detects a
5999 * link failure (see man_linkcheck_timer), or when DR detaches
6000 * the IO board containing the current active link between SC and
6001 * domain (see man_dr_detach, man_iwork, and man_do_dr_detach). A
6002 * MAN_WORK_SWITCH work request containing all the lower streams that
6003 * should be switched is posted on the man_bwork_q-> This work request is
6004 * processed here. Once all lower streams have been switched to an
6005 * alternate path, the MAN_WORK_SWITCH work request is passed back to
6006 * man_iwork_q where it is processed within the inner perimeter of the
6007 * STREAMS framework (see man_iswitch).
6008 *
6009 * Note that when the switch fails for whatever reason, we just hand
6010 * back the lower streams untouched and let another failover happen.
6011 * Hopefully we will sooner or later succeed at the failover.
6012 */
6013static void
6014man_bswitch(man_adest_t *adp, man_work_t *wp)
6015{
6016	man_dest_t	*tdp;
6017	man_t		*manp;
6018	int		i;
6019	int		status = 0;
6020
6021	/*
6022	 * Make a temporary copy of dest array, updating device to the
6023	 * alternate and try to open all lower streams. bgthread can sleep.
6024	 */
6025
6026	tdp = man_kzalloc(sizeof (man_dest_t) * adp->a_ndests,
6027	    KM_SLEEP);
6028	bcopy(adp->a_mdp, tdp, sizeof (man_dest_t) * adp->a_ndests);
6029
6030	/*
6031	 * Before we switch to the new path, lets sync the kstats.
6032	 */
6033	mutex_enter(&man_lock);
6034
6035	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
6036	if (manp != NULL) {
6037		man_update_path_kstats(manp);
6038	} else
6039		status = ENODEV;
6040
6041	mutex_exit(&man_lock);
6042
6043	if (status != 0)
6044		goto exit;
6045
6046	for (i = 0; i < adp->a_ndests; i++) {
6047
6048		tdp[i].md_device = adp->a_st_dev;
6049		tdp[i].md_muxid = -1;
6050
6051		if (man_plumb(&tdp[i]))
6052			break;
6053	}
6054
6055	/*
6056	 * Didn't plumb everyone, unplumb new lower stuff and return.
6057	 */
6058	if (i < adp->a_ndests) {
6059		int	j;
6060
6061		for (j = 0; j <= i; j++)
6062			man_unplumb(&tdp[j]);
6063		status = EAGAIN;
6064		goto exit;
6065	}
6066
6067	if (man_is_on_domain && man_dossc_switch(adp->a_st_dev.mdev_exp_id)) {
6068		/*
6069		 * If we cant set new path on the SSC, then fail the
6070		 * failover.
6071		 */
6072		for (i = 0; i < adp->a_ndests; i++)
6073			man_unplumb(&tdp[i]);
6074		status = EAGAIN;
6075		goto exit;
6076	}
6077
6078	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
6079	adp->a_mdp = tdp;
6080
6081exit:
6082	if (status)
6083		man_kfree(tdp, sizeof (man_dest_t) * adp->a_ndests);
6084
6085
6086	MAN_DBG(MAN_SWITCH, ("man_bswitch: returns %d", status));
6087
6088	/*
6089	 * Hand processed switch request back to man_iwork for
6090	 * processing in man_iswitch.
6091	 */
6092	wp->mw_status = status;
6093
6094	mutex_enter(&man_lock);
6095	man_work_add(man_iwork_q, wp);
6096	mutex_exit(&man_lock);
6097
6098}
6099
6100/*
6101 * man_plumb - Configure a lower stream for this destination.
6102 *
6103 * Locking:
6104 * 	- Called holding no locks.
6105 *
6106 * Returns:
6107 *	- success - 0
6108 *	- failure - error code of failure
6109 */
6110static int
6111man_plumb(man_dest_t *mdp)
6112{
6113	int		status;
6114	int		muxid;
6115	ldi_handle_t	lh;
6116	ldi_ident_t	li = NULL;
6117
6118	MAN_DBG(MAN_SWITCH, ("man_plumb: mdp(0x%p) %s%d exp(%d)",
6119	    (void *)mdp, ddi_major_to_name(mdp->md_device.mdev_major),
6120	    mdp->md_device.mdev_ppa, mdp->md_device.mdev_exp_id));
6121
6122	/*
6123	 * Control stream should already be open.
6124	 */
6125	if (man_ctl_lh == NULL) {
6126		status = EAGAIN;
6127		goto exit;
6128	}
6129
6130	mutex_enter(&man_lock);
6131	ASSERT(man_ctl_wq != NULL);
6132	status = ldi_ident_from_stream(man_ctl_wq, &li);
6133	if (status != 0) {
6134		cmn_err(CE_WARN,
6135		    "man_plumb: ident alloc failed, error %d", status);
6136		goto exit;
6137	}
6138	mutex_exit(&man_lock);
6139
6140	/*
6141	 * previously opens were done by a dev_t of makedev(clone_major,
6142	 * mdev_major) which should always map to /devices/pseudo/clone@0:eri
6143	 */
6144	ASSERT(strcmp(ERI_IDNAME,
6145	    ddi_major_to_name(mdp->md_device.mdev_major)) == 0);
6146
6147	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
6148	    kcred, &lh, li);
6149	if (status) {
6150		cmn_err(CE_WARN,
6151		    "man_plumb: eri open failed, error %d", status);
6152		goto exit;
6153	}
6154
6155	/*
6156	 * Link netdev under MAN.
6157	 */
6158	ASSERT(mdp->md_muxid == -1);
6159
6160	status = ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lh,
6161	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
6162	if (status) {
6163		cmn_err(CE_WARN,
6164		    "man_plumb: ldi_ioctl(I_PLINK) failed, error %d", status);
6165		(void) ldi_close(lh, NULL, kcred);
6166		goto exit;
6167
6168	}
6169	mdp->md_muxid = muxid;
6170	mdp->md_wq = man_linkrec_find(muxid);
6171	/*
6172	 * If we can't find the linkrec then return an
6173	 * error. It will be automatically unplumbed on failure.
6174	 */
6175	if (mdp->md_wq == NULL)
6176		status = EAGAIN;
6177
6178	(void) ldi_close(lh, NULL, kcred);
6179exit:
6180	if (li)
6181		ldi_ident_release(li);
6182
6183	MAN_DBG(MAN_SWITCH, ("man_plumb: exit\n"));
6184
6185	return (status);
6186}
6187
6188/*
6189 * man_unplumb - tear down the STREAMs framework for the lower multiplexor.
6190 *
6191 *	mdp - destination struct of interest
6192 *
6193 *	returns	- success - 0
6194 *		- failure - return error from ldi_ioctl
6195 */
6196static void
6197man_unplumb(man_dest_t *mdp)
6198{
6199	int	status, rval;
6200
6201	MAN_DBG(MAN_SWITCH, ("man_unplumb: mdp"));
6202	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
6203
6204	if (mdp->md_muxid == -1)
6205		return;
6206
6207	ASSERT(man_ctl_lh != NULL);
6208
6209	/*
6210	 * I_PUNLINK causes the multiplexor resources to be freed.
6211	 */
6212	status = ldi_ioctl(man_ctl_lh, I_PUNLINK, (intptr_t)mdp->md_muxid,
6213	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &rval);
6214	if (status) {
6215		cmn_err(CE_WARN, "man_unplumb: ldi_ioctl(I_PUNLINK) failed"
6216		    " errno %d\n", status);
6217	}
6218	/*
6219	 * Delete linkrec if it exists.
6220	 */
6221	(void) man_linkrec_find(mdp->md_muxid);
6222	mdp->md_muxid = -1;
6223
6224}
6225
6226/*
6227 * The routines below deal with paths and pathgroups. These data structures
6228 * are used to track the physical devices connecting the domain and SSC.
6229 * These devices make up the lower streams of the MAN multiplexor. The
6230 * routines all expect the man_lock to be held.
6231 *
6232 * A pathgroup consists of all paths that connect a particular domain and the
6233 * SSC. The concept of a pathgroup id (pg_id) is used to uniquely identify
6234 * a pathgroup.  For Domains, there is just one pathgroup, that connecting
6235 * the domain to the SSC (pg_id == 0). On the SSC, there is one pathgroup per
6236 * domain. The pg_id field corresponds to the domain tags A-R. A pg_id of
6237 * 0 means domain tag A, a pg_id of 1 means domain B, etc.
6238 *
6239 * The path data structure identifies one path between the SSC and a domain.
6240 * It describes the information for the path: the major and minor number of
6241 * the physical device; kstat pointers; and ethernet address of the
6242 * other end of the path.
6243 *
6244 * The pathgroups are anchored at man_pg_head and are protected by the
6245 * by the inner perimeter. The routines are only called by the STREAMs
6246 * portion of the driver.
6247 */
6248
6249/*
6250 * Update man instance pathgroup info. Exclusive inner perimeter assures
6251 * this code is single threaded. man_refcnt assures man_t wont detach
6252 * while we are playing with man_pg stuff.
6253 *
6254 * Returns 0 on success, errno on failure.
6255 */
6256int
6257man_pg_cmd(mi_path_t *mip, man_work_t *waiter_wp)
6258{
6259	int		status = 0;
6260	man_t		*manp;
6261
6262	if (mip->mip_ndevs < 0) {
6263		status = EINVAL;
6264		cmn_err(CE_WARN, "man_pg_cmd: EINVAL: mip_ndevs %d",
6265		    mip->mip_ndevs);
6266		goto exit;
6267	}
6268
6269	ASSERT(MUTEX_HELD(&man_lock));
6270	manp = ddi_get_soft_state(man_softstate, mip->mip_man_ppa);
6271	if (manp == NULL) {
6272		status = ENODEV;
6273		goto exit;
6274	}
6275
6276	MAN_DBG(MAN_PATH, ("man_pg_cmd: mip"));
6277	MAN_DBGCALL(MAN_PATH, man_print_mip(mip));
6278
6279	MAN_DBG(MAN_PATH, ("\tman_t"));
6280	MAN_DBGCALL(MAN_PATH, man_print_man(manp));
6281
6282	switch (mip->mip_cmd) {
6283	case MI_PATH_ASSIGN:
6284		status = man_pg_assign(&manp->man_pg, mip, FALSE);
6285		break;
6286
6287	case MI_PATH_ADD:
6288		status = man_pg_assign(&manp->man_pg, mip, TRUE);
6289		break;
6290
6291	case MI_PATH_UNASSIGN:
6292		status = man_pg_unassign(&manp->man_pg, mip);
6293		break;
6294
6295	case MI_PATH_ACTIVATE:
6296		status = man_pg_activate(manp, mip, waiter_wp);
6297		break;
6298
6299	case MI_PATH_READ:
6300		status = man_pg_read(manp->man_pg, mip);
6301		break;
6302
6303	default:
6304		status = EINVAL;
6305		cmn_err(CE_NOTE, "man_pg_cmd: invalid command");
6306		break;
6307	}
6308
6309exit:
6310	MAN_DBG(MAN_PATH, ("man_pg_cmd: returns %d", status));
6311
6312	return (status);
6313}
6314
6315/*
6316 * Assign paths to a pathgroup. If pathgroup doesnt exists, create it.
6317 * If path doesnt exist, create it. If ethernet address of existing
6318 * pathgroup different, change it. If an existing path is not in the new
6319 * list, remove it.  If anything changed, send PATH_UPDATE request to
6320 * man_iwork to update all man_dest_t's.
6321 *
6322 * 	mplpp	- man pathgroup list point to point.
6323 *	mip	- new/updated pathgroup info to assign.
6324 */
6325static int
6326man_pg_assign(man_pg_t **mplpp, mi_path_t *mip, int add_only)
6327{
6328	man_pg_t	*mpg;
6329	man_path_t	*mp;
6330	man_path_t	*add_paths = NULL;
6331	int		cnt;
6332	int		i;
6333	int		first_pass = TRUE;
6334	int		status = 0;
6335
6336	ASSERT(MUTEX_HELD(&man_lock));
6337
6338	cnt = mip->mip_ndevs;
6339	if (cnt == 0) {
6340		status = EINVAL;
6341		cmn_err(CE_NOTE, "man_pg_assign: mip_ndevs == 0");
6342		goto exit;
6343	}
6344
6345	/*
6346	 * Assure the devices to be assigned are not assigned to some other
6347	 * pathgroup.
6348	 */
6349	for (i = 0; i < cnt; i++) {
6350		mpg = man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL);
6351
6352		if (mpg == NULL)
6353			continue;
6354
6355		if ((mpg->mpg_man_ppa != mip->mip_man_ppa) ||
6356		    (mpg->mpg_pg_id != mip->mip_pg_id)) {
6357			/*
6358			 * Already assigned to some other man instance
6359			 * or pathgroup.
6360			 */
6361			status = EEXIST;
6362			goto exit;
6363		}
6364	}
6365
6366	/*
6367	 * Find pathgroup, or allocate new one if it doesnt exist and
6368	 * add it to list at mplpp. Result is that mpg points to
6369	 * pathgroup to modify.
6370	 */
6371	mpg = man_find_pg_by_id(*mplpp, mip->mip_pg_id);
6372	if (mpg == NULL) {
6373
6374		status = man_pg_create(mplpp, &mpg, mip);
6375		if (status)
6376			goto exit;
6377
6378	} else if (ether_cmp(&mip->mip_eaddr, &mpg->mpg_dst_eaddr) != 0) {
6379
6380		cmn_err(CE_WARN, "man_pg_assign: ethernet address mismatch");
6381		cmn_err(CE_CONT, "existing %s",
6382		    ether_sprintf(&mpg->mpg_dst_eaddr));
6383		cmn_err(CE_CONT, "new %s",
6384		    ether_sprintf(&mip->mip_eaddr));
6385
6386		status = EINVAL;
6387		goto exit;
6388	}
6389
6390	/*
6391	 * Create list of new paths to add to pathgroup.
6392	 */
6393	for (i = 0; i < cnt; i++) {
6394
6395		if (man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL))
6396			continue;	/* Already exists in this pathgroup */
6397
6398		mp = man_kzalloc(sizeof (man_path_t), KM_NOSLEEP);
6399		if (mp == NULL) {
6400			status = ENOMEM;
6401			goto exit;
6402		}
6403
6404		mp->mp_device = mip->mip_devs[i];
6405		mp->mp_device.mdev_state = MDEV_ASSIGNED;
6406
6407		MAN_DBG(MAN_PATH, ("man_pg_assign: assigning mdp"));
6408		MAN_DBGCALL(MAN_PATH, man_print_dev(&mp->mp_device));
6409
6410		status = man_path_kstat_init(mp);
6411		if (status) {
6412			man_kfree(mp, sizeof (man_path_t));
6413			goto exit;
6414		}
6415
6416		man_path_insert(&add_paths, mp);
6417	}
6418
6419	/*
6420	 * man_dr_attach passes only the path which is being DRd in.
6421	 * So just add the path and don't worry about removing paths.
6422	 */
6423	if (add_only == TRUE)
6424		goto exit;
6425
6426
6427	/*
6428	 * Check if any paths we want to remove are ACTIVE. If not,
6429	 * do a second pass and remove them.
6430	 */
6431again:
6432	mp = mpg->mpg_pathp;
6433	while (mp != NULL) {
6434		int		in_new_list;
6435		man_path_t	*rp;
6436
6437		rp = NULL;
6438		in_new_list = FALSE;
6439
6440		for (i = 0; i < cnt; i++) {
6441			if (mp->mp_device.mdev_ppa ==
6442			    mip->mip_devs[i].mdev_ppa) {
6443
6444				in_new_list = TRUE;
6445				break;
6446			}
6447		}
6448
6449		if (!in_new_list) {
6450			if (first_pass) {
6451				if (mp->mp_device.mdev_state & MDEV_ACTIVE) {
6452					status = EBUSY;
6453					goto exit;
6454				}
6455			} else {
6456				rp = mp;
6457			}
6458		}
6459		mp = mp->mp_next;
6460
6461		if (rp != NULL)
6462			man_path_remove(&mpg->mpg_pathp, rp);
6463	}
6464
6465	if (first_pass == TRUE) {
6466		first_pass = FALSE;
6467		goto again;
6468	}
6469
6470exit:
6471	if (status == 0) {
6472		if (add_paths)
6473			man_path_merge(&mpg->mpg_pathp, add_paths);
6474	} else {
6475		while (add_paths != NULL) {
6476			mp = add_paths;
6477			add_paths = mp->mp_next;
6478			mp->mp_next = NULL;
6479
6480			man_path_kstat_uninit(mp);
6481			man_kfree(mp, sizeof (man_path_t));
6482		}
6483	}
6484
6485	return (status);
6486}
6487
6488/*
6489 * Remove all paths from a pathgroup (domain shutdown). If there is an
6490 * active path in the group, shut down all destinations referencing it
6491 * first.
6492 */
6493static int
6494man_pg_unassign(man_pg_t **plpp, mi_path_t *mip)
6495{
6496	man_pg_t	*mpg;
6497	man_pg_t	*tpg;
6498	man_pg_t	*tppg;
6499	man_path_t	*mp = NULL;
6500	int		status = 0;
6501
6502	ASSERT(MUTEX_HELD(&man_lock));
6503
6504	/*
6505	 * Check for existence of pathgroup.
6506	 */
6507	if ((mpg = man_find_pg_by_id(*plpp, mip->mip_pg_id)) == NULL)
6508		goto exit;
6509
6510	if (man_find_active_path(mpg->mpg_pathp) != NULL) {
6511		status = man_remove_dests(mpg);
6512		if (status)
6513			goto exit;
6514	}
6515
6516	/*
6517	 * Free all the paths for this pathgroup.
6518	 */
6519	while (mpg->mpg_pathp) {
6520		mp = mpg->mpg_pathp;
6521		mpg->mpg_pathp = mp->mp_next;
6522		mp->mp_next = NULL;
6523
6524		man_path_kstat_uninit(mp);
6525		man_kfree(mp, sizeof (man_path_t));
6526	}
6527
6528	/*
6529	 * Remove this pathgroup from the list, and free it.
6530	 */
6531	tpg = tppg = *plpp;
6532	if (tpg == mpg) {
6533		*plpp = tpg->mpg_next;
6534		goto free_pg;
6535	}
6536
6537	for (tpg = tpg->mpg_next; tpg != NULL; tpg = tpg->mpg_next) {
6538		if (tpg == mpg)
6539			break;
6540		tppg = tpg;
6541	}
6542
6543	ASSERT(tpg != NULL);
6544
6545	tppg->mpg_next = tpg->mpg_next;
6546	tpg->mpg_next = NULL;
6547
6548free_pg:
6549	man_kfree(tpg, sizeof (man_pg_t));
6550
6551exit:
6552	return (status);
6553
6554}
6555
6556/*
6557 * Set a new active path. This is done via man_ioctl so we are
6558 * exclusive in the inner perimeter.
6559 */
6560static int
6561man_pg_activate(man_t *manp, mi_path_t *mip, man_work_t *waiter_wp)
6562{
6563	man_pg_t	*mpg1;
6564	man_pg_t	*mpg2;
6565	man_pg_t	*plp;
6566	man_path_t	*mp;
6567	man_path_t	*ap;
6568	int		status = 0;
6569
6570	ASSERT(MUTEX_HELD(&man_lock));
6571	MAN_DBG(MAN_PATH, ("man_pg_activate: dev"));
6572	MAN_DBGCALL(MAN_PATH, man_print_dev(mip->mip_devs));
6573
6574	if (mip->mip_ndevs != 1) {
6575		status = EINVAL;
6576		goto exit;
6577	}
6578
6579	plp = manp->man_pg;
6580	mpg1 = man_find_pg_by_id(plp, mip->mip_pg_id);
6581	if (mpg1 == NULL) {
6582		status = EINVAL;
6583		goto exit;
6584	}
6585
6586	mpg2 = man_find_path_by_dev(plp, mip->mip_devs, &mp);
6587	if (mpg2 == NULL) {
6588		status = ENODEV;
6589		goto exit;
6590	}
6591
6592	if (mpg1 != mpg2) {
6593		status = EINVAL;
6594		goto exit;
6595	}
6596
6597	ASSERT(mp->mp_device.mdev_ppa == mip->mip_devs->mdev_ppa);
6598
6599	if (mpg1->mpg_flags & MAN_PG_SWITCHING) {
6600		status = EAGAIN;
6601		goto exit;
6602	}
6603
6604	ap = man_find_active_path(mpg1->mpg_pathp);
6605	if (ap == NULL) {
6606		/*
6607		 * This is the first time a path has been activated for
6608		 * this pathgroup. Initialize all upper streams dest
6609		 * structure for this pathgroup so autoswitch will find
6610		 * them.
6611		 */
6612		mp->mp_device.mdev_state |= MDEV_ACTIVE;
6613		man_add_dests(mpg1);
6614		goto exit;
6615	}
6616
6617	/*
6618	 * Path already active, nothing to do.
6619	 */
6620	if (ap == mp)
6621		goto exit;
6622
6623	/*
6624	 * Try to autoswitch to requested device. Set flags and refcnt.
6625	 * Cleared in man_iswitch when SWITCH completes.
6626	 */
6627	manp->man_refcnt++;
6628	mpg1->mpg_flags |= MAN_PG_SWITCHING;
6629
6630	/*
6631	 * Switch to path specified.
6632	 */
6633	status = man_autoswitch(mpg1, mip->mip_devs, waiter_wp);
6634
6635	if (status != 0) {
6636		/*
6637		 * man_iswitch not going to run, clean up.
6638		 */
6639		manp->man_refcnt--;
6640		mpg1->mpg_flags &= ~MAN_PG_SWITCHING;
6641
6642		if (status == ENODEV) {
6643			/*
6644			 * Device not plumbed isn't really an error. Change
6645			 * active device setting here, since man_iswitch isn't
6646			 * going to be run to do it.
6647			 */
6648			status = 0;
6649			ap->mp_device.mdev_state &= ~MDEV_ACTIVE;
6650			mp->mp_device.mdev_state |= MDEV_ACTIVE;
6651		}
6652	}
6653
6654exit:
6655	MAN_DBG(MAN_PATH, ("man_pg_activate: returns %d", status));
6656
6657	return (status);
6658}
6659
6660static int
6661man_pg_read(man_pg_t *plp, mi_path_t *mip)
6662{
6663	man_pg_t	*mpg;
6664	man_path_t	*mp;
6665	int		cnt;
6666	int		status = 0;
6667
6668	ASSERT(MUTEX_HELD(&man_lock));
6669
6670	if ((mpg = man_find_pg_by_id(plp, mip->mip_pg_id)) == NULL) {
6671		status = ENODEV;
6672		goto exit;
6673	}
6674
6675	cnt = 0;
6676	for (mp = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6677		bcopy(&mp->mp_device, &mip->mip_devs[cnt], sizeof (man_dev_t));
6678		if (cnt == mip->mip_ndevs)
6679			break;
6680		cnt++;
6681	}
6682
6683	MAN_DBG(MAN_PATH, ("man_pg_read: pg(0x%p) id(%d) found %d paths",
6684	    (void *)mpg, mpg->mpg_pg_id, cnt));
6685
6686	mip->mip_ndevs = cnt;
6687
6688	/*
6689	 * TBD - What should errno be if user buffer too small ?
6690	 */
6691	if (mp != NULL) {
6692		status = ENOMEM;
6693	}
6694
6695exit:
6696
6697	return (status);
6698}
6699
6700/*
6701 * return existing pathgroup, or create it. TBD - Need to update
6702 * all of destinations if we added a pathgroup. Also, need to update
6703 * all of man_strup if we add a path.
6704 *
6705 * 	mplpp	- man pathgroup list point to pointer.
6706 * 	mpgp	- returns newly created man pathgroup.
6707 *	mip	- info to fill in mpgp.
6708 */
6709static int
6710man_pg_create(man_pg_t **mplpp, man_pg_t **mpgp, mi_path_t *mip)
6711{
6712	man_pg_t	*mpg;
6713	man_pg_t	*tpg;
6714	int		status = 0;
6715
6716	ASSERT(MUTEX_HELD(&man_lock));
6717
6718	if (ether_cmp(&mip->mip_eaddr, &zero_ether_addr) == 0) {
6719		cmn_err(CE_NOTE, "man_ioctl: man_pg_create: ether"
6720		    " addresss not set!");
6721		status = EINVAL;
6722		goto exit;
6723	}
6724
6725	mpg = man_kzalloc(sizeof (man_pg_t), KM_NOSLEEP);
6726	if (mpg == NULL) {
6727		status = ENOMEM;
6728		goto exit;
6729	}
6730
6731	mpg->mpg_flags = MAN_PG_IDLE;
6732	mpg->mpg_pg_id = mip->mip_pg_id;
6733	mpg->mpg_man_ppa = mip->mip_man_ppa;
6734	ether_copy(&mip->mip_eaddr, &mpg->mpg_dst_eaddr);
6735
6736	MAN_DBG(MAN_PATH, ("man_pg_create: new mpg"));
6737	MAN_DBGCALL(MAN_PATH, man_print_mpg(mpg));
6738
6739	tpg = *mplpp;
6740	if (tpg == NULL) {
6741		*mplpp = mpg;
6742	} else {
6743		while (tpg->mpg_next != NULL)
6744			tpg = tpg->mpg_next;
6745		tpg->mpg_next = mpg;
6746	}
6747
6748exit:
6749	*mpgp = mpg;
6750
6751	return (status);
6752}
6753
6754/*
6755 * Return pointer to pathgroup containing mdevp, null otherwise. Also,
6756 * if a path pointer is passed in, set it to matching path in pathgroup.
6757 *
6758 * Called holding man_lock.
6759 */
6760static man_pg_t *
6761man_find_path_by_dev(man_pg_t *plp, man_dev_t *mdevp, man_path_t **mpp)
6762{
6763	man_pg_t	*mpg;
6764	man_path_t	*mp;
6765
6766	ASSERT(MUTEX_HELD(&man_lock));
6767	for (mpg = plp; mpg != NULL; mpg = mpg->mpg_next) {
6768		for (mp  = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6769			if (mp->mp_device.mdev_major == mdevp->mdev_major &&
6770			    mp->mp_device.mdev_ppa == mdevp->mdev_ppa) {
6771
6772				if (mpp != NULL)
6773					*mpp = mp;
6774				return (mpg);
6775			}
6776		}
6777	}
6778
6779	return (NULL);
6780}
6781
6782/*
6783 * Return pointer to pathgroup assigned to destination, null if not found.
6784 *
6785 * Called holding man_lock.
6786 */
6787static man_pg_t *
6788man_find_pg_by_id(man_pg_t *mpg, int pg_id)
6789{
6790	ASSERT(MUTEX_HELD(&man_lock));
6791	for (; mpg != NULL; mpg = mpg->mpg_next) {
6792		if (mpg->mpg_pg_id == pg_id)
6793			return (mpg);
6794	}
6795
6796	return (NULL);
6797}
6798
6799static man_path_t *
6800man_find_path_by_ppa(man_path_t *mplist, int ppa)
6801{
6802	man_path_t	*mp;
6803
6804	ASSERT(MUTEX_HELD(&man_lock));
6805	for (mp = mplist; mp != NULL; mp = mp->mp_next) {
6806		if (mp->mp_device.mdev_ppa == ppa)
6807			return (mp);
6808	}
6809
6810	return (NULL);
6811}
6812
6813static man_path_t *
6814man_find_active_path(man_path_t *mplist)
6815{
6816	man_path_t	*mp;
6817
6818	ASSERT(MUTEX_HELD(&man_lock));
6819	for (mp = mplist; mp != NULL; mp = mp->mp_next)
6820		if (mp->mp_device.mdev_state & MDEV_ACTIVE)
6821			return (mp);
6822
6823	return (NULL);
6824}
6825
6826/*
6827 * Try and find an alternate path.
6828 */
6829static man_path_t *
6830man_find_alternate_path(man_path_t *mlp)
6831{
6832	man_path_t	*ap;		/* Active path */
6833	man_path_t	*np;		/* New alternate path */
6834	man_path_t	*fp = NULL;	/* LRU failed path */
6835
6836	ASSERT(MUTEX_HELD(&man_lock));
6837	ap = man_find_active_path(mlp);
6838
6839	/*
6840	 * Find a non-failed path, or the lru failed path and switch to it.
6841	 */
6842	for (np = mlp; np != NULL; np = np->mp_next) {
6843		if (np == ap)
6844			continue;
6845
6846		if (np->mp_device.mdev_state == MDEV_ASSIGNED)
6847			goto exit;
6848
6849		if (np->mp_device.mdev_state & MDEV_FAILED) {
6850			if (fp == NULL)
6851				fp = np;
6852			else
6853				if (fp->mp_lru > np->mp_lru)
6854						fp = np;
6855		}
6856	}
6857
6858	/*
6859	 * Nowhere to switch to.
6860	 */
6861	if (np == NULL && (np =  fp) == NULL)
6862		goto exit;
6863
6864exit:
6865	return (np);
6866}
6867
6868/*
6869 * Assumes caller has verified existence.
6870 */
6871static void
6872man_path_remove(man_path_t **lpp, man_path_t *mp)
6873{
6874	man_path_t	*tp;
6875	man_path_t	*tpp;
6876
6877	ASSERT(MUTEX_HELD(&man_lock));
6878	MAN_DBG(MAN_PATH, ("man_path_remove: removing path"));
6879	MAN_DBGCALL(MAN_PATH, man_print_path(mp));
6880
6881	tp = tpp = *lpp;
6882	if (tp == mp) {
6883		*lpp = tp->mp_next;
6884		goto exit;
6885	}
6886
6887	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
6888		if (tp == mp)
6889			break;
6890		tpp = tp;
6891	}
6892
6893	ASSERT(tp != NULL);
6894
6895	tpp->mp_next = tp->mp_next;
6896	tp->mp_next = NULL;
6897
6898exit:
6899	man_path_kstat_uninit(tp);
6900	man_kfree(tp, sizeof (man_path_t));
6901
6902}
6903
6904/*
6905 * Insert path into list, ascending order by ppa.
6906 */
6907static void
6908man_path_insert(man_path_t **lpp, man_path_t *mp)
6909{
6910	man_path_t	*tp;
6911	man_path_t	*tpp;
6912
6913	ASSERT(MUTEX_HELD(&man_lock));
6914	if (*lpp == NULL) {
6915		*lpp = mp;
6916		return;
6917	}
6918
6919	tp = tpp = *lpp;
6920	if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa) {
6921		mp->mp_next = tp;
6922		*lpp = mp;
6923		return;
6924	}
6925
6926	for (tp = tp->mp_next; tp != NULL; tp =  tp->mp_next) {
6927		if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa)
6928			break;
6929		tpp = tp;
6930	}
6931
6932	if (tp == NULL) {
6933		tpp->mp_next = mp;
6934	} else {
6935		tpp->mp_next = mp;
6936		mp->mp_next = tp;
6937	}
6938}
6939
6940/*
6941 * Merge npp into lpp, ascending order by ppa. Assumes no
6942 * duplicates in either list.
6943 */
6944static void
6945man_path_merge(man_path_t **lpp, man_path_t *np)
6946{
6947	man_path_t	*tmp;
6948
6949	ASSERT(MUTEX_HELD(&man_lock));
6950	while (np != NULL) {
6951		tmp = np;
6952		np = np->mp_next;
6953		tmp->mp_next = NULL;
6954
6955		man_path_insert(lpp, tmp);
6956	}
6957
6958}
6959
6960static int
6961man_path_kstat_init(man_path_t *mpp)
6962{
6963
6964	kstat_named_t	*dev_knp;
6965	int		status = 0;
6966
6967	ASSERT(MUTEX_HELD(&man_lock));
6968	MAN_DBG(MAN_PATH, ("man_path_kstat_init: mpp(0x%p)\n", (void *)mpp));
6969
6970	/*
6971	 * Create named kstats for accounting purposes.
6972	 */
6973	dev_knp = man_kzalloc(MAN_NUMSTATS * sizeof (kstat_named_t),
6974	    KM_NOSLEEP);
6975	if (dev_knp == NULL) {
6976		status = ENOMEM;
6977		goto exit;
6978	}
6979	man_kstat_named_init(dev_knp, MAN_NUMSTATS);
6980	mpp->mp_last_knp = dev_knp;
6981
6982exit:
6983
6984	MAN_DBG(MAN_PATH, ("man_path_kstat_init: returns %d\n", status));
6985
6986	return (status);
6987}
6988
6989static void
6990man_path_kstat_uninit(man_path_t *mp)
6991{
6992	ASSERT(MUTEX_HELD(&man_lock));
6993	man_kfree(mp->mp_last_knp, MAN_NUMSTATS * sizeof (kstat_named_t));
6994}
6995
6996/*
6997 * man_work_alloc - allocate and initiate a work request structure
6998 *
6999 *	type - type of request to allocate
7000 *	returns	- success - ptr to an initialized work structure
7001 *		- failure - NULL
7002 */
7003man_work_t *
7004man_work_alloc(int type, int kmflag)
7005{
7006	man_work_t	*wp;
7007
7008	wp = man_kzalloc(sizeof (man_work_t), kmflag);
7009	if (wp == NULL)
7010		goto exit;
7011
7012	cv_init(&wp->mw_cv, NULL, CV_DRIVER, NULL); \
7013	wp->mw_type = type;
7014
7015exit:
7016	return (wp);
7017}
7018
7019/*
7020 * man_work_free - deallocate a work request structure
7021 *
7022 *	wp - ptr to work structure to be freed
7023 */
7024void
7025man_work_free(man_work_t *wp)
7026{
7027	cv_destroy(&wp->mw_cv);
7028	man_kfree((void *)wp, sizeof (man_work_t));
7029}
7030
7031/*
7032 * Post work to a work queue.  The man_bwork sleeps on
7033 * man_bwork_q->q_cv, and work requesters may sleep on mw_cv.
7034 * The man_lock is used to protect both cv's.
7035 */
7036void
7037man_work_add(man_workq_t *q, man_work_t *wp)
7038{
7039	man_work_t	*lp = q->q_work;
7040
7041	if (lp) {
7042		while (lp->mw_next != NULL)
7043			lp = lp->mw_next;
7044
7045		lp->mw_next = wp;
7046
7047	} else {
7048		q->q_work = wp;
7049	}
7050
7051	/*
7052	 * cv_signal for man_bwork_q, qenable for man_iwork_q
7053	 */
7054	if (q == man_bwork_q) {
7055		cv_signal(&q->q_cv);
7056
7057	} else {	/* q == man_iwork_q */
7058
7059		if (man_ctl_wq != NULL)
7060			qenable(man_ctl_wq);
7061	}
7062
7063}
7064
7065/* <<<<<<<<<<<<<<<<<<<<<<< NDD SUPPORT FUNCTIONS	>>>>>>>>>>>>>>>>>>> */
7066/*
7067 * ndd support functions to get/set parameters
7068 */
7069
7070/*
7071 * Register each element of the parameter array with the
7072 * named dispatch handler. Each element is loaded using
7073 * nd_load()
7074 *
7075 * 	cnt	- the number of elements present in the parameter array
7076 */
7077static int
7078man_param_register(param_t *manpa, int cnt)
7079{
7080	int	i;
7081	ndgetf_t getp;
7082	ndsetf_t setp;
7083	int	status = B_TRUE;
7084
7085	MAN_DBG(MAN_CONFIG, ("man_param_register: manpa(0x%p) cnt %d\n",
7086	    (void *)manpa, cnt));
7087
7088	getp = man_param_get;
7089
7090	for (i = 0; i < cnt; i++, manpa++) {
7091		switch (man_param_display[i]) {
7092		case MAN_NDD_GETABLE:
7093			setp = NULL;
7094			break;
7095
7096		case MAN_NDD_SETABLE:
7097			setp = man_param_set;
7098			break;
7099
7100		default:
7101			continue;
7102		}
7103
7104		if (!nd_load(&man_ndlist, manpa->param_name, getp,
7105		    setp, (caddr_t)manpa)) {
7106
7107			(void) man_nd_free(&man_ndlist);
7108			status = B_FALSE;
7109			goto exit;
7110		}
7111	}
7112
7113	if (!nd_load(&man_ndlist, "man_pathgroups_report",
7114	    man_pathgroups_report, NULL, NULL)) {
7115
7116		(void) man_nd_free(&man_ndlist);
7117		status = B_FALSE;
7118		goto exit;
7119	}
7120
7121	if (!nd_load(&man_ndlist, "man_set_active_path",
7122	    NULL, man_set_active_path, NULL)) {
7123
7124		(void) man_nd_free(&man_ndlist);
7125		status = B_FALSE;
7126		goto exit;
7127	}
7128
7129	if (!nd_load(&man_ndlist, "man_get_hostinfo",
7130	    man_get_hostinfo, NULL, NULL)) {
7131
7132		(void) man_nd_free(&man_ndlist);
7133		status = B_FALSE;
7134		goto exit;
7135	}
7136
7137exit:
7138
7139	MAN_DBG(MAN_CONFIG, ("man_param_register: returns %d\n", status));
7140
7141	return (status);
7142}
7143
7144static void
7145man_nd_getset(queue_t *wq, mblk_t *mp)
7146{
7147
7148	if (!nd_getset(wq, man_ndlist, mp))
7149		miocnak(wq, mp, 0, ENOENT);
7150	else
7151		qreply(wq, mp);
7152}
7153
7154/*ARGSUSED*/
7155static int
7156man_pathgroups_report(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7157{
7158
7159	man_t		*manp;
7160	man_pg_t	*mpg;
7161	int		i;
7162	char		pad[] = "                 "; /* 17 spaces */
7163	int		pad_end;
7164
7165
7166	MAN_DBG(MAN_PATH, ("man_pathgroups_report: wq(0x%p) mp(0x%p)"
7167	    " caddr 0x%p", (void *)wq, (void *)mp, (void *)cp));
7168
7169	(void) mi_mpprintf(mp, "MAN Pathgroup report: (* == failed)");
7170	(void) mi_mpprintf(mp, "====================================="
7171	    "==========================================");
7172
7173	mutex_enter(&man_lock);
7174
7175	for (i = 0; i < 2; i++) {
7176		manp = ddi_get_soft_state(man_softstate, i);
7177		if (manp == NULL)
7178			continue;
7179
7180	(void) mi_mpprintf(mp,
7181	    "Interface\tDestination\t\tActive Path\tAlternate Paths");
7182	(void) mi_mpprintf(mp, "---------------------------------------"
7183	    "----------------------------------------");
7184
7185		for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7186
7187			(void) mi_mpprintf(mp, "%s%d\t\t",
7188			    ddi_major_to_name(manp->man_meta_major),
7189			    manp->man_meta_ppa);
7190
7191			if (man_is_on_domain) {
7192				(void) mi_mpprintf_nr(mp, "Master SSC\t");
7193				man_preport(mpg->mpg_pathp, mp);
7194			} else {
7195				if (i == 0) {
7196					pad_end = 17 - strlen(ether_sprintf(
7197					    &mpg->mpg_dst_eaddr));
7198					if (pad_end < 0 || pad_end > 16)
7199					pad_end = 0;
7200					pad[pad_end] = '\0';
7201
7202					(void) mi_mpprintf_nr(mp, "%c %s%s",
7203					    mpg->mpg_pg_id + 'A',
7204					    ether_sprintf(&mpg->mpg_dst_eaddr),
7205					    pad);
7206
7207					pad[pad_end] = ' ';
7208				} else {
7209					(void) mi_mpprintf_nr(mp,
7210					    "Other SSC\t");
7211				}
7212				man_preport(mpg->mpg_pathp, mp);
7213			}
7214			(void) mi_mpprintf_nr(mp, "\n");
7215		}
7216	}
7217
7218	mutex_exit(&man_lock);
7219	MAN_DBG(MAN_PATH, ("man_pathgroups_report: returns"));
7220
7221	return (0);
7222}
7223
7224static void
7225man_preport(man_path_t *plist, mblk_t *mp)
7226{
7227	man_path_t	*ap;
7228
7229	ap = man_find_active_path(plist);
7230	/*
7231	 * Active path
7232	 */
7233	if (ap != NULL) {
7234		(void) mi_mpprintf_nr(mp, "\t%s%d\t\t",
7235		    ddi_major_to_name(ap->mp_device.mdev_major),
7236		    ap->mp_device.mdev_ppa);
7237	} else {
7238		(void) mi_mpprintf_nr(mp, "None \t");
7239	}
7240
7241	/*
7242	 * Alternate Paths.
7243	 */
7244	while (plist != NULL) {
7245		(void) mi_mpprintf_nr(mp, "%s%d exp %d",
7246		    ddi_major_to_name(plist->mp_device.mdev_major),
7247		    plist->mp_device.mdev_ppa,
7248		    plist->mp_device.mdev_exp_id);
7249		if (plist->mp_device.mdev_state & MDEV_FAILED)
7250			(void) mi_mpprintf_nr(mp, "*");
7251		plist = plist->mp_next;
7252		if (plist)
7253			(void) mi_mpprintf_nr(mp, ", ");
7254	}
7255}
7256
7257/*
7258 * NDD request to set active path. Calling context is man_ioctl, so we are
7259 * exclusive in the inner perimeter.
7260 *
7261 *	Syntax is "ndd -set /dev/dman <man ppa> <pg_id> <phys ppa>"
7262 */
7263/* ARGSUSED3 */
7264static int
7265man_set_active_path(queue_t *wq, mblk_t *mp, char *value, caddr_t cp,
7266    cred_t *cr)
7267{
7268	char		*end, *meta_ppap, *phys_ppap, *pg_idp;
7269	int		meta_ppa;
7270	int		phys_ppa;
7271	int		pg_id;
7272	man_t		*manp;
7273	man_pg_t	*mpg;
7274	man_path_t	*np;
7275	mi_path_t	mpath;
7276	int		status = 0;
7277
7278	MAN_DBG(MAN_PATH, ("man_set_active_path: wq(0x%p) mp(0x%p)"
7279	    " args %s", (void *)wq, (void *)mp, value));
7280
7281	meta_ppap = value;
7282
7283	if ((pg_idp = strchr(value, ' ')) == NULL) {
7284		status = EINVAL;
7285		goto exit;
7286	}
7287
7288	*pg_idp++ = '\0';
7289
7290	if ((phys_ppap = strchr(pg_idp, ' ')) == NULL) {
7291		status = EINVAL;
7292		goto exit;
7293	}
7294
7295	*phys_ppap++ = '\0';
7296
7297	meta_ppa = (int)mi_strtol(meta_ppap, &end, 10);
7298	pg_id = (int)mi_strtol(pg_idp, &end, 10);
7299	phys_ppa = (int)mi_strtol(phys_ppap, &end, 10);
7300
7301	mutex_enter(&man_lock);
7302	manp = ddi_get_soft_state(man_softstate, meta_ppa);
7303	if (manp == NULL || manp->man_pg == NULL) {
7304		status = EINVAL;
7305		mutex_exit(&man_lock);
7306		goto exit;
7307	}
7308
7309	mpg = man_find_pg_by_id(manp->man_pg, pg_id);
7310	if (mpg == NULL) {
7311		status = EINVAL;
7312		mutex_exit(&man_lock);
7313		goto exit;
7314	}
7315
7316	np = man_find_path_by_ppa(mpg->mpg_pathp, phys_ppa);
7317
7318	if (np == NULL) {
7319		status = EINVAL;
7320		mutex_exit(&man_lock);
7321		goto exit;
7322	}
7323
7324	mpath.mip_cmd = MI_PATH_ACTIVATE;
7325	mpath.mip_pg_id = pg_id;
7326	mpath.mip_man_ppa = meta_ppa;
7327	mpath.mip_devs[0] = np->mp_device;
7328	mpath.mip_ndevs = 1;
7329
7330	status = man_pg_cmd(&mpath, NULL);
7331	mutex_exit(&man_lock);
7332
7333exit:
7334
7335	MAN_DBG(MAN_PATH, ("man_set_active_path: returns %d", status));
7336
7337	return (status);
7338}
7339
7340/*
7341 * Dump out the contents of the IOSRAM handoff structure. Note that if
7342 * anything changes here, you must make sure that the sysinit script
7343 * stays in sync with this output.
7344 */
7345/* ARGSUSED */
7346static int
7347man_get_hostinfo(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7348{
7349	manc_t	manc;
7350	char	*ipaddr;
7351	char	ipv6addr[INET6_ADDRSTRLEN];
7352	int	i;
7353	int	status;
7354
7355	if (!man_is_on_domain)
7356		return (0);
7357
7358	if (status = man_get_iosram(&manc)) {
7359		return (status);
7360	}
7361
7362	(void) mi_mpprintf(mp, "manc_magic = 0x%x", manc.manc_magic);
7363	(void) mi_mpprintf(mp, "manc_version = 0%d", manc.manc_version);
7364	(void) mi_mpprintf(mp, "manc_csum = 0x%x", manc.manc_csum);
7365
7366	if (manc.manc_ip_type == AF_INET) {
7367		in_addr_t	netnum;
7368
7369		(void) mi_mpprintf(mp, "manc_ip_type = AF_INET");
7370
7371		ipaddr = man_inet_ntoa(manc.manc_dom_ipaddr);
7372		(void) mi_mpprintf(mp, "manc_dom_ipaddr = %s", ipaddr);
7373
7374		ipaddr = man_inet_ntoa(manc.manc_dom_ip_netmask);
7375		(void) mi_mpprintf(mp, "manc_dom_ip_netmask = %s", ipaddr);
7376
7377		netnum = manc.manc_dom_ipaddr & manc.manc_dom_ip_netmask;
7378		ipaddr = man_inet_ntoa(netnum);
7379		(void) mi_mpprintf(mp, "manc_dom_ip_netnum = %s", ipaddr);
7380
7381		ipaddr = man_inet_ntoa(manc.manc_sc_ipaddr);
7382		(void) mi_mpprintf(mp, "manc_sc_ipaddr = %s", ipaddr);
7383
7384	} else if (manc.manc_ip_type == AF_INET6) {
7385
7386		(void) mi_mpprintf(mp, "manc_ip_type = AF_INET6");
7387
7388		(void) inet_ntop(AF_INET6, (void *)&manc.manc_dom_ipv6addr,
7389		    ipv6addr, INET6_ADDRSTRLEN);
7390		(void) mi_mpprintf(mp, "manc_dom_ipv6addr = %s", ipv6addr);
7391
7392		(void) mi_mpprintf(mp, "manc_dom_ipv6_netmask = %d",
7393		    manc.manc_dom_ipv6_netmask.s6_addr[0]);
7394
7395		(void) inet_ntop(AF_INET6, (void *)&manc.manc_sc_ipv6addr,
7396		    ipv6addr, INET6_ADDRSTRLEN);
7397		(void) mi_mpprintf(mp, "manc_sc_ipv6addr = %s", ipv6addr);
7398
7399	} else {
7400
7401		(void) mi_mpprintf(mp, "manc_ip_type = NONE");
7402	}
7403
7404	(void) mi_mpprintf(mp, "manc_dom_eaddr = %s",
7405	    ether_sprintf(&manc.manc_dom_eaddr));
7406	(void) mi_mpprintf(mp, "manc_sc_eaddr = %s",
7407	    ether_sprintf(&manc.manc_sc_eaddr));
7408
7409	(void) mi_mpprintf(mp, "manc_iob_bitmap = 0x%x\tio boards = ",
7410	    manc.manc_iob_bitmap);
7411	for (i = 0; i < MAN_MAX_EXPANDERS; i++) {
7412		if ((manc.manc_iob_bitmap >> i) & 0x1) {
7413			(void) mi_mpprintf_nr(mp, "%d.1, ", i);
7414		}
7415	}
7416	(void) mi_mpprintf(mp, "manc_golden_iob = %d", manc.manc_golden_iob);
7417
7418	return (0);
7419}
7420
7421static char *
7422man_inet_ntoa(in_addr_t in)
7423{
7424	static char b[18];
7425	unsigned char *p;
7426
7427	p = (unsigned char *)&in;
7428	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
7429	return (b);
7430}
7431
7432/*
7433 * parameter value. cp points to the required parameter.
7434 */
7435/* ARGSUSED */
7436static int
7437man_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
7438{
7439	param_t	*manpa = (param_t *)cp;
7440
7441	(void) mi_mpprintf(mp, "%u", manpa->param_val);
7442	return (0);
7443}
7444
7445/*
7446 * Sets the man parameter to the value in the param_register using
7447 * nd_load().
7448 */
7449/* ARGSUSED */
7450static int
7451man_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
7452{
7453	char *end;
7454	size_t new_value;
7455	param_t	*manpa = (param_t *)cp;
7456
7457	new_value = mi_strtol(value, &end, 10);
7458
7459	if (end == value || new_value < manpa->param_min ||
7460	    new_value > manpa->param_max) {
7461			return (EINVAL);
7462	}
7463
7464	manpa->param_val = new_value;
7465
7466	return (0);
7467
7468}
7469
7470/*
7471 * Free the Named Dispatch Table by calling man_nd_free
7472 */
7473static void
7474man_param_cleanup()
7475{
7476	if (man_ndlist != NULL)
7477		nd_free(&man_ndlist);
7478}
7479
7480/*
7481 * Free the table pointed to by 'ndp'
7482 */
7483static void
7484man_nd_free(caddr_t *nd_pparam)
7485{
7486	ND	*nd;
7487
7488	if ((nd = (ND *)(*nd_pparam)) != NULL) {
7489		if (nd->nd_tbl)
7490			mi_free((char *)nd->nd_tbl);
7491		mi_free((char *)nd);
7492		*nd_pparam = NULL;
7493	}
7494}
7495
7496
7497/*
7498 * man_kstat_update - update the statistics for a meta-interface.
7499 *
7500 *	ksp - kstats struct
7501 *	rw - flag indicating whether stats are to be read or written.
7502 *
7503 *	returns	0
7504 *
7505 * The destination specific kstat information is protected by the
7506 * perimeter lock, so we submit a work request to get the stats
7507 * updated (see man_do_kstats()), and then collect the results
7508 * when cv_signal'd. Note that we are doing cv_timedwait_sig()
7509 * as a precautionary measure only.
7510 */
7511static int
7512man_kstat_update(kstat_t *ksp, int rw)
7513{
7514	man_t			*manp;		/* per instance data */
7515	man_work_t		*wp;
7516	int			status = 0;
7517	kstat_named_t		*knp;
7518	kstat_named_t		*man_knp;
7519	int			i;
7520
7521	MAN_DBG(MAN_KSTAT, ("man_kstat_update: %s\n", rw ? "KSTAT_WRITE" :
7522	    "KSTAT_READ"));
7523
7524	mutex_enter(&man_lock);
7525	manp = (man_t *)ksp->ks_private;
7526	manp->man_refcnt++;
7527
7528	/*
7529	 * If the driver has been configured, get kstats updated by inner
7530	 * perimeter prior to retrieving.
7531	 */
7532	if (man_config_state == MAN_CONFIGURED) {
7533		clock_t wait_status;
7534
7535		man_update_path_kstats(manp);
7536		wp = man_work_alloc(MAN_WORK_KSTAT_UPDATE, KM_SLEEP);
7537		wp->mw_arg.a_man_ppa = manp->man_meta_ppa;
7538		wp->mw_flags = MAN_WFLAGS_CVWAITER;
7539		man_work_add(man_iwork_q, wp);
7540
7541		wait_status = cv_reltimedwait_sig(&wp->mw_cv, &man_lock,
7542		    drv_usectohz(manp->man_kstat_waittime), TR_CLOCK_TICK);
7543
7544		if (wp->mw_flags & MAN_WFLAGS_DONE) {
7545			status = wp->mw_status;
7546			man_work_free(wp);
7547		} else {
7548			ASSERT(wait_status <= 0);
7549			wp->mw_flags &= ~MAN_WFLAGS_CVWAITER;
7550			if (wait_status == 0)
7551				status = EINTR;
7552			else {
7553				MAN_DBG(MAN_KSTAT, ("man_kstat_update: "
7554				    "timedout, returning stale stats."));
7555				status = 0;
7556			}
7557		}
7558		if (status)
7559			goto exit;
7560	}
7561
7562	knp = (kstat_named_t *)ksp->ks_data;
7563	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7564
7565	if (rw == KSTAT_READ) {
7566		for (i = 0; i < MAN_NUMSTATS; i++) {
7567			knp[i].value.ui64 = man_knp[i].value.ui64;
7568		}
7569	} else {
7570		for (i = 0; i < MAN_NUMSTATS; i++) {
7571			man_knp[i].value.ui64 = knp[i].value.ui64;
7572		}
7573	}
7574
7575exit:
7576	manp->man_refcnt--;
7577	mutex_exit(&man_lock);
7578
7579	MAN_DBG(MAN_KSTAT, ("man_kstat_update: returns %d", status));
7580
7581	return (status);
7582}
7583
7584/*
7585 * Sum destination kstats for all active paths for a given instance of the
7586 * MAN driver. Called with perimeter lock.
7587 */
7588static void
7589man_do_kstats(man_work_t *wp)
7590{
7591	man_t		*manp;
7592	man_pg_t	*mpg;
7593	man_path_t	*mp;
7594
7595	MAN_DBG(MAN_KSTAT, ("man_do_kstats:"));
7596
7597	mutex_enter(&man_lock);
7598	/*
7599	 * Sync mp_last_knp for each path associated with the MAN instance.
7600	 */
7601	manp = (man_t *)ddi_get_soft_state(man_softstate,
7602	    wp->mw_arg.a_man_ppa);
7603	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7604
7605		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7606
7607		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7608
7609			MAN_DBG(MAN_KSTAT, ("\tkstat: path"));
7610			MAN_DBGCALL(MAN_KSTAT, man_print_path(mp));
7611
7612			/*
7613			 * We just to update the destination statistics here.
7614			 */
7615			man_sum_dests_kstats(mp->mp_last_knp, mpg);
7616		}
7617	}
7618	mutex_exit(&man_lock);
7619	MAN_DBG(MAN_KSTAT, ("man_do_kstats: returns"));
7620}
7621
7622/*
7623 * Sum device kstats for all active paths for a given instance of the
7624 * MAN driver. Called with man_lock.
7625 */
7626static void
7627man_update_path_kstats(man_t *manp)
7628{
7629	kstat_named_t	*man_knp;
7630	man_pg_t	*mpg;
7631	man_path_t	*mp;
7632
7633	ASSERT(MUTEX_HELD(&man_lock));
7634	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats:"));
7635
7636	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7637
7638	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7639
7640		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7641
7642		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7643
7644			man_update_dev_kstats(man_knp, mp);
7645
7646		}
7647	}
7648	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats: returns"));
7649}
7650
7651/*
7652 * Update the device kstats.
7653 * As man_kstat_update() is called with kstat_chain_lock held,
7654 * we can safely update the statistics from the underlying driver here.
7655 */
7656static void
7657man_update_dev_kstats(kstat_named_t *man_knp, man_path_t *mp)
7658{
7659	kstat_t		*dev_ksp;
7660	major_t		major;
7661	int		instance;
7662	char		buf[KSTAT_STRLEN];
7663
7664
7665	major = mp->mp_device.mdev_major;
7666	instance = mp->mp_device.mdev_ppa;
7667	(void) sprintf(buf, "%s%d", ddi_major_to_name(major), instance);
7668
7669	dev_ksp = kstat_hold_byname(ddi_major_to_name(major), instance, buf,
7670	    ALL_ZONES);
7671	if (dev_ksp != NULL) {
7672
7673		KSTAT_ENTER(dev_ksp);
7674		KSTAT_UPDATE(dev_ksp, KSTAT_READ);
7675		man_sum_kstats(man_knp, dev_ksp, mp->mp_last_knp);
7676		KSTAT_EXIT(dev_ksp);
7677		kstat_rele(dev_ksp);
7678
7679	} else {
7680		MAN_DBG(MAN_KSTAT,
7681		    ("man_update_dev_kstats: no kstat data found for %s(%d,%d)",
7682		    buf, major, instance));
7683	}
7684}
7685
7686static void
7687man_sum_dests_kstats(kstat_named_t *knp, man_pg_t *mpg)
7688{
7689	int		i;
7690	int		flags;
7691	char		*statname;
7692	manstr_t	*msp;
7693	man_dest_t	*mdp;
7694	uint64_t	switches = 0;
7695	uint64_t	linkfails = 0;
7696	uint64_t	linkstales = 0;
7697	uint64_t	icmpv4probes = 0;
7698	uint64_t	icmpv6probes = 0;
7699
7700	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: mpg 0x%p", (void *)mpg));
7701
7702	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
7703
7704		if (!man_str_uses_pg(msp, mpg))
7705			continue;
7706
7707		mdp = &msp->ms_dests[mpg->mpg_pg_id];
7708
7709		switches += mdp->md_switches;
7710		linkfails += mdp->md_linkfails;
7711		linkstales += mdp->md_linkstales;
7712		icmpv4probes += mdp->md_icmpv4probes;
7713		icmpv6probes += mdp->md_icmpv6probes;
7714	}
7715
7716	for (i = 0; i < MAN_NUMSTATS; i++) {
7717
7718		statname = man_kstat_info[i].mk_name;
7719		flags = man_kstat_info[i].mk_flags;
7720
7721		if (!(flags & MK_NOT_PHYSICAL))
7722			continue;
7723
7724		if (strcmp(statname, "man_switches") == 0) {
7725			knp[i].value.ui64 = switches;
7726		} else if (strcmp(statname, "man_link_fails") == 0) {
7727			knp[i].value.ui64 = linkfails;
7728		} else if (strcmp(statname, "man_link_stales") == 0) {
7729			knp[i].value.ui64 = linkstales;
7730		} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7731			knp[i].value.ui64 = icmpv4probes;
7732		} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7733			knp[i].value.ui64 = icmpv6probes;
7734		}
7735	}
7736
7737	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: returns"));
7738}
7739
7740/*
7741 * Initialize MAN named kstats in the space provided.
7742 */
7743static void
7744man_kstat_named_init(kstat_named_t *knp, int num_stats)
7745{
7746	int	i;
7747
7748	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: knp(0x%p) num_stats = %d",
7749	    (void *)knp, num_stats));
7750
7751	for (i = 0; i < num_stats; i++) {
7752		kstat_named_init(&knp[i], man_kstat_info[i].mk_name,
7753		    man_kstat_info[i].mk_type);
7754	}
7755
7756	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: returns"));
7757
7758}
7759
7760/*
7761 * man_kstat_byname - get a kernel stat value from its structure
7762 *
7763 *	ksp - kstat_t structure to play with
7764 *	s   - string to match names with
7765 *	res - in/out result data pointer
7766 *
7767 *	returns	- success - 1 (found)
7768 *		- failure - 0 (not found)
7769 */
7770static int
7771man_kstat_byname(kstat_t *ksp, char *s, kstat_named_t *res)
7772{
7773	int		found = 0;
7774
7775	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: GETTING %s\n", s));
7776
7777	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
7778		kstat_named_t *knp;
7779
7780		for (knp = KSTAT_NAMED_PTR(ksp);
7781		    (caddr_t)knp < ((caddr_t)ksp->ks_data+ksp->ks_data_size);
7782		    knp++) {
7783
7784			if (strcmp(s, knp->name) == NULL) {
7785
7786				res->data_type = knp->data_type;
7787				res->value = knp->value;
7788				found++;
7789
7790				MAN_DBG(MAN_KSTAT2, ("\t%s: %d\n", knp->name,
7791				    (int)knp->value.ul));
7792			}
7793		}
7794	} else {
7795		MAN_DBG(MAN_KSTAT2, ("\tbad kstats type %d\n", ksp->ks_type));
7796	}
7797
7798	/*
7799	 * if getting a value but couldn't find the namestring, result = 0.
7800	 */
7801	if (!found) {
7802		/*
7803		 * a reasonable default
7804		 */
7805		res->data_type = KSTAT_DATA_ULONG;
7806		res->value.l = 0;
7807		MAN_DBG(MAN_KSTAT2, ("\tcouldn't find, using defaults\n"));
7808	}
7809
7810	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: returns\n"));
7811
7812	return (found);
7813}
7814
7815
7816/*
7817 *
7818 * Accumulate MAN driver kstats from the incremental values of the underlying
7819 * physical interfaces.
7820 *
7821 * Parameters:
7822 *	sum_knp		- The named kstat area to put cumulative value,
7823 *			  NULL if we just want to sync next two params.
7824 *	phys_ksp	- Physical interface kstat_t pointer. Contains
7825 *			  more current counts.
7826 * 	phys_last_knp	- counts from the last time we were called for this
7827 *			  physical interface. Note that the name kstats
7828 *			  pointed to are actually in MAN format, but they
7829 *			  hold the mirrored physical devices last read
7830 *			  kstats.
7831 * Basic algorithm is:
7832 *
7833 * 	for each named kstat variable {
7834 *	    sum_knp[i] += (phys_ksp->ksp_data[i] - phys_last_knp[i]);
7835 *	    phys_last_knp[i] = phys_ksp->ksp_data[i];
7836 *	}
7837 *
7838 */
7839static void
7840man_sum_kstats(kstat_named_t *sum_knp, kstat_t *phys_ksp,
7841	kstat_named_t *phys_last_knp)
7842{
7843	char		*physname;
7844	char		*physalias;
7845	char		*statname;
7846	kstat_named_t	phys_kn_entry;
7847	uint64_t	delta64;
7848	int		i;
7849
7850	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: sum_knp(0x%p) phys_ksp(0x%p)"
7851	    " phys_last_knp(0x%p)\n", (void *)sum_knp, (void *)phys_ksp,
7852	    (void *)phys_last_knp));
7853
7854	/*
7855	 * Now for each entry in man_kstat_info, sum the named kstat.
7856	 * Not that all MAN specific kstats will end up !found.
7857	 */
7858	for (i = 0; i < MAN_NUMSTATS; i++) {
7859		int	found = 0;
7860		int	flags = 0;
7861
7862		delta64 = 0;
7863
7864		statname = man_kstat_info[i].mk_name;
7865		physname = man_kstat_info[i].mk_physname;
7866		physalias = man_kstat_info[i].mk_physalias;
7867		flags = man_kstat_info[i].mk_flags;
7868
7869		/*
7870		 * Update MAN private kstats.
7871		 */
7872		if (flags & MK_NOT_PHYSICAL) {
7873
7874			kstat_named_t	*knp = phys_last_knp;
7875
7876			if (sum_knp == NULL)
7877				continue;
7878
7879			if (strcmp(statname, "man_switches") == 0) {
7880				sum_knp[i].value.ui64 = knp[i].value.ui64;
7881			} else if (strcmp(statname, "man_link_fails") == 0) {
7882				sum_knp[i].value.ui64 = knp[i].value.ui64;
7883			} else if (strcmp(statname, "man_link_stales") == 0) {
7884				sum_knp[i].value.ui64 = knp[i].value.ui64;
7885			} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7886				sum_knp[i].value.ui64 = knp[i].value.ui64;
7887			} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7888				sum_knp[i].value.ui64 = knp[i].value.ui64;
7889			}
7890
7891			continue;	/* phys_ksp doesnt have this stat */
7892		}
7893
7894		/*
7895		 * first try it by the "official" name
7896		 */
7897		if (phys_ksp) {
7898			if (man_kstat_byname(phys_ksp, physname,
7899			    &phys_kn_entry)) {
7900
7901				found = 1;
7902
7903			} else if ((physalias) && (man_kstat_byname(phys_ksp,
7904			    physalias, &phys_kn_entry))) {
7905
7906				found = 1;
7907			}
7908		}
7909
7910		if (!found) {
7911			/*
7912			 * clear up the "last" value, no change to the sum
7913			 */
7914			phys_last_knp[i].value.ui64 = 0;
7915			continue;
7916		}
7917
7918		/*
7919		 * at this point, we should have the good underlying
7920		 * kstat value stored in phys_kn_entry
7921		 */
7922		if (flags & MK_NOT_COUNTER) {
7923			/*
7924			 * it isn't a counter, so store the value and
7925			 * move on (e.g. ifspeed)
7926			 */
7927			phys_last_knp[i].value = phys_kn_entry.value;
7928			continue;
7929		}
7930
7931		switch (phys_kn_entry.data_type) {
7932		case KSTAT_DATA_UINT32:
7933
7934			/*
7935			 * this handles 32-bit wrapping
7936			 */
7937			if (phys_kn_entry.value.ui32 <
7938			    phys_last_knp[i].value.ui32) {
7939
7940				/*
7941				 * we've wrapped!
7942				 */
7943				delta64 += (UINT_MAX -
7944				    phys_last_knp[i].value.ui32);
7945				phys_last_knp[i].value.ui32 = 0;
7946			}
7947
7948			delta64 += phys_kn_entry.value.ui32 -
7949			    phys_last_knp[i].value.ui32;
7950			phys_last_knp[i].value.ui32 = phys_kn_entry.value.ui32;
7951			break;
7952
7953		default:
7954			/*
7955			 * must be a 64-bit value, we ignore 64-bit
7956			 * wraps, since they shouldn't ever happen
7957			 * within the life of a machine (if we assume
7958			 * machines don't stay up for more than a few
7959			 * hundred years without a reboot...)
7960			 */
7961			delta64 = phys_kn_entry.value.ui64 -
7962			    phys_last_knp[i].value.ui64;
7963			phys_last_knp[i].value.ui64 = phys_kn_entry.value.ui64;
7964		}
7965
7966		if (sum_knp != NULL) {
7967			/*
7968			 * now we need to save the value
7969			 */
7970			switch (sum_knp[i].data_type) {
7971			case KSTAT_DATA_UINT32:
7972				/* trunk down to 32 bits, possibly lossy */
7973				sum_knp[i].value.ui32 += (uint32_t)delta64;
7974				break;
7975
7976			default:
7977				sum_knp[i].value.ui64 += delta64;
7978				break;
7979			}
7980		}
7981	}
7982
7983	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: returns\n"));
7984}
7985
7986
7987#if defined(DEBUG)
7988
7989
7990static char *_ms_flags[] = {
7991	"NONE",
7992	"FAST", 	/* 0x1 */
7993	"RAW",		/* 0x2 */
7994	"ALLPHYS",	/* 0x4 */
7995	"ALLMULTI",	/* 0x8 */
7996	"ALLSAP",	/* 0x10 */
7997	"CKSUM",	/* 0x20 */
7998	"MULTI",	/* 0x40 */
7999	"SERLPBK",	/* 0x80 */
8000	"MACLPBK",	/* 0x100 */
8001	"CLOSING",	/* 0x200 */
8002	"CLOSE_DONE",	/* 0x400 */
8003	"CONTROL"	/* 0x800 */
8004};
8005
8006static void
8007man_print_msp(manstr_t *msp)
8008{
8009	char	buf[512];
8010	char	prbuf[512];
8011	uint_t	flags;
8012	int	i;
8013
8014	cmn_err(CE_CONT, "\tmsp(0x%p)\n", (void *)msp);
8015
8016	if (msp == NULL)
8017		return;
8018
8019	cmn_err(CE_CONT, "\t%s%d SAP(0x%x):\n",
8020	    ddi_major_to_name(msp->ms_meta_maj), msp->ms_meta_ppa,
8021	    msp->ms_sap);
8022
8023	buf[0] = '\0';
8024	prbuf[0] = '\0';
8025	flags = msp->ms_flags;
8026	for (i = 0; i < A_CNT(_ms_flags); i++) {
8027		if ((flags >> i) & 0x1) {
8028			(void) sprintf(buf, " %s |", _ms_flags[i+1]);
8029			(void) strcat(prbuf, buf);
8030		}
8031	}
8032	prbuf[strlen(prbuf) - 1] = '\0';
8033	cmn_err(CE_CONT, "\tms_flags: %s\n", prbuf);
8034
8035	cmn_err(CE_CONT, "\tms_dlpistate: %s\n", dss[msp->ms_dlpistate]);
8036
8037	cmn_err(CE_CONT, "\tms_dl_mp: 0x%p\n", (void *)msp->ms_dl_mp);
8038
8039	cmn_err(CE_CONT, "\tms_manp: 0x%p\n", (void *)msp->ms_manp);
8040
8041	cmn_err(CE_CONT, "\tms_dests: 0x%p\n", (void *)msp->ms_dests);
8042
8043}
8044
8045static char *_md_state[] = {
8046	"NOTPRESENT",		/* 0x0 */
8047	"INITIALIZING",		/* 0x1 */
8048	"READY",		/* 0x2 */
8049	"PLUMBING",		/* 0x4 */
8050	"CLOSING"		/* 0x8 */
8051};
8052
8053static void
8054man_print_mdp(man_dest_t *mdp)
8055{
8056	uint_t		state;
8057	int		i;
8058	char		buf[64];
8059	char		prbuf[512];
8060
8061	buf[0] = '\0';
8062	prbuf[0] = '\0';
8063
8064	cmn_err(CE_CONT, "\tmdp(0x%p)\n", (void *)mdp);
8065
8066	if (mdp == NULL)
8067		return;
8068
8069	cmn_err(CE_CONT, "\tmd_pg_id: %d\n", mdp->md_pg_id);
8070	cmn_err(CE_CONT, "\tmd_dst_eaddr: %s\n",
8071	    ether_sprintf(&mdp->md_dst_eaddr));
8072	cmn_err(CE_CONT, "\tmd_src_eaddr: %s\n",
8073	    ether_sprintf(&mdp->md_src_eaddr));
8074	cmn_err(CE_CONT, "\tmd_dlpistate: %s", dss[mdp->md_dlpistate]);
8075	cmn_err(CE_CONT, "\tmd_muxid: 0x%u", mdp->md_muxid);
8076	cmn_err(CE_CONT, "\tmd_rcvcnt %lu md_lastrcvcnt %lu", mdp->md_rcvcnt,
8077	    mdp->md_lastrcvcnt);
8078
8079	/*
8080	 * Print out state as text.
8081	 */
8082	state = mdp->md_state;
8083
8084	if (state == 0) {
8085		(void) strcat(prbuf, _md_state[0]);
8086	} else {
8087
8088		for (i = 0; i < A_CNT(_md_state); i++) {
8089			if ((state >> i) & 0x1)  {
8090				(void) sprintf(buf, " %s |", _md_state[i+1]);
8091				(void) strcat(prbuf, buf);
8092			}
8093		}
8094		prbuf[strlen(prbuf) -1] = '\0';
8095	}
8096	cmn_err(CE_CONT, "\tmd_state: %s", prbuf);
8097
8098	cmn_err(CE_CONT, "\tmd_device:\n");
8099	man_print_dev(&mdp->md_device);
8100
8101}
8102
8103static void
8104man_print_man(man_t *manp)
8105{
8106	char	buf[512];
8107	char	prbuf[512];
8108
8109	buf[0] = '\0';
8110	prbuf[0] = '\0';
8111
8112	if (manp == NULL)
8113		return;
8114
8115	if (ddi_major_to_name(manp->man_meta_major)) {
8116		(void) sprintf(buf, "\t man_device: %s%d\n",
8117		    ddi_major_to_name(manp->man_meta_major),
8118		    manp->man_meta_ppa);
8119	} else {
8120		(void) sprintf(buf, "\t major: %d", manp->man_meta_major);
8121		(void) sprintf(buf, "\t ppa: %d", manp->man_meta_ppa);
8122	}
8123
8124	cmn_err(CE_CONT, "%s", buf);
8125
8126}
8127
8128static char *_mdev_state[] = {
8129	"UNASSIGNED  ",
8130	"ASSIGNED",
8131	"ACTIVE",
8132	"FAILED"
8133};
8134
8135static void
8136man_print_dev(man_dev_t *mdevp)
8137{
8138	char	buf[512];
8139	char	prbuf[512];
8140	int	i;
8141	uint_t	state;
8142
8143	buf[0] = '\0';
8144	prbuf[0] = '\0';
8145
8146	if (mdevp == NULL)
8147		return;
8148
8149	if (mdevp->mdev_major == 0) {
8150number:
8151		(void) sprintf(buf, "\t mdev_major: %d\n", mdevp->mdev_major);
8152	} else if (ddi_major_to_name(mdevp->mdev_major)) {
8153		(void) sprintf(buf, "\t mdev_device: %s%d\n",
8154		    ddi_major_to_name(mdevp->mdev_major),
8155		    mdevp->mdev_ppa);
8156	} else
8157		goto number;
8158
8159	cmn_err(CE_CONT, "%s", buf);
8160
8161	cmn_err(CE_CONT, "\t mdev_exp_id: %d\n", mdevp->mdev_exp_id);
8162
8163	buf[0] = '\0';
8164	prbuf[0] = '\0';
8165	state = mdevp->mdev_state;
8166
8167	if (state == 0) {
8168		(void) strcat(prbuf, _mdev_state[0]);
8169	} else {
8170		for (i = 0; i < A_CNT(_mdev_state); i++) {
8171			if ((state >> i) & 0x1) {
8172				(void) sprintf(buf, " %s |", _mdev_state[i+1]);
8173				(void) strcat(prbuf, buf);
8174			}
8175		}
8176	}
8177
8178	prbuf[strlen(prbuf) - 2] = '\0';
8179
8180	cmn_err(CE_CONT, "\t mdev_state: %s\n", prbuf);
8181
8182}
8183
8184static char *_mip_cmd[] = {
8185	"MI_PATH_READ",
8186	"MI_PATH_ASSIGN",
8187	"MI_PATH_ACTIVATE",
8188	"MI_PATH_DEACTIVATE",
8189	"MI_PATH_UNASSIGN"
8190};
8191
8192static void
8193man_print_mtp(mi_time_t *mtp)
8194{
8195	cmn_err(CE_CONT, "\tmtp(0x%p)\n", (void *)mtp);
8196
8197	if (mtp == NULL)
8198		return;
8199
8200	cmn_err(CE_CONT, "\tmtp_instance: %d\n", mtp->mtp_man_ppa);
8201
8202	cmn_err(CE_CONT, "\tmtp_time: %d\n", mtp->mtp_time);
8203
8204}
8205
8206static void
8207man_print_mip(mi_path_t *mip)
8208{
8209	cmn_err(CE_CONT, "\tmip(0x%p)\n", (void *)mip);
8210
8211	if (mip == NULL)
8212		return;
8213
8214	cmn_err(CE_CONT, "\tmip_pg_id: %d\n", mip->mip_pg_id);
8215
8216	cmn_err(CE_CONT, "\tmip_cmd: %s\n", _mip_cmd[mip->mip_cmd]);
8217
8218	cmn_err(CE_CONT, "\tmip_eaddr: %s\n", ether_sprintf(&mip->mip_eaddr));
8219
8220	cmn_err(CE_CONT, "\tmip_devs: 0x%p\n", (void *)mip->mip_devs);
8221
8222	cmn_err(CE_CONT, "\tmip_ndevs: %d\n", mip->mip_ndevs);
8223
8224}
8225
8226static void
8227man_print_mpg(man_pg_t *mpg)
8228{
8229	cmn_err(CE_CONT, "\tmpg(0x%p)\n", (void *)mpg);
8230
8231	if (mpg == NULL)
8232		return;
8233
8234	cmn_err(CE_CONT, "\tmpg_next: 0x%p\n", (void *)mpg->mpg_next);
8235
8236	cmn_err(CE_CONT, "\tmpg_pg_id: %d\n", mpg->mpg_pg_id);
8237
8238	cmn_err(CE_CONT, "\tmpg_man_ppa: %d\n", mpg->mpg_man_ppa);
8239
8240	cmn_err(CE_CONT, "\tmpg_dst_eaddr: %s\n",
8241	    ether_sprintf(&mpg->mpg_dst_eaddr));
8242
8243	cmn_err(CE_CONT, "\tmpg_pathp: 0x%p\n", (void *)mpg->mpg_pathp);
8244
8245}
8246
8247static char *_mw_flags[] = {
8248	"NOWAITER",		/* 0x0 */
8249	"CVWAITER",		/* 0x1 */
8250	"QWAITER",		/* 0x2 */
8251	"DONE"		/* 0x3 */
8252};
8253
8254static void
8255man_print_work(man_work_t *wp)
8256{
8257	int 	i;
8258
8259	cmn_err(CE_CONT, "\twp(0x%p)\n\n", (void *)wp);
8260
8261	if (wp == NULL)
8262		return;
8263
8264	cmn_err(CE_CONT, "\tmw_type: %s\n", _mw_type[wp->mw_type]);
8265
8266	cmn_err(CE_CONT, "\tmw_flags: ");
8267	for (i = 0; i < A_CNT(_mw_flags); i++) {
8268		if ((wp->mw_flags >> i) & 0x1)
8269			cmn_err(CE_CONT, "%s", _mw_flags[i]);
8270	}
8271	cmn_err(CE_CONT, "\n");
8272
8273	cmn_err(CE_CONT, "\twp_status: %d\n", wp->mw_status);
8274
8275	cmn_err(CE_CONT, "\twp_arg: 0x%p\n", (void *)&wp->mw_arg);
8276
8277	cmn_err(CE_CONT, "\tmw_next: 0x%p\n", (void *)wp->mw_next);
8278
8279	cmn_err(CE_CONT, "\twp_q: 0x%p", (void *)wp->mw_q);
8280
8281}
8282
8283static void
8284man_print_path(man_path_t *mp)
8285{
8286	cmn_err(CE_CONT, "\tmp(0x%p)\n\n", (void *)mp);
8287
8288	if (mp == NULL)
8289		return;
8290
8291	cmn_err(CE_CONT, "\tmp_device:");
8292	man_print_dev(&mp->mp_device);
8293
8294	cmn_err(CE_CONT, "\tmp_next: 0x%p\n", (void *)mp->mp_next);
8295
8296	cmn_err(CE_CONT, "\tmp_last_knp: 0x%p\n", (void *)mp->mp_last_knp);
8297
8298	cmn_err(CE_CONT, "\tmp_lru: 0x%lx", mp->mp_lru);
8299
8300}
8301
8302void *
8303man_dbg_kzalloc(int line, size_t size, int kmflags)
8304{
8305	void *tmp;
8306
8307	tmp = kmem_zalloc(size, kmflags);
8308	MAN_DBG(MAN_KMEM, ("0x%p %lu\tzalloc'd @ %d\n", (void *)tmp,
8309	    size, line));
8310
8311	return (tmp);
8312
8313}
8314
8315void
8316man_dbg_kfree(int line, void *buf, size_t size)
8317{
8318
8319	MAN_DBG(MAN_KMEM, ("0x%p %lu\tfree'd @ %d\n", (void *)buf, size, line));
8320
8321	kmem_free(buf, size);
8322
8323}
8324
8325#endif  /* DEBUG */
8326