1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef	_SYS_MDI_IMPLDEFS_H
27#define	_SYS_MDI_IMPLDEFS_H
28
29
30#include <sys/note.h>
31#include <sys/types.h>
32#include <sys/sunmdi.h>
33#include <sys/modhash.h>
34#include <sys/callb.h>
35#include <sys/devctl.h>
36
37#ifdef	__cplusplus
38extern "C" {
39#endif
40
41#ifdef _KERNEL
42
43/*
44 * Multipath Driver Interfaces
45 *
46 * The multipathing framework is provided in two modules.  The 'mpxio' misc.
47 * module provides the core multipath framework and the 'scsi_vhci' nexus
48 * driver provides the SCSI-III command set driver functionality for
49 * managing Fibre-Channel storage devices.
50 *
51 * As in any multipathing solution there are three major problems to solve:
52 *
53 * 1) Identification and enumeration of multipath client devices.
54 * 2) Optimal path selection when routing I/O requests.
55 * 3) Observability interfaces to snapshot the multipath configuration,
56 *    and infrastructure to provide performance and error statistics.
57 *
58 * The mpxio framework consists of several major components:
59 *
60 * 1) The MDI is the Multiplexed Device Interface; this is the core glue which
61 *    holds the following components together.
62 * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing
63 *    services for a given bus technology (example: 'scsi_vhci' provides
64 *    multipathing support for SCSI-III fibre-channel devices).
65 * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport
66 *    services for a given host controller (example: 'fcp' provides transport
67 *    for fibre-channel devices).
68 * 4) Client Devices are standard Solaris target (or leaf) drivers
69 *    (example: 'ssd' is the standard disk driver for fibre-channel arrays).
70 * 5) Multipath information nodes ('pathinfo' nodes) connect client device
71 *    nodes and pHCI device nodes in the device tree.
72 *
73 * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might
74 * look like this:
75 *
76 *              /\
77 *             /  ............
78 *     <vHCI>:/               \
79 *      +-----------+   +-----------+
80 *      | scsi_vhci |   |  pci@1f,0 |
81 *      +-----------+   +-----------+
82 *            /   \               \
83 * <Client>: /     \ :<Client>     \ :parent(pHCI)
84 *  +----------+ +-----------+    +-------------+
85 *  | ssd 1    | | ssd 2     |    | qlc@0,0     |
86 *  +----------+ +-----------+    +-------------+
87 *   |            |                /        \
88 *   |            |       <pHCI>: /          \ :<pHCI>
89 *   |            |      +-------------+   +-------------+
90 *   |            |      | pHCI 1 (fp) |   | pHCI 2 (fp) |
91 *   |            |      +-------------+   +-------------+
92 *   |            |          /        |      /          |
93 *   |            |    +------+       |    +------+     |
94 *   |            |    | ssd 3|       |    | ssd  |     |
95 *   |            |    |!mpxio|       |    | (OBP)|     |
96 *   |            |    +------+       |    +------+     |
97 *   |            |                   |                 |
98 *   |            |       <pathinfo>: |                 |
99 *   |            |               +-------+         +--------+
100 *   |            +-------------->| path  |-------->| path   |
101 *   |                            | info  |         | info   |
102 *   |                            | node 1|         | node 3 |
103 *   |                            +-------+         +--------+
104 *   |                                |               |
105 *   |                                |            +~~~~~~~~+
106 *   |                            +-------+        :+--------+
107 *   +--------------------------->| path  |-------->| path   |
108 *                                | info  |        :| info   |
109 *                                | node 2|        +| node 4 |
110 *                                +-------+         +--------+
111 *
112 * The multipath information nodes (mdi_pathinfo nodes) establish the
113 * relationship between the pseudo client driver instance nodes (children
114 * of the vHCI) and the physical host controller interconnect (pHCI
115 * drivers) forming a matrix structure.
116 *
117 * The mpxio module implements locking at multiple granularity levels to
118 * support the needs of various consumers.  The multipath matrix can be
119 * column locked, or row locked depending on the consumer. The intention
120 * is to balance simplicity and performance.
121 *
122 * Locking:
123 *
124 * The devinfo locking still applies:
125 *
126 *   1) An ndi_devi_enter of a parent protects linkage/state of children.
127 *   2) state >= DS_INITIALIZED adds devi_ref of parent
128 *   3) devi_ref at state >= DS_ATTACHED prevents detach(9E).
129 *
130 * The ordering of 1) is (vHCI, pHCI). For a DEBUG kernel this ordering
131 * is asserted by the ndi_devi_enter() implementation.  There is also an
132 * ndi_devi_enter(Client), which is atypical since the client is a leaf.
133 * This is done to synchronize pathinfo nodes during devinfo snapshot (see
134 * di_register_pip) by pretending that the pathinfo nodes are children
135 * of the client.
136 *
137 * In addition to devinfo locking the current implementation utilizes
138 * the following locks:
139 *
140 *   mdi_mutex: protects the global list of vHCIs.
141 *
142 *   vh_phci_mutex: per-vHCI (mutex) lock: protects list of pHCIs registered
143 *   with vHCI.
144 *
145 *   vh_client_mutex: per-vHCI (mutex) lock: protects list/hash of Clients
146 *   associated with vHCI.
147 *
148 *   ph_mutex: per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo
149 *   node list) and per-pHCI structure fields.  mdi_pathinfo node creation,
150 *   deletion and child mdi_pathinfo node state changes are serialized on per
151 *   pHCI basis (Protection against DR).
152 *
153 *   ct_mutex: per-client (mutex) lock: protects the row (client-mdi_pathinfo
154 *   node list) and per-client structure fields.  The client-mdi_pathinfo node
155 *   list is typically walked to select an optimal path when routing I/O
156 *   requests.
157 *
158 *   pi_mutex: per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node
159 *   structure fields.
160 *
161 * Note that per-Client structure and per-pHCI fields are freely readable when
162 * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo
163 * node guarantees that its corresponding client and pHCI devices will not be
164 * freed.
165 */
166
167/*
168 * MDI Client global unique identifier property name string definition
169 */
extern const char			*mdi_client_guid_prop;
/*
 * Non-const 'char *' view of mdi_client_guid_prop.  The cast drops the const
 * qualifier, so users should still treat the string as read-only.  The
 * expansion is fully parenthesized so that operators applied to the macro
 * (indexing, pointer arithmetic) act on the cast result and not on the
 * underlying variable.
 */
#define	MDI_CLIENT_GUID_PROP		((char *)mdi_client_guid_prop)
172
173/*
174 * MDI Client load balancing policy definitions
175 *
176 * Load balancing policies are determined on a per-vHCI basis and are
177 * configurable via the vHCI's driver.conf file.
178 */
typedef enum {
	LOAD_BALANCE_NONE = 0,		/* no balancing: alternate pathing */
	LOAD_BALANCE_RR = 1,		/* round robin */
	LOAD_BALANCE_LBA = 2		/* logical block addressing */
} client_lb_t;

/*
 * Arguments that accompany a load balancing policy.
 */
typedef struct {
	int	region_size;	/* see LOAD_BALANCE_DEFAULT_REGION_SIZE */
} client_lb_args_t;
188
189/*
190 * MDI client load balancing property name/value string definitions
191 */
extern const char			*mdi_load_balance;
extern const char			*mdi_load_balance_none;
extern const char			*mdi_load_balance_ap;
extern const char			*mdi_load_balance_rr;
extern const char			*mdi_load_balance_lba;

/*
 * Non-const 'char *' views of the strings declared above.  The casts drop
 * the const qualifier, so users should still treat the strings as read-only.
 * Each expansion is fully parenthesized so that operators applied to the
 * macro act on the cast result rather than on the underlying variable.
 */
#define	LOAD_BALANCE_PROP		((char *)mdi_load_balance)
#define	LOAD_BALANCE_PROP_NONE		((char *)mdi_load_balance_none)
#define	LOAD_BALANCE_PROP_AP		((char *)mdi_load_balance_ap)
#define	LOAD_BALANCE_PROP_RR		((char *)mdi_load_balance_rr)
#define	LOAD_BALANCE_PROP_LBA		((char *)mdi_load_balance_lba)
203
/*
 * Default for region size.
 * NOTE(review): the unit of this value is not visible in this header;
 * presumably it is the default for client_lb_args_t.region_size -- confirm
 * against the consumer.
 */
#define	LOAD_BALANCE_DEFAULT_REGION_SIZE	18
206
207/*
208 * vHCI drivers:
209 *
210 * vHCI drivers are pseudo nexus drivers which implement multipath services
211 * for a specific command set or bus architecture ('class').  There is a
212 * single instance of the vHCI driver for each command set which supports
213 * multipath devices.
214 *
215 * Each vHCI driver registers the following callbacks from attach(9e).
216 */
/* First revision of the vHCI ops vector. */
#define	MDI_VHCI_OPS_REV_1		1
/* Current revision; presumably the value placed in vo_revision -- confirm. */
#define	MDI_VHCI_OPS_REV		MDI_VHCI_OPS_REV_1
219
typedef struct mdi_vhci_ops {
	/* ops vector revision (see MDI_VHCI_OPS_REV) */
	int	vo_revision;

	/* mdi_pathinfo node init callback */
	int	(*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags);

	/* mdi_pathinfo node uninit callback */
	int	(*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip,
		    int flags);

	/*
	 * mdi_pathinfo node state change callback.
	 * NOTE(review): the meaning of the unnamed uint32_t argument is not
	 * visible here; confirm against the vHCI implementation.
	 */
	int	(*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip,
		    mdi_pathinfo_state_t state, uint32_t, int flags);

	/* Client path failover callback */
	int	(*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags);

	/* Client attached callback */
	void	(*vo_client_attached)(dev_info_t *cdip);

	/* Ask vHCI if 'cinfo' device is supported as a client */
	int	(*vo_is_dev_supported)(dev_info_t *vdip, dev_info_t *pdip,
		    void *cinfo);
} mdi_vhci_ops_t;
245
246/*
247 * An mdi_vhci structure is created and bound to the devinfo node of every
248 * registered vHCI class driver; this happens when a vHCI registers itself from
249 * attach(9e).  This structure is unbound and freed when the vHCI unregisters
 * at detach(9e) time.
251 *
252 * Each vHCI driver is associated with a vHCI class name; this is the handle
253 * used to register and unregister pHCI drivers for a given transport.
254 *
255 * Locking: Different parts of this structure are guarded by different
256 * locks: global threading of multiple vHCIs and initialization is protected
257 * by mdi_mutex, the list of pHCIs associated with a vHCI is protected by
258 * vh_phci_mutex, and Clients are protected by vh_client_mutex.
259 *
260 * XXX Depending on the context, some of the fields can be freely read without
261 * holding any locks (ex. holding vh_client_mutex lock also guarantees that
262 * the vHCI (parent) cannot be unexpectedly freed).
263 */
typedef struct mdi_vhci {
	/* protected by mdi_mutex (guards the global list of vHCIs)... */
	struct mdi_vhci		*vh_next;	/* next vHCI link	*/
	struct mdi_vhci		*vh_prev;	/* prev vHCI link	*/
	char			*vh_class;	/* vHCI class name	*/
	dev_info_t		*vh_dip;	/* vHCI devi handle	*/
	int			vh_refcnt;	/* vHCI reference count	*/
	struct mdi_vhci_config	*vh_config;	/* vHCI config		*/
	client_lb_t		vh_lb;		/* vHCI load-balancing policy */
	struct mdi_vhci_ops	*vh_ops;	/* vHCI callback vectors */

	/* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */
	kmutex_t		vh_phci_mutex;	/* pHCI list mutex	*/
	int			vh_phci_count;	/* pHCI device count	*/
	struct mdi_phci		*vh_phci_head;	/* pHCI list head	*/
	struct mdi_phci		*vh_phci_tail;	/* pHCI list tail	*/

	/* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
	kmutex_t		vh_client_mutex; /* Client mutex	*/
	int			vh_client_count; /* Client count	*/
	/* Client GUID hash; presumably CLIENT_HASH_TABLE_SIZE buckets */
	struct client_hash	*vh_client_table;
} mdi_vhci_t;
286
287/*
288 * per-vHCI lock macros
289 */
/* vh_phci_mutex: guards the per-vHCI list of registered pHCIs. */
#define	MDI_VHCI_PHCI_LOCK(vh)		mutex_enter(&(vh)->vh_phci_mutex)
#define	MDI_VHCI_PHCI_TRYLOCK(vh)	mutex_tryenter(&(vh)->vh_phci_mutex)
#define	MDI_VHCI_PHCI_UNLOCK(vh)	mutex_exit(&(vh)->vh_phci_mutex)
#ifdef	DEBUG
/* Ownership test; only defined for DEBUG builds. */
#define	MDI_VHCI_PHCI_LOCKED(vh)	MUTEX_HELD(&(vh)->vh_phci_mutex)
/*
 * Historical misspelling ("PCHI") of MDI_VHCI_PHCI_LOCKED, retained so that
 * existing users of the old name continue to build.
 */
#define	MDI_VHCI_PCHI_LOCKED(vh)	MDI_VHCI_PHCI_LOCKED(vh)
#endif	/* DEBUG */
/* vh_client_mutex: guards the per-vHCI list/hash of clients. */
#define	MDI_VHCI_CLIENT_LOCK(vh)	mutex_enter(&(vh)->vh_client_mutex)
#define	MDI_VHCI_CLIENT_TRYLOCK(vh)	mutex_tryenter(&(vh)->vh_client_mutex)
#define	MDI_VHCI_CLIENT_UNLOCK(vh)	mutex_exit(&(vh)->vh_client_mutex)
#ifdef	DEBUG
/* Ownership test; only defined for DEBUG builds. */
#define	MDI_VHCI_CLIENT_LOCKED(vh)	MUTEX_HELD(&(vh)->vh_client_mutex)
#endif	/* DEBUG */
302
303
304/*
305 * GUID Hash definitions
306 *
307 * Since all the mpxio managed devices for a given class are enumerated under
308 * the single vHCI instance for that class, sequentially walking through the
309 * client device link to find a client would be prohibitively slow.
310 */
311
#define	CLIENT_HASH_TABLE_SIZE	(32)	/* GUID hash table size */

/*
 * Client hash table structure: one hash chain of clients.  Chain linkage
 * presumably runs through mdi_client ct_hnext/ct_hprev -- confirm against
 * the implementation.
 */
struct client_hash {
	struct mdi_client	*ct_hash_head;	/* Client hash head	*/
	int			ct_hash_count;	/* Client hash count	*/
};
321
322
323/*
324 * pHCI Drivers:
325 *
326 * Physical HBA drivers provide transport services for mpxio-managed devices.
327 * As each pHCI instance is attached, it must register itself with the mpxio
328 * framework using mdi_phci_register().  When the pHCI is detached it must
329 * similarly call mdi_phci_unregister().
330 *
331 * The framework maintains a list of registered pHCI device instances for each
332 * vHCI.  This list involves (vh_phci_count, vh_phci_head, vh_phci_tail) and
333 * (ph_next, ph_prev, ph_vhci) and is protected by vh_phci_mutex.
334 *
335 * Locking order:
336 *
337 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))		XXX
338 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock))		XXX
339 */
typedef struct mdi_phci {
	/* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */
	struct mdi_phci		*ph_next;	/* next pHCI link	*/
	struct mdi_phci		*ph_prev;	/* prev pHCI link	*/
	dev_info_t		*ph_dip;	/* pHCI devi handle	*/
	struct mdi_vhci		*ph_vhci;	/* pHCI back ref. to vHCI */

	/* protected by MDI_PHCI_LOCK ph_mutex... */
	kmutex_t		ph_mutex;	/* per-pHCI mutex	*/
	int			ph_path_count;	/* pi count		*/
	mdi_pathinfo_t		*ph_path_head;	/* pi list head		*/
	mdi_pathinfo_t		*ph_path_tail;	/* pi list tail		*/
	int			ph_flags;	/* MDI_PHCI_FLAGS_* bits */
	/* count maintained by MDI_PHCI_UNSTABLE/MDI_PHCI_STABLE */
	int			ph_unstable;
	/* broadcast by MDI_PHCI_STABLE when ph_unstable reaches zero */
	kcondvar_t		ph_unstable_cv;

	/* protected by mdi_phci_[gs]et_vhci_private caller... */
	void			*ph_vprivate;	/* vHCI driver private	*/
} mdi_phci_t;
359
360/*
361 * A pHCI device is 'unstable' while one or more paths are in a transitional
362 * state.  Hotplugging is prevented during this state.
363 */
/*
 * MDI_PHCI_UNSTABLE increments ph_unstable; MDI_PHCI_STABLE decrements it and
 * broadcasts ph_unstable_cv when the count reaches zero.  Neither macro takes
 * a lock itself; ph_unstable is listed among the ph_mutex-protected fields of
 * mdi_phci_t.
 *
 * NOTE(review): MDI_PHCI_UNSTABLE carries its own trailing ';' and
 * MDI_PHCI_STABLE expands to a bare '{ }' block, so neither can be used as
 * the unbraced body of an 'if' that is followed by 'else'.  The expansions
 * are left as-is because call sites are not visible from this header.
 */
#define	MDI_PHCI_UNSTABLE(ph)		(ph)->ph_unstable++;
#define	MDI_PHCI_STABLE(ph) { \
	(ph)->ph_unstable--; \
	if ((ph)->ph_unstable == 0) { \
		cv_broadcast(&(ph)->ph_unstable_cv); \
	} \
}
371
372/*
373 * per-pHCI lock macros
374 */
/* ph_mutex: guards the per-pHCI pathinfo list and per-pHCI fields. */
#define	MDI_PHCI_LOCK(ph)		mutex_enter(&(ph)->ph_mutex)
#define	MDI_PHCI_TRYLOCK(ph)		mutex_tryenter(&(ph)->ph_mutex)
#define	MDI_PHCI_UNLOCK(ph)		mutex_exit(&(ph)->ph_mutex)
#ifdef	DEBUG
/*
 * Ownership test; only defined for DEBUG builds.  The macro parameter must
 * be the identifier used in the expansion: a mismatched parameter name makes
 * the macro ignore its argument and silently test whatever variable named
 * 'ph' happens to be in scope at the call site.
 */
#define	MDI_PHCI_LOCKED(ph)		MUTEX_HELD(&(ph)->ph_mutex)
#endif	/* DEBUG */
381
382/*
383 * pHCI state definitions and macros to track the pHCI driver instance state
384 */
/* Bits kept in mdi_phci_t.ph_flags. */
#define	MDI_PHCI_FLAGS_OFFLINE		0x1	/* pHCI is offline */
#define	MDI_PHCI_FLAGS_SUSPEND		0x2	/* pHCI is suspended */
#define	MDI_PHCI_FLAGS_POWER_DOWN	0x4	/* pHCI is powered down */
#define	MDI_PHCI_FLAGS_DETACH		0x8	/* pHCI is detached */
#define	MDI_PHCI_FLAGS_USER_DISABLE	0x10	/* pHCI is disabled,user */
#define	MDI_PHCI_FLAGS_D_DISABLE	0x20	/* pHCI is disabled,driver */
#define	MDI_PHCI_FLAGS_D_DISABLE_TRANS	0x40	/* pHCI is disabled,transient */
#define	MDI_PHCI_FLAGS_POWER_TRANSITION	0x80	/* pHCI is in power transition */

/* Union of the three "disabled" bits. */
#define	MDI_PHCI_DISABLE_MASK						\
	    (MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE |	\
	    MDI_PHCI_FLAGS_D_DISABLE_TRANS)

/* True when none of the disable bits is set; other flags are not examined. */
#define	MDI_PHCI_IS_READY(ph)						\
	    (((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0)

/*
 * Flag accessors.  Each SET/CLEAR style macro expands to a brace-enclosed
 * block that ASSERTs ph_mutex is held (via MDI_PHCI_LOCKED) and then sets or
 * clears one bit of ph_flags.  Each IS macro yields the masked bit itself:
 * non-zero when set, but not normalized to 1.
 */

/* offline / online */
#define	MDI_PHCI_SET_OFFLINE(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE;			}
#define	MDI_PHCI_SET_ONLINE(ph)						{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE;			}
#define	MDI_PHCI_IS_OFFLINE(ph)						\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE)

/* suspend / resume */
#define	MDI_PHCI_SET_SUSPEND(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND;			}
#define	MDI_PHCI_SET_RESUME(ph)						{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND;			}
#define	MDI_PHCI_IS_SUSPENDED(ph)					\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND)

/* detach / attach */
#define	MDI_PHCI_SET_DETACH(ph)						{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH;			}
#define	MDI_PHCI_SET_ATTACH(ph)						{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH;			}

/* power down / power up */
#define	MDI_PHCI_SET_POWER_DOWN(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN;		}
#define	MDI_PHCI_SET_POWER_UP(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN;		}
#define	MDI_PHCI_IS_POWERED_DOWN(ph)					\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN)

/* user-requested disable */
#define	MDI_PHCI_SET_USER_ENABLE(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE;		}
#define	MDI_PHCI_SET_USER_DISABLE(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE;		}
#define	MDI_PHCI_IS_USER_DISABLED(ph)					\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE)

/* driver-requested disable */
#define	MDI_PHCI_SET_DRV_ENABLE(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE;		}
#define	MDI_PHCI_SET_DRV_DISABLE(ph)					{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE;			}
#define	MDI_PHCI_IS_DRV_DISABLED(ph)					\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE)

/* driver-requested transient disable */
#define	MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph)				{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS;		}
#define	MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph)				{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS;		}
#define	MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)				\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS)

/* power transition in progress */
#define	MDI_PHCI_SET_POWER_TRANSITION(ph)				{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION;		}
#define	MDI_PHCI_CLEAR_POWER_TRANSITION(ph)				{\
	    ASSERT(MDI_PHCI_LOCKED(ph));				\
	    (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION;		}
#define	MDI_PHCI_IS_POWER_TRANSITION(ph)				\
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION)
470
471/*
472 * mpxio Managed Clients:
473 *
474 * This framework creates a struct mdi_client for every client device created
475 * by the framework as a result of self-enumeration of target devices by the
476 * registered pHCI devices.  This structure is bound to client device dev_info
477 * node at the time of client device allocation (ndi_devi_alloc(9e)). This
478 * structure is unbound from the dev_info node when mpxio framework removes a
479 * client device node from the system.
480 *
481 * This structure is created when a first path is enumerated and removed when
482 * last path is de-enumerated from the system.
483 *
484 * Multipath client devices are instantiated as children of corresponding vHCI
485 * driver instance. Each client device is uniquely identified by a GUID
486 * provided by target device itself.  The parent vHCI device also maintains a
487 * hashed list of client devices, protected by vh_client_mutex.
488 *
489 * Typically pHCI devices self-enumerate their child devices using taskq,
490 * resulting in multiple paths to the same client device to be enumerated by
491 * competing threads.
492 *
 * Currently this framework defines three kinds of load-balancing policy
 * (see client_lb_t), configurable through the vHCI driver configuration
 * files.
 *
 * NONE		- Legacy AP mode
 * Round Robin	- Balance the pHCI load in a Round Robin fashion.
 * LBA		- Logical Block Addressing.
498 *
499 * This framework identifies the client device in three distinct states:
500 *
501 * OPTIMAL	- Client device has at least one redundant path.
502 * DEGRADED	- No redundant paths (critical).  Failure in the current active
503 *		  path would result in data access failures.
504 * FAILED	- No paths are available to access this device.
505 *
506 * Locking order:
507 *
508 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))			XXX
509 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock))		XXX
510 */
typedef struct mdi_client {
	/* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
	struct mdi_client	*ct_hnext;	/* next client		*/
	struct mdi_client	*ct_hprev;	/* prev client		*/
	dev_info_t		*ct_dip;	/* client devi handle	*/
	struct mdi_vhci		*ct_vhci;	/* vHCI back ref	*/
	char			*ct_drvname;	/* client driver name	*/
	char			*ct_guid;	/* client guid		*/
	client_lb_t		ct_lb;		/* load balancing scheme */
	client_lb_args_t	*ct_lb_args;	/* load balancing args */


	/* protected by MDI_CLIENT_LOCK ct_mutex... */
	kmutex_t		ct_mutex;	/* per-client mutex	*/
	int			ct_path_count;	/* multi path count	*/
	mdi_pathinfo_t		*ct_path_head;	/* multi path list head	*/
	mdi_pathinfo_t		*ct_path_tail;	/* multi path list tail	*/
	mdi_pathinfo_t		*ct_path_last;	/* last path used for i/o */
	int			ct_state;	/* MDI_CLIENT_STATE_* value */
	int			ct_flags;	/* MDI_CLIENT_FLAGS_* bits */
	int			ct_failover_flags;	/* Failover args */
	int			ct_failover_status;	/* last fo status */
	kcondvar_t		ct_failover_cv;	/* Failover status cv	*/
	/* count maintained by MDI_CLIENT_UNSTABLE/MDI_CLIENT_STABLE */
	int			ct_unstable;
	/* broadcast by MDI_CLIENT_STABLE when ct_unstable reaches zero */
	kcondvar_t		ct_unstable_cv;

	int			ct_power_cnt;	/* Hold count on parent power */
	kcondvar_t		ct_powerchange_cv;
					/* Paths in power transient state */
	short			ct_powercnt_config;
					/* held in pre/post config */
	short			ct_powercnt_unconfig;
					/* held in pre/post unconfig */
	int			ct_powercnt_reset;
					/* ct_power_cnt was reset */

	void			*ct_cprivate;	/* client driver private */
	void			*ct_vprivate;	/* vHCI driver private	*/
} mdi_client_t;
550
551/*
552 * per-Client device locking definitions
553 */
/* ct_mutex: guards the per-client pathinfo list and per-client fields. */
#define	MDI_CLIENT_LOCK(ct)		mutex_enter(&(ct)->ct_mutex)
#define	MDI_CLIENT_TRYLOCK(ct)		mutex_tryenter(&(ct)->ct_mutex)
#define	MDI_CLIENT_UNLOCK(ct)		mutex_exit(&(ct)->ct_mutex)
#ifdef	DEBUG
/* Ownership test; only defined for DEBUG builds. */
#define	MDI_CLIENT_LOCKED(ct)		MUTEX_HELD(&(ct)->ct_mutex)
#endif	/* DEBUG */
560
561/*
 * A Client device is unstable while one or more paths are in a transitional
 * state.  We do not allow failover to take place while paths are in a
 * transient state. Similarly we do not allow state transitions while client
 * device failover is in progress.
566 */
/*
 * MDI_CLIENT_UNSTABLE increments ct_unstable; MDI_CLIENT_STABLE decrements it
 * and broadcasts ct_unstable_cv when the count reaches zero.  Neither macro
 * takes a lock itself; ct_unstable is listed among the ct_mutex-protected
 * fields of mdi_client_t.
 *
 * NOTE(review): MDI_CLIENT_UNSTABLE carries its own trailing ';' and
 * MDI_CLIENT_STABLE expands to a bare '{ }' block, so neither can be used as
 * the unbraced body of an 'if' that is followed by 'else'.  The expansions
 * are left as-is because call sites are not visible from this header.
 */
#define	MDI_CLIENT_UNSTABLE(ct)		(ct)->ct_unstable++;
#define	MDI_CLIENT_STABLE(ct) { \
	(ct)->ct_unstable--; \
	if ((ct)->ct_unstable == 0) { \
		cv_broadcast(&(ct)->ct_unstable_cv); \
	} \
}
574
575/*
576 * Client driver instance state definitions:
577 */
/* Bits kept in mdi_client_t.ct_flags. */
#define	MDI_CLIENT_FLAGS_OFFLINE		0x00000001
#define	MDI_CLIENT_FLAGS_SUSPEND		0x00000002
#define	MDI_CLIENT_FLAGS_POWER_DOWN		0x00000004
#define	MDI_CLIENT_FLAGS_DETACH			0x00000008
#define	MDI_CLIENT_FLAGS_FAILOVER		0x00000010
#define	MDI_CLIENT_FLAGS_REPORT_DEV		0x00000020
#define	MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS	0x00000040
#define	MDI_CLIENT_FLAGS_ASYNC_FREE		0x00000080
#define	MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED	0x00000100
#define	MDI_CLIENT_FLAGS_POWER_TRANSITION	0x00000200

/*
 * Flag accessors.  Each SET/CLEAR style macro expands to a brace-enclosed
 * block that ASSERTs ct_mutex is held (via MDI_CLIENT_LOCKED) and then sets
 * or clears one bit of ct_flags.  Each IS macro yields the masked bit itself:
 * non-zero when set, but not normalized to 1.
 */

/* offline / online */
#define	MDI_CLIENT_SET_OFFLINE(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE;			}
#define	MDI_CLIENT_SET_ONLINE(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE;		}
#define	MDI_CLIENT_IS_OFFLINE(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE)

/* suspend / resume */
#define	MDI_CLIENT_SET_SUSPEND(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND;			}
#define	MDI_CLIENT_SET_RESUME(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND;		}
#define	MDI_CLIENT_IS_SUSPENDED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND)

/* power down / power up */
#define	MDI_CLIENT_SET_POWER_DOWN(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN;		}
#define	MDI_CLIENT_SET_POWER_UP(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN;		}
#define	MDI_CLIENT_IS_POWERED_DOWN(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN)

/* power transition in progress */
#define	MDI_CLIENT_SET_POWER_TRANSITION(ct)				{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_TRANSITION;	}
#define	MDI_CLIENT_CLEAR_POWER_TRANSITION(ct)				{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION;	}
#define	MDI_CLIENT_IS_POWER_TRANSITION(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION)

/* detach / attach */
#define	MDI_CLIENT_SET_DETACH(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH;			}
#define	MDI_CLIENT_SET_ATTACH(ct)					{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH;			}
#define	MDI_CLIENT_IS_DETACHED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH)

/* failover in progress */
#define	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct)				{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER;		}
#define	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct)			{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER;		}
#define	MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER)

/* report-device needed */
#define	MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct)				{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV;		}
#define	MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct)				{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV;		}
#define	MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV)

/* path free in progress */
#define	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct)			{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS;	}
#define	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct)			{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS;	}
#define	MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS)

/* device not supported (set and test only; no clear macro is defined here) */
#define	MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct)				{\
	    ASSERT(MDI_CLIENT_LOCKED(ct));				\
	    (ct)->ct_flags |= MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED;	}
#define	MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)
666
667/*
668 * Client operating states.
669 */
/* Values stored in mdi_client_t.ct_state. */
#define	MDI_CLIENT_STATE_OPTIMAL	1
#define	MDI_CLIENT_STATE_DEGRADED	2
#define	MDI_CLIENT_STATE_FAILED		3

/* Read the client's current state value. */
#define	MDI_CLIENT_STATE(ct)		((ct)->ct_state)
/* Store a new state value; the expression evaluates to the stored value. */
#define	MDI_CLIENT_SET_STATE(ct, state)	((ct)->ct_state = (state))

/* Non-zero when the client's state is MDI_CLIENT_STATE_FAILED. */
#define	MDI_CLIENT_IS_FAILED(ct)	\
	(MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED)
679
680/*
681 * mdi_pathinfo nodes:
682 *
683 * From this framework's perspective, a 'path' is a tuple consisting of a
684 * client or end device, a host controller which provides device
685 * identification and transport services (pHCI), and bus specific unit
686 * addressing information.  A path may be decorated with properties which
687 * describe the capabilities of the path; such properties are analogous to
688 * device node and minor node properties.
689 *
 * The framework maintains a linked list of the mdi_pathinfo nodes created by
 * every pHCI driver instance via the pi_phci_link linkage; this is used (for
 * example) to make sure that all relevant pathinfo nodes are freed before the
 * pHCI is unregistered.
694 *
695 * Locking order:
696 *
697 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))		XXX
698 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))	XXX
699 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))		XXX
700 * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex))		XXX
701 *
702 * mdi_pathinfo node structure definition
703 */
struct mdi_pathinfo {
	/* protected by MDI_PHCI_LOCK ph_mutex... */
	struct mdi_pathinfo	*pi_phci_link;	 /* next path in phci list */
	mdi_phci_t		*pi_phci;	/* owning pHCI (mdi_phci_t) */

	/* protected by MDI_CLIENT_LOCK ct_mutex... */
	struct mdi_pathinfo	*pi_client_link; /* next path in client list */
	mdi_client_t		*pi_client;	/* client		*/

	/* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
	char			*pi_addr;	/* path unit address	*/
	int			pi_path_instance; /* path instance */

	/* protected by MDI_PI_LOCK pi_mutex... */
	kmutex_t		pi_mutex;	/* per path mutex	*/
	/* state word; split by MDI_PI_STATE / MDI_PI_EXT_STATE masks */
	mdi_pathinfo_state_t	pi_state;
	/* old state word; read via MDI_PI_OLD_STATE / MDI_PI_OLD_EXT_STATE */
	mdi_pathinfo_state_t	pi_old_state;
	kcondvar_t		pi_state_cv;	/* path state condvar	*/
	nvlist_t		*pi_prop;	/* Properties		*/
	void			*pi_cprivate;	/* client private info	*/
	void			*pi_pprivate;	/* phci private info	*/
	/* reference count; adjusted by MDI_PI_HOLD / MDI_PI_RELE */
	int			pi_ref_cnt;
	kcondvar_t		pi_ref_cv;	/* condition variable	*/
	struct mdi_pi_kstats	*pi_kstats;	/* aggregate kstats */
	int			pi_pm_held;	/* phci's kidsup incremented */
	int			pi_preferred;	/* Preferred path	*/
	void			*pi_vprivate;	/* vhci private info	*/
	uint_t			pi_flags;	/* path flags */
};
733
734/*
735 * pathinfo statistics:
736 *
737 * The mpxio architecture allows for multiple pathinfo nodes for each
738 * client-pHCI combination.  For statistics purposes, these statistics are
739 * aggregated into a single client-pHCI set of kstats.
740 */
struct mdi_pi_kstats {
	int	pi_kstat_ref;		/* # paths aggregated, also a ref cnt */
	kstat_t	*pi_kstat_iostats;	/* mdi:iopath statistic set */
	/* error statistics; MDI_PI_ERRSTAT reads ks_data as struct pi_errs */
	kstat_t *pi_kstat_errstats;
};
746
747/*
748 * pathinfo error kstat
749 */
/*
 * One named kstat per error class.  MDI_PI_ERRSTAT selects a member by name
 * (see the MDI_PI_*ERR / MDI_PI_* selector macros) and increments its
 * value.ui32 field.
 */
struct pi_errs {
	struct kstat_named pi_softerrs;		/* "Soft" Error */
	struct kstat_named pi_harderrs;		/* "Hard" Error */
	struct kstat_named pi_transerrs;	/* Transport Errors */
	struct kstat_named pi_icnt_busy;	/* Interconnect Busy */
	struct kstat_named pi_icnt_errors;	/* Interconnect Errors */
	struct kstat_named pi_phci_rsrc;	/* pHCI No Resources */
	struct kstat_named pi_phci_localerr;	/* pHCI Local Errors */
	struct kstat_named pi_phci_invstate;	/* pHCI Invalid State */
	struct kstat_named pi_failedfrom;	/* Failover: Failed From */
	struct kstat_named pi_failedto;		/* Failover: Failed To */
};
762
/*
 * Increment an error counter.
 *
 * 'x' is one of the MDI_PI_* error codes, each of which names a member of
 * struct pi_errs.  Nothing is counted when the path has no kstats attached
 * (pi_kstats is NULL).
 */
#define	MDI_PI_ERRSTAT(pip, x) { \
	if (MDI_PI(pip)->pi_kstats != NULL) { \
		struct pi_errs *mdi_pi_errstat_p = (struct pi_errs *) \
		    MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
		mdi_pi_errstat_p->x.value.ui32 += 1; \
	} \
}
773
/*
 * Error codes which can be passed as the second argument of MDI_PI_ERRSTAT.
 * Each code expands to the name of the struct pi_errs member to increment.
 */
#define	MDI_PI_SOFTERR	pi_softerrs
#define	MDI_PI_HARDERR	pi_harderrs
#define	MDI_PI_TRANSERR	pi_transerrs
#define	MDI_PI_ICNTBUSY	pi_icnt_busy
#define	MDI_PI_ICNTERR	pi_icnt_errors
#define	MDI_PI_PHCIRSRC	pi_phci_rsrc
#define	MDI_PI_PHCILOCL	pi_phci_localerr
#define	MDI_PI_PHCIINVS	pi_phci_invstate
#define	MDI_PI_FAILFROM	pi_failedfrom
#define	MDI_PI_FAILTO	pi_failedto
787
/* Cast a path handle to the implementation's struct mdi_pathinfo pointer. */
#define	MDI_PI(pip)			((struct mdi_pathinfo *)(pip))
789
/*
 * Per-path mutex (pi_mutex) helpers.  MDI_PI_LOCKED() is defined only in
 * DEBUG builds; the users in this header reference it from within ASSERT().
 */
#define	MDI_PI_LOCK(pip)		mutex_enter(&(MDI_PI(pip)->pi_mutex))
#define	MDI_PI_TRYLOCK(pip)		mutex_tryenter(&(MDI_PI(pip)->pi_mutex))
#define	MDI_PI_UNLOCK(pip)		mutex_exit(&(MDI_PI(pip)->pi_mutex))
#ifdef	DEBUG
#define	MDI_PI_LOCKED(pip)		MUTEX_HELD(&(MDI_PI(pip)->pi_mutex))
#endif	/* DEBUG */
796
/*
 * Take / drop a reference on a path by adjusting pi_ref_cnt.  Each macro
 * evaluates to the updated count.
 */
#define	MDI_PI_HOLD(pip)		(MDI_PI(pip)->pi_ref_cnt += 1)
#define	MDI_PI_RELE(pip)		(MDI_PI(pip)->pi_ref_cnt -= 1)
799
/*
 * NOTE(review): no user of this flag is visible in this header; presumably
 * it marks an extended (enable/disable) path state change - confirm against
 * the framework source.
 */
#define	MDI_EXT_STATE_CHANGE		0x10000000


/*
 * Operation flag bits.  NOTE(review): the consumers are outside this header;
 * the names suggest the kind of operation (disable/enable), the notification
 * phase (before/after the state change) and a synchronous-request modifier -
 * confirm against the callers.
 */
#define	MDI_DISABLE_OP			0x1
#define	MDI_ENABLE_OP			0x2
#define	MDI_BEFORE_STATE_CHANGE		0x4
#define	MDI_AFTER_STATE_CHANGE		0x8
#define	MDI_SYNC_FLAG			0x10
808
/*
 * Accessors splitting pi_state and pi_old_state into the base path state
 * (MDI_PATHINFO_STATE_MASK bits) and the extended state
 * (MDI_PATHINFO_EXT_STATE_MASK bits).
 */
#define	MDI_PI_STATE(pip)	\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_OLD_STATE(pip)	\
	(MDI_PI(pip)->pi_old_state & MDI_PATHINFO_STATE_MASK)

#define	MDI_PI_EXT_STATE(pip)	\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK)
#define	MDI_PI_OLD_EXT_STATE(pip)	\
	(MDI_PI(pip)->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK)
818
/*
 * Set, clear and test the MDI_PATHINFO_STATE_TRANSIENT bit of pi_state.
 * The set/clear forms assert that the path lock is held.
 */
#define	MDI_PI_SET_TRANSIENT(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state |			\
	    (MDI_PATHINFO_STATE_TRANSIENT);				}
#define	MDI_PI_CLEAR_TRANSIENT(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state &			\
	    (~MDI_PATHINFO_STATE_TRANSIENT);				}
#define	MDI_PI_IS_TRANSIENT(pip)					\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)
827
/*
 * Disable bits kept in pi_state: user disable, driver disable and driver
 * transient disable.  The *_DISABLE macros set a bit, the *_ENABLE macros
 * clear it again; all of them assert that the path lock is held.
 */
#define	MDI_PI_SET_USER_DISABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state |			\
	    (MDI_PATHINFO_STATE_USER_DISABLE);				}
#define	MDI_PI_SET_DRV_DISABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state |			\
	    (MDI_PATHINFO_STATE_DRV_DISABLE);				}
#define	MDI_PI_SET_DRV_DISABLE_TRANS(pip)				{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state |			\
	    (MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT);			}

#define	MDI_PI_SET_USER_ENABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state &			\
	    (~MDI_PATHINFO_STATE_USER_DISABLE);				}
#define	MDI_PI_SET_DRV_ENABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state &			\
	    (~MDI_PATHINFO_STATE_DRV_DISABLE);				}
#define	MDI_PI_SET_DRV_ENABLE_TRANS(pip)				{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PI(pip)->pi_state &			\
	    (~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT);		}

/* Each test evaluates to the masked bit (non-zero when set). */
#define	MDI_PI_IS_USER_DISABLE(pip)					\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE(pip)					\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip)				\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)

/* 1 when any of the three disable bits is set, otherwise 0. */
#define	MDI_PI_IS_DISABLE(pip)						\
	((MDI_PI(pip)->pi_state &					\
	    ((MDI_PATHINFO_STATE_USER_DISABLE) |			\
	    (MDI_PATHINFO_STATE_DRV_DISABLE) |				\
	    (MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT))) != 0)
859
/* True when the base path state is MDI_PATHINFO_STATE_INIT. */
#define	MDI_PI_IS_INIT(pip)						\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_INIT)

/*
 * True when, ignoring the extended state bits, the path is in the INIT
 * state with the TRANSIENT bit set.
 */
#define	MDI_PI_IS_INITING(pip)						\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==	\
	    (MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))

/*
 * Unlike the other MDI_PI_SET_* state macros this one overwrites pi_state
 * outright, so any extended state bits are dropped as well.
 */
#define	MDI_PI_SET_INIT(pip)						{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = (MDI_PATHINFO_STATE_INIT);		}
871
/*
 * Start a transition to ONLINE: remember the current base state in
 * pi_old_state, then switch to ONLINE with the TRANSIENT bit set while
 * carrying the extended state bits over unchanged.
 */
#define	MDI_PI_SET_ONLINING(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT);	}

/* True while the path is ONLINE with the TRANSIENT bit still set. */
#define	MDI_PI_IS_ONLINING(pip)						\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==	\
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))

/* Settle in ONLINE; only the extended state bits of pi_state survive. */
#define	MDI_PI_SET_ONLINE(pip)						{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_ONLINE);				}

/* True when the base path state is ONLINE. */
#define	MDI_PI_IS_ONLINE(pip)						\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_ONLINE)
895
/*
 * Start a transition to OFFLINE: remember the current base state in
 * pi_old_state, then switch to OFFLINE with the TRANSIENT bit set while
 * carrying the extended state bits over unchanged.
 */
#define	MDI_PI_SET_OFFLINING(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT); }

/* True while the path is OFFLINE with the TRANSIENT bit still set. */
#define	MDI_PI_IS_OFFLINING(pip)					\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==	\
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))

/* Settle in OFFLINE; only the extended state bits of pi_state survive. */
#define	MDI_PI_SET_OFFLINE(pip)						{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_OFFLINE);				}

/* True when the base path state is OFFLINE. */
#define	MDI_PI_IS_OFFLINE(pip)						\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_OFFLINE)
919
/*
 * Start a transition to STANDBY: remember the current base state in
 * pi_old_state, then switch to STANDBY with the TRANSIENT bit set while
 * carrying the extended state bits over unchanged.
 */
#define	MDI_PI_SET_STANDBYING(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT); }

/* Settle in STANDBY; only the extended state bits of pi_state survive. */
#define	MDI_PI_SET_STANDBY(pip)						{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_STANDBY);				}

/* True when the base path state is STANDBY. */
#define	MDI_PI_IS_STANDBY(pip)						\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_STANDBY)
939
/*
 * Start a transition to FAULT: remember the current base state in
 * pi_old_state, then switch to FAULT with the TRANSIENT bit set while
 * carrying the extended state bits over unchanged.
 */
#define	MDI_PI_SET_FAULTING(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT);	}

/* Settle in FAULT; only the extended state bits of pi_state survive. */
#define	MDI_PI_SET_FAULT(pip)						{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK) |	\
	    (MDI_PATHINFO_STATE_FAULT);					}

/* True when the base path state is FAULT. */
#define	MDI_PI_IS_FAULT(pip)						\
	(MDI_PI_STATE(pip) == MDI_PATHINFO_STATE_FAULT)
959
/*
 * Non-zero when the pHCI this path hangs off (pi_phci) has
 * MDI_PHCI_FLAGS_SUSPEND set in its ph_flags.
 */
#define	MDI_PI_IS_SUSPENDED(pip)					\
	(MDI_PI(pip)->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND)
962
/*
 * Set, clear and test MDI_PATHINFO_FLAGS_HIDDEN in pi_flags.  The set/clear
 * forms assert that the path lock is held.
 */
#define	MDI_PI_FLAGS_SET_HIDDEN(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_flags = MDI_PI(pip)->pi_flags |			\
	    (MDI_PATHINFO_FLAGS_HIDDEN);				}
#define	MDI_PI_FLAGS_CLR_HIDDEN(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_flags = MDI_PI(pip)->pi_flags &			\
	    (~MDI_PATHINFO_FLAGS_HIDDEN);				}
#define	MDI_PI_FLAGS_IS_HIDDEN(pip)					\
	((MDI_PI(pip)->pi_flags & MDI_PATHINFO_FLAGS_HIDDEN) ==		\
	    MDI_PATHINFO_FLAGS_HIDDEN)
972
/*
 * Set, clear and test MDI_PATHINFO_FLAGS_DEVICE_REMOVED in pi_flags.  The
 * set/clear forms assert that the path lock is held.
 */
#define	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip)				{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_flags = MDI_PI(pip)->pi_flags |			\
	    (MDI_PATHINFO_FLAGS_DEVICE_REMOVED);			}
#define	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip)				{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_flags = MDI_PI(pip)->pi_flags &			\
	    (~MDI_PATHINFO_FLAGS_DEVICE_REMOVED);			}
#define	MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)				\
	((MDI_PI(pip)->pi_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED) ==	\
	    MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
982
983/*
984 * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures
985 * hold the vhci to phci client mappings of the on-disk vhci busconfig cache.
986 */
987
/*
 * phci structure of vhci cache
 *
 * Entries are chained through cphci_next; the list ends are kept in
 * vhcache_phci_head/vhcache_phci_tail of mdi_vhci_cache_t.
 */
typedef struct mdi_vhcache_phci {
	char			*cphci_path;	/* phci path name */
	uint32_t		cphci_id;	/* used when building nvlist */
	mdi_phci_t		*cphci_phci;	/* pointer to actual phci */
	struct mdi_vhcache_phci	*cphci_next;	/* next in vhci phci list */
} mdi_vhcache_phci_t;
995
/*
 * pathinfo structure of vhci cache
 *
 * Entries are chained through cpi_next into a per-client path list whose
 * ends are kept in cct_cpi_head/cct_cpi_tail of mdi_vhcache_client_t.
 */
typedef struct mdi_vhcache_pathinfo {
	char			*cpi_addr;	/* path address */
	mdi_vhcache_phci_t	*cpi_cphci;	/* phci the path belongs to */
	struct mdi_pathinfo	*cpi_pip;	/* ptr to actual pathinfo */
	uint32_t		cpi_flags;	/* see below */
	struct mdi_vhcache_pathinfo *cpi_next;	/* next path for the client */
} mdi_vhcache_pathinfo_t;

/*
 * cpi_flags
 *
 * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has
 * failed.
 */
#define	MDI_CPI_HINT_PATH_DOES_NOT_EXIST	0x0001
1012
/*
 * client structure of vhci cache
 *
 * Entries are chained through cct_next; the list ends are kept in
 * vhcache_client_head/vhcache_client_tail of mdi_vhci_cache_t.
 */
typedef struct mdi_vhcache_client {
	char			*cct_name_addr;	/* client address */
	mdi_vhcache_pathinfo_t	*cct_cpi_head;	/* client's path list head */
	mdi_vhcache_pathinfo_t	*cct_cpi_tail;	/* client's path list tail */
	struct mdi_vhcache_client *cct_next;	/* next in vhci client list */
} mdi_vhcache_client_t;
1020
/*
 * vhci cache structure - one for vhci instance
 *
 * Holds the phci list and the client list of the cache together with a hash
 * of the clients.  An instance is embedded in mdi_vhci_config_t as
 * vhc_vhcache.
 */
typedef struct mdi_vhci_cache {
	mdi_vhcache_phci_t	*vhcache_phci_head;	/* phci list head */
	mdi_vhcache_phci_t	*vhcache_phci_tail;	/* phci list tail */
	mdi_vhcache_client_t	*vhcache_client_head;	/* client list head */
	mdi_vhcache_client_t	*vhcache_client_tail;	/* client list tail */
	mod_hash_t		*vhcache_client_hash;	/* client hash */
	int			vhcache_flags;		/* see below */
	int64_t			vhcache_clean_time;	/* last clean time */
	krwlock_t		vhcache_lock;		/* cache lock */
} mdi_vhci_cache_t;

/* vhcache_flags */
#define	MDI_VHCI_CACHE_SETUP_DONE	0x0001	/* cache setup completed */
1035
/* vhci bus config structure - one for vhci instance */
typedef struct mdi_vhci_config {
	char			*vhc_vhcache_filename;	/* on-disk file name */
	mdi_vhci_cache_t	vhc_vhcache;		/* vhci cache */
	kmutex_t		vhc_lock;		/* vhci config lock */
	/* NOTE(review): presumably used together with vhc_lock - confirm */
	kcondvar_t		vhc_cv;
	int			vhc_flags;		/* see below */

	/* flush vhci cache when lbolt reaches vhc_flush_at_ticks */
	clock_t			vhc_flush_at_ticks;

	/*
	 * Head and tail of the client list whose paths are being configured
	 * asynchronously. vhc_acc_count is the number of clients on this list.
	 * vhc_acc_thrcount is the number of threads running to configure
	 * the paths for these clients.
	 */
	struct mdi_async_client_config *vhc_acc_list_head;
	struct mdi_async_client_config *vhc_acc_list_tail;
	int			vhc_acc_count;
	int			vhc_acc_thrcount;

	/* callback id - for flushing the cache during system shutdown */
	callb_id_t		vhc_cbid;

	/*
	 * vhc_path_discovery_boot -	number of times path discovery will be
	 *				attempted during early boot.
	 * vhc_path_discovery_postboot - number of times path discovery will be
	 *				attempted during late boot.
	 * vhc_path_discovery_cutoff_time - time at which paths were last
	 *				discovered + some timeout
	 */
	int			vhc_path_discovery_boot;
	int			vhc_path_discovery_postboot;
	int64_t			vhc_path_discovery_cutoff_time;
} mdi_vhci_config_t;

/* vhc_flags */
#define	MDI_VHC_SINGLE_THREADED		0x0001	/* config single threaded */
#define	MDI_VHC_EXIT			0x0002	/* exit all config activity */
#define	MDI_VHC_VHCACHE_DIRTY		0x0004	/* cache dirty */
#define	MDI_VHC_VHCACHE_FLUSH_THREAD	0x0008	/* cache flush thread running */
#define	MDI_VHC_VHCACHE_FLUSH_ERROR	0x0010	/* failed to flush cache */
#define	MDI_VHC_READONLY_FS		0x0020	/* filesys is readonly */
1081
/*
 * Singly linked list node holding one physical path string.  A list of these
 * hangs off acc_phclient_path_list_head in mdi_async_client_config_t.
 */
typedef struct mdi_phys_path {
	char			*phys_path;
	struct mdi_phys_path	*phys_path_next;
} mdi_phys_path_t;
1086
/*
 * Lookup tokens are used to cache the result of the vhci cache client lookup
 * operations (to reduce the number of real lookup operations).  A token
 * remembers the client that was found and the time of that lookup.
 */
typedef struct mdi_vhcache_lookup_token {
	mdi_vhcache_client_t	*lt_cct;		/* vhcache client */
	int64_t			lt_cct_lookup_time;	/* last lookup time */
} mdi_vhcache_lookup_token_t;
1095
/*
 * asynchronous configuration of client paths
 *
 * Entries are chained through acc_next; the list ends are kept in
 * vhc_acc_list_head/vhc_acc_list_tail of mdi_vhci_config_t.
 */
typedef struct mdi_async_client_config {
	char			*acc_ct_name;	/* client name */
	char			*acc_ct_addr;	/* client address */
	mdi_phys_path_t		*acc_phclient_path_list_head;	/* path head */
	mdi_vhcache_lookup_token_t acc_token;	/* lookup token */
	struct mdi_async_client_config *acc_next; /* next in vhci acc list */
} mdi_async_client_config_t;
1104
/*
 * vHCI driver instance registration/unregistration
 *
 * mdi_vhci_register() is called by a vHCI driver to register itself as the
 * manager of devices from a particular 'class'.  This should be called from
 * attach(9E).
 *
 * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI
 * instance from the framework.
 *
 * NOTE(review): the parameters are unnamed here; presumably the class name,
 * the vHCI devinfo node, the vHCI ops vector and flags - confirm against the
 * definitions.
 */
int		mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int);
int		mdi_vhci_unregister(dev_info_t *, int);
1117
/*
 * Utility functions
 *
 * NOTE(review): judging by the names, these return the number of paths of a
 * pHCI and map a path address on a pHCI to a devinfo node - confirm against
 * the definitions.
 */
int		mdi_phci_get_path_count(dev_info_t *);
dev_info_t	*mdi_phci_path2devinfo(dev_info_t *, caddr_t);
1123
1124
/*
 * Path Selection Functions:
 *
 * mdi_select_path() is called by a vHCI driver to select to which path an
 * I/O request should be routed.  The caller passes the 'buf' structure as
 * one of the parameters.  The mpxio framework uses the buf's contents to
 * maintain per path statistics (total I/O size / count pending).  If more
 * than one online path is available, the framework automatically selects
 * a suitable one.  If a failover operation is active for this client device
 * the call fails, returning MDI_BUSY.
 *
 * By default this function returns a suitable path in the 'online' state,
 * based on the current load balancing policy.  Currently we support
 * LOAD_BALANCE_NONE (Previously selected online path will continue to be
 * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths
 * will be selected in a round robin fashion).  The load balancing scheme
 * can be configured in the vHCI driver's configuration file (driver.conf).
 *
 * vHCI drivers may override this default behavior by specifying appropriate
 * flags.  If start_pip is specified (non NULL), it is used as the routine's
 * starting point; it starts walking from there to find the next appropriate
 * path.
 *
 * The following values for 'flags' (the third, int argument) are currently
 * defined; the meaning of the fourth argument to mdi_select_path (the
 * void * 'arg') depends on the flags used.
 *
 *   <none>:				default, arg is pip
 *   MDI_SELECT_ONLINE_PATH:		select an ONLINE path preferred-first,
 *					arg is pip
 *   MDI_SELECT_STANDBY_PATH:		select a STANDBY path, arg is pip
 *   MDI_SELECT_USER_DISABLE_PATH:	select user disable for failover and
 *					auto_failback
 *   MDI_SELECT_PATH_INSTANCE:		select a specific path, arg is
 *					path instance
 *   MDI_SELECT_NO_PREFERRED:		select path without preferred-first
 *
 * The selected paths are returned in an mdi_hold_path() state (pi_ref_cnt),
 * caller should release the hold by calling mdi_rele_path() at the end of
 * operation.
 */
int		mdi_select_path(dev_info_t *, struct buf *, int,
		    void *, mdi_pathinfo_t **);
int		mdi_set_lb_policy(dev_info_t *, client_lb_t);
int		mdi_set_lb_region_size(dev_info_t *, int);
client_lb_t	mdi_get_lb_policy(dev_info_t *);

/*
 * flags for mdi_select_path() routine
 */
#define	MDI_SELECT_ONLINE_PATH		0x0001
#define	MDI_SELECT_STANDBY_PATH		0x0002
#define	MDI_SELECT_USER_DISABLE_PATH	0x0004
#define	MDI_SELECT_PATH_INSTANCE	0x0008
#define	MDI_SELECT_NO_PREFERRED		0x0010
1179
/*
 * MDI client device utility functions
 *
 * NOTE(review): judging by the names, these are the client-side counterparts
 * of mdi_phci_get_path_count() and mdi_phci_path2devinfo() - confirm against
 * the definitions.
 */
int		mdi_client_get_path_count(dev_info_t *);
dev_info_t	*mdi_client_path2devinfo(dev_info_t *, caddr_t);
1185
/*
 * Failover:
 *
 * The vHCI driver calls mdi_failover() to initiate a failover operation.
 * mdi_failover() calls back into the vHCI driver's vo_failover()
 * entry point to perform the actual failover operation.  The reason
 * for requiring the vHCI driver to initiate failover by calling
 * mdi_failover(), instead of directly executing vo_failover() itself,
 * is to ensure that the mdi framework can keep track of the client
 * state properly.  Additionally, mdi_failover() provides as a
 * convenience the option of performing the failover operation
 * synchronously or asynchronously.
 *
 * Upon successful completion of the failover operation, the paths that were
 * previously ONLINE will be in the STANDBY state, and the newly activated
 * paths will be in the ONLINE state.
 *
 * The flags modifier determines whether the activation is done synchronously
 * (MDI_FAILOVER_SYNC) or asynchronously (MDI_FAILOVER_ASYNC).
 */
int mdi_failover(dev_info_t *, dev_info_t *, int);

/*
 * Client device failover mode of operation
 */
#define	MDI_FAILOVER_SYNC	1	/* Synchronous Failover		*/
#define	MDI_FAILOVER_ASYNC	2	/* Asynchronous Failover	*/
1212
/*
 * mdi_is_dev_supported: The pHCI driver bus_config implementation calls
 * mdi_is_dev_supported to determine if a child device is supported as
 * a vHCI child (i.e. as a client). The method used to specify the child
 * device, via the cinfo argument, is by agreement between the pHCI and the
 * vHCI.  In the case of SCSA and scsi_vhci cinfo is a pointer to the pHCI
 * probe dev_info node, which is decorated with the device identity
 * information necessary to determine scsi_vhci support.
 */
int mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo);
1223
/*
 * mdi_pathinfo node kstat functions.
 *
 * NOTE(review): presumably these manage the struct mdi_pi_kstats hanging off
 * a path's pi_kstats (existence check, creation under the given kstat name,
 * and I/O statistics update from a buf) - confirm against the definitions.
 */
int mdi_pi_kstat_exists(mdi_pathinfo_t *);
int mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ks_name);
void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *);
1230
/*
 * mdi_pathinfo node extended state change functions.
 *
 * NOTE(review): mdi_pi_get_state2() presumably returns the base path state
 * and the extended state through its two output pointers - confirm against
 * the definition.
 */
int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *);
int mdi_pi_get_preferred(mdi_pathinfo_t *);
1236
/*
 * mdi_pathinfo node member functions
 *
 * NOTE(review): presumably accessors for the pi_cprivate, pi_state and
 * pi_preferred members of struct mdi_pathinfo - confirm against the
 * definitions.
 */
void *mdi_pi_get_client_private(mdi_pathinfo_t *);
void mdi_pi_set_client_private(mdi_pathinfo_t *, void *);
void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t);
void mdi_pi_set_preferred(mdi_pathinfo_t *, int);
1244
/* get/set vhci private data (per client, per phci and per pathinfo node) */
void *mdi_client_get_vhci_private(dev_info_t *);
void mdi_client_set_vhci_private(dev_info_t *, void *);
void *mdi_phci_get_vhci_private(dev_info_t *);
void mdi_phci_set_vhci_private(dev_info_t *, void *);
void *mdi_pi_get_vhci_private(mdi_pathinfo_t *);
void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *);
/*
 * NOTE(review): not a private-data accessor; presumably reports the path
 * state through the devctl ioctl data handle - confirm.
 */
int mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp);
1253
/*
 * mdi_pathinfo Property utilities
 *
 * NOTE(review): presumably these report the packed size of a path's property
 * list (pi_prop) and pack it into a buffer - confirm against the
 * definitions.
 */
int mdi_prop_size(mdi_pathinfo_t *, size_t *);
int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t);
1259
/*
 * obsolete interface, to be removed
 *
 * NOTE(review): it is unclear from here whether mdi_get_component_type() is
 * also covered by the "obsolete" remark - confirm before relying on it.
 */
void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **);
int mdi_get_component_type(dev_info_t *);
1263
1264#endif	/* _KERNEL */
1265
1266#ifdef	__cplusplus
1267}
1268#endif
1269
1270#endif	/* _SYS_MDI_IMPLDEFS_H */
1271