1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/note.h>
28#include <sys/debug.h>
29#include <sys/types.h>
30#include <sys/varargs.h>
31#include <sys/errno.h>
32#include <sys/cred.h>
33#include <sys/dditypes.h>
34#include <sys/devops.h>
35#include <sys/modctl.h>
36#include <sys/poll.h>
37#include <sys/conf.h>
38#include <sys/ddi.h>
39#include <sys/sunddi.h>
40#include <sys/sunndi.h>
41#include <sys/ndi_impldefs.h>
42#include <sys/stat.h>
43#include <sys/kmem.h>
44#include <sys/vmem.h>
45#include <sys/disp.h>
46#include <sys/processor.h>
47#include <sys/cheetahregs.h>
48#include <sys/cpuvar.h>
49#include <sys/mem_config.h>
50#include <sys/ddi_impldefs.h>
51#include <sys/systm.h>
52#include <sys/machsystm.h>
53#include <sys/autoconf.h>
54#include <sys/cmn_err.h>
55#include <sys/sysmacros.h>
56#include <sys/x_call.h>
57#include <sys/promif.h>
58#include <sys/prom_plat.h>
59#include <sys/membar.h>
60#include <vm/seg_kmem.h>
61#include <sys/mem_cage.h>
62#include <sys/stack.h>
63#include <sys/archsystm.h>
64#include <vm/hat_sfmmu.h>
65#include <sys/pte.h>
66#include <sys/mmu.h>
67#include <sys/cpu_module.h>
68#include <sys/obpdefs.h>
69#include <sys/mboxsc.h>
70#include <sys/plat_ecc_dimm.h>
71
72#include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
73#include <sys/schpc.h>
74#include <sys/pci.h>
75
76#include <sys/starcat.h>
77#include <sys/cpu_sgnblk_defs.h>
78#include <sys/drmach.h>
79#include <sys/dr_util.h>
80#include <sys/dr_mbx.h>
81#include <sys/sc_gptwocfg.h>
82#include <sys/iosramreg.h>
83#include <sys/iosramio.h>
84#include <sys/iosramvar.h>
85#include <sys/axq.h>
86#include <sys/post/scat_dcd.h>
87#include <sys/kobj.h>
88#include <sys/taskq.h>
89#include <sys/cmp.h>
90#include <sys/sbd_ioctl.h>
91
92#include <sys/sysevent.h>
93#include <sys/sysevent/dr.h>
94#include <sys/sysevent/eventdefs.h>
95
96#include <sys/pci/pcisch.h>
97#include <sys/pci/pci_regs.h>
98
99#include <sys/ontrap.h>
100
101/* defined in ../ml/drmach.il.cpp */
102extern void		bcopy32_il(uint64_t, uint64_t);
103extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
104extern void		flush_dcache_il(void);
105extern void		flush_icache_il(void);
106extern void		flush_pcache_il(void);
107
108/* defined in ../ml/drmach_asm.s */
109extern uint64_t		lddmcdecode(uint64_t physaddr);
110extern uint64_t		lddsafconfig(void);
111
112/* XXX here until provided by sys/dman.h */
113extern int man_dr_attach(dev_info_t *);
114extern int man_dr_detach(dev_info_t *);
115
116#define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
117#define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
118#define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))
119
120#define	DRMACH_SLICE_MASK		0x1Full
121#define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
122#define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
123
124/*
125 * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
126 * available address space and the usable address space for every slice.
 * There must be a distinction between the available and usable due to a
128 * restriction imposed by CDC memory size.
129 */
130
131#define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
132#define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */
133
134#define	DRMACH_MC_NBANKS		4
135
136#define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
137#define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)
138
139#define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
140#define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
141	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
142
143/*
144 * The Cheetah's Safari Configuration Register and the Schizo's
145 * Safari Control/Status Register place the LPA base and bound fields in
 * same bit locations within their register word. This source code takes
147 * advantage of this by defining only one set of LPA encoding/decoding macros
148 * which are shared by various Cheetah and Schizo drmach routines.
149 */
150#define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
151#define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)
152
153#define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
154#define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
155#define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
156#define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)
157
158#define	DRMACH_L1_SET_LPA(b)		\
159	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)
160
161#define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
162#define	DRMACH_CPU_SRAM_SIZE    	0x20000ull
163
164/*
165 * Name properties for frequently accessed device nodes.
166 */
167#define	DRMACH_CPU_NAMEPROP		"cpu"
168#define	DRMACH_CMP_NAMEPROP		"cmp"
169#define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
170#define	DRMACH_PCI_NAMEPROP		"pci"
171
172/*
173 * Maximum value of processor Safari Timeout Log (TOL) field of
174 * Safari Config reg (7 secs).
175 */
176#define	DRMACH_SAF_TOL_MAX		7 * 1000000
177
178/*
179 * drmach_board_t flag definitions
180 */
181#define	DRMACH_NULL_PROC_LPA		0x1
182
183typedef struct {
184	uint32_t	reg_addr_hi;
185	uint32_t	reg_addr_lo;
186	uint32_t	reg_size_hi;
187	uint32_t	reg_size_lo;
188} drmach_reg_t;
189
190typedef struct {
191	struct drmach_node	*node;
192	void			*data;
193} drmach_node_walk_args_t;
194
195typedef struct drmach_node {
196	void		*here;
197
198	pnode_t		 (*get_dnode)(struct drmach_node *node);
199	int		 (*walk)(struct drmach_node *node, void *data,
200				int (*cb)(drmach_node_walk_args_t *args));
201	dev_info_t	*(*n_getdip)(struct drmach_node *node);
202	int		 (*n_getproplen)(struct drmach_node *node, char *name,
203				int *len);
204	int		 (*n_getprop)(struct drmach_node *node, char *name,
205				void *buf, int len);
206	int		 (*get_parent)(struct drmach_node *node,
207				struct drmach_node *pnode);
208} drmach_node_t;
209
210typedef struct {
211	int		 min_index;
212	int		 max_index;
213	int		 arr_sz;
214	drmachid_t	*arr;
215} drmach_array_t;
216
217typedef struct {
218	void		*isa;
219
220	void		 (*dispose)(drmachid_t);
221	sbd_error_t	*(*release)(drmachid_t);
222	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
223
224	char		 name[MAXNAMELEN];
225} drmach_common_t;
226
227struct drmach_board;
228typedef struct drmach_board drmach_board_t;
229
230typedef struct {
231	drmach_common_t	 cm;
232	const char	*type;
233	drmach_board_t	*bp;
234	drmach_node_t	*node;
235	int		 portid;
236	int		 unum;
237	int		 busy;
238	int		 powered;
239} drmach_device_t;
240
241typedef struct drmach_cpu {
242	drmach_device_t	 dev;
243	uint64_t	 scr_pa;
244	processorid_t	 cpuid;
245	int		 coreid;
246} drmach_cpu_t;
247
248typedef struct drmach_mem {
249	drmach_device_t	 dev;
250	struct drmach_mem *next;
251	uint64_t	 nbytes;
252	uint64_t	 madr_pa;
253} drmach_mem_t;
254
255typedef struct drmach_io {
256	drmach_device_t	 dev;
257	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
258} drmach_io_t;
259
260struct drmach_board {
261	drmach_common_t	 cm;
262	int		 bnum;
263	int		 assigned;
264	int		 powered;
265	int		 connected;
266	int		 empty;
267	int		 cond;
268	uint_t		 cpu_impl;
269	uint_t		 flags;
270	drmach_node_t	*tree;
271	drmach_array_t	*devices;
272	drmach_mem_t	*mem;
273	uint64_t	 stardrb_offset;
274	char		 type[BD_TYPELEN];
275};
276
277typedef struct {
278	int		 flags;
279	drmach_device_t	*dp;
280	sbd_error_t	*err;
281	dev_info_t	*fdip;
282} drmach_config_args_t;
283
284typedef struct {
285	drmach_board_t	*obj;
286	int		 ndevs;
287	void		*a;
288	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
289	sbd_error_t	*err;
290} drmach_board_cb_data_t;
291
292typedef struct drmach_casmslot {
293	int	valid;
294	int	slice;
295} drmach_casmslot_t;
296
297typedef enum {
298	DRMACH_CR_OK,
299	DRMACH_CR_MC_IDLE_ERR,
300	DRMACH_CR_IOPAUSE_ERR,
301	DRMACH_CR_ONTRAP_ERR
302} drmach_cr_err_t;
303
304typedef struct {
305	void		*isa;
306	caddr_t		 data;
307	drmach_mem_t	*s_mp;
308	drmach_mem_t	*t_mp;
309	struct memlist	*c_ml;
310	uint64_t	 s_copybasepa;
311	uint64_t	 t_copybasepa;
312	drmach_cr_err_t	 ecode;
313	void		*earg;
314} drmach_copy_rename_t;
315
316/*
317 * The following global is read as a boolean value, non-zero is true.
318 * If zero, DR copy-rename and cpu poweron will not set the processor
319 * LPA settings (CBASE, CBND of Safari config register) to correspond
320 * to the current memory slice map. LPAs of processors present at boot
321 * will remain as programmed by POST. LPAs of processors on boards added
322 * by DR will remain NULL, as programmed by POST. This can be used to
323 * to override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
324 * POST in the LDCD (and copied to the GDCD by SMS).
325 *
326 * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
327 * to Schizo device LPAs. These are always set by DR.
328 */
329static int		 drmach_reprogram_lpa = 1;
330
331/*
332 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
333 * can fail to receive an XIR. To workaround this issue until a hardware
334 * fix is implemented, we will exclude the selection of these CPUs.
335 * Setting this to 0 will allow their selection again.
336 */
337static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
338
339static int		 drmach_initialized;
340static drmach_array_t	*drmach_boards;
341
342static int		 drmach_cpu_delay = 1000;
343static int		 drmach_cpu_ntries = 50000;
344
345static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
346static kmutex_t		 drmach_slice_table_lock;
347
348tte_t			 drmach_cpu_sram_tte[NCPU];
349caddr_t			 drmach_cpu_sram_va;
350
351/*
352 * Setting to non-zero will enable delay before all disconnect ops.
353 */
354static int		 drmach_unclaim_delay_all;
355/*
356 * Default delay is slightly greater than the max processor Safari timeout.
357 * This delay is intended to ensure the outstanding Safari activity has
358 * retired on this board prior to a board disconnect.
359 */
360static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
361
362/*
363 * By default, DR of non-Panther procs is not allowed into a Panther
364 * domain with large page sizes enabled.  Setting this to 0 will remove
365 * the restriction.
366 */
367static int		 drmach_large_page_restriction = 1;
368
369/*
370 * Used to pass updated LPA values to procs.
371 * Protocol is to clear the array before use.
372 */
373volatile uchar_t	*drmach_xt_mb;
374volatile uint64_t	 drmach_xt_ready;
375static kmutex_t		 drmach_xt_mb_lock;
376static int		 drmach_xt_mb_size;
377
378uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
379static kmutex_t		 drmach_bus_sync_lock;
380
381static sbd_error_t	*drmach_device_new(drmach_node_t *,
382				drmach_board_t *, int, drmachid_t *);
383static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
384static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
385static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
386static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
387
388static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
389static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
390				char *name, void *buf, int len);
391static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
392				char *name, int *len);
393
394static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
395static int		 drmach_node_obp_get_prop(drmach_node_t *np,
396				char *name, void *buf, int len);
397static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
398				char *name, int *len);
399
400static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
401				caddr_t obufp, int olen,
402				caddr_t ibufp, int ilen);
403
404sbd_error_t		*drmach_io_post_attach(drmachid_t id);
405sbd_error_t		*drmach_io_post_release(drmachid_t id);
406
407static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
408				drmach_device_t **dpp, cpu_flag_t *oflags);
409static int		drmach_iocage_cpu_return(drmach_device_t *dp,
410				cpu_flag_t oflags);
411static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
412void			drmach_iocage_mem_scrub(uint64_t nbytes);
413
414static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
415
416static void		drmach_slot1_lpa_set(drmach_board_t *bp);
417
418static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
419static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
420
421static void		 drmach_bus_sync_list_update(void);
422static void		 drmach_slice_table_update(drmach_board_t *, int);
423static int		 drmach_portid2bnum(int);
424
425static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
426static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
427
428static int		drmach_panther_boards(void);
429
430static int		drmach_name2type_idx(char *);
431
432#ifdef DEBUG
433
434#define	DRMACH_PR		if (drmach_debug) printf
435#define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
436int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
437#else
438
439#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
440#define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
441#endif /* DEBUG */
442
443#define	DRMACH_OBJ(id)		((drmach_common_t *)id)
444
445#define	DRMACH_IS_BOARD_ID(id)	\
446	((id != 0) &&		\
447	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))
448
449#define	DRMACH_IS_CPU_ID(id)	\
450	((id != 0) &&		\
451	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))
452
453#define	DRMACH_IS_MEM_ID(id)	\
454	((id != 0) &&		\
455	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))
456
457#define	DRMACH_IS_IO_ID(id)	\
458	((id != 0) &&		\
459	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
460
461#define	DRMACH_IS_DEVICE_ID(id)					\
462	((id != 0) &&						\
463	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
464	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
465	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
466
467#define	DRMACH_IS_ID(id)					\
468	((id != 0) &&						\
469	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
470	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
471	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
472	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
473
474#define	DRMACH_INTERNAL_ERROR() \
475	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
476static char		*drmach_ie_fmt = "drmach.c %d";
477
/*
 * Table mapping device-tree node names to drmach device types and the
 * constructor used to build the corresponding drmach object.  A NULL
 * constructor (e.g. "cmp") marks a name that is recognized but not
 * instantiated by drmach_device_new(); its presence in the table lets
 * callers distinguish known-but-ignored nodes from unknown ones.
 */
static struct {
	const char	 *name;
	const char	 *type;
	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
} drmach_name2type[] = {
	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
};
491
492/*
493 * drmach autoconfiguration data structures and interfaces
494 */
495
496extern struct mod_ops mod_miscops;
497
498static struct modlmisc modlmisc = {
499	&mod_miscops,
500	"Sun Fire 15000 DR"
501};
502
503static struct modlinkage modlinkage = {
504	MODREV_1,
505	(void *)&modlmisc,
506	NULL
507};
508
509/*
510 * drmach_boards_rwlock is used to synchronize read/write
511 * access to drmach_boards array between status and board lookup
512 * as READERS, and assign, and unassign threads as WRITERS.
513 */
514static krwlock_t	drmach_boards_rwlock;
515
516static kmutex_t		drmach_i_lock;
517static kmutex_t		drmach_iocage_lock;
518static kcondvar_t 	drmach_iocage_cv;
519static int		drmach_iocage_is_busy = 0;
520uint64_t		drmach_iocage_paddr;
521static caddr_t		drmach_iocage_vaddr;
522static int		drmach_iocage_size = 0;
523static int		drmach_is_cheetah = -1;
524
525int
526_init(void)
527{
528	int	err;
529
530	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
531	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
532	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
533	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
534	    drmach_xt_mb_size, VM_SLEEP);
535	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
536	if ((err = mod_install(&modlinkage)) != 0) {
537		mutex_destroy(&drmach_i_lock);
538		rw_destroy(&drmach_boards_rwlock);
539		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
540		    drmach_xt_mb_size);
541	}
542
543	return (err);
544}
545
546int
547_fini(void)
548{
549	static void	drmach_fini(void);
550	int		err;
551
552	if ((err = mod_remove(&modlinkage)) == 0)
553		drmach_fini();
554
555	return (err);
556}
557
/*
 * Loadable-module info entry point: report module information from
 * the modlinkage structure.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
563
564/*
565 * drmach_node_* routines serve the purpose of separating the
566 * rest of the code from the device tree and OBP.  This is necessary
 * because of In-Kernel-Probing.  Devices probed after stod (start of
 * day) are probed
568 * by the in-kernel-prober, not OBP.  These devices, therefore, do not
569 * have dnode ids.
570 */
571
572static int
573drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
574{
575	pnode_t		nodeid;
576	static char	*fn = "drmach_node_obp_get_parent";
577
578	nodeid = np->get_dnode(np);
579	if (nodeid == OBP_NONODE) {
580		cmn_err(CE_WARN, "%s: invalid dnode", fn);
581		return (-1);
582	}
583
584	bcopy(np, pp, sizeof (drmach_node_t));
585
586	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
587	if (pp->here == OBP_NONODE) {
588		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
589		return (-1);
590	}
591
592	return (0);
593}
594
/*
 * OBP flavor of get_dnode: the PROM node id is stored directly in
 * np->here (as a pointer-sized value).
 */
static pnode_t
drmach_node_obp_get_dnode(drmach_node_t *np)
{
	return ((pnode_t)(uintptr_t)np->here);
}
600
601typedef struct {
602	drmach_node_walk_args_t	*nwargs;
603	int 			(*cb)(drmach_node_walk_args_t *args);
604	int			err;
605} drmach_node_ddi_walk_args_t;
606
607int
608drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
609{
610	drmach_node_ddi_walk_args_t	*nargs;
611
612	nargs = (drmach_node_ddi_walk_args_t *)arg;
613
614	/*
615	 * dip doesn't have to be held here as we are called
616	 * from ddi_walk_devs() which holds the dip.
617	 */
618	nargs->nwargs->node->here = (void *)dip;
619
620	nargs->err = nargs->cb(nargs->nwargs);
621
622	/*
623	 * Set "here" to NULL so that unheld dip is not accessible
624	 * outside ddi_walk_devs()
625	 */
626	nargs->nwargs->node->here = NULL;
627
628	if (nargs->err)
629		return (DDI_WALK_TERMINATE);
630	else
631		return (DDI_WALK_CONTINUE);
632}
633
634static int
635drmach_node_ddi_walk(drmach_node_t *np, void *data,
636		int (*cb)(drmach_node_walk_args_t *args))
637{
638	drmach_node_walk_args_t		args;
639	drmach_node_ddi_walk_args_t	nargs;
640
641	/* initialized args structure for callback */
642	args.node = np;
643	args.data = data;
644
645	nargs.nwargs = &args;
646	nargs.cb = cb;
647	nargs.err = 0;
648
649	/*
650	 * Root node doesn't have to be held in any way.
651	 */
652	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
653
654	return (nargs.err);
655}
656
657static int
658drmach_node_obp_walk(drmach_node_t *np, void *data,
659		int (*cb)(drmach_node_walk_args_t *args))
660{
661	pnode_t			nodeid;
662	int			rv;
663	drmach_node_walk_args_t	args;
664
665	/* initialized args structure for callback */
666	args.node = np;
667	args.data = data;
668
669	nodeid = prom_childnode(prom_rootnode());
670
671	/* save our new position within the tree */
672	np->here = (void *)(uintptr_t)nodeid;
673
674	rv = 0;
675	while (nodeid != OBP_NONODE) {
676
677		pnode_t child;
678
679		rv = (*cb)(&args);
680		if (rv)
681			break;
682
683		child = prom_childnode(nodeid);
684		np->here = (void *)(uintptr_t)child;
685
686		while (child != OBP_NONODE) {
687			rv = (*cb)(&args);
688			if (rv)
689				break;
690
691			child = prom_nextnode(child);
692			np->here = (void *)(uintptr_t)child;
693		}
694
695		nodeid = prom_nextnode(nodeid);
696
697		/* save our new position within the tree */
698		np->here = (void *)(uintptr_t)nodeid;
699	}
700
701	return (rv);
702}
703
704static int
705drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
706{
707	dev_info_t	*ndip;
708	static char	*fn = "drmach_node_ddi_get_parent";
709
710	ndip = np->n_getdip(np);
711	if (ndip == NULL) {
712		cmn_err(CE_WARN, "%s: NULL dip", fn);
713		return (-1);
714	}
715
716	bcopy(np, pp, sizeof (drmach_node_t));
717
718	pp->here = (void *)ddi_get_parent(ndip);
719	if (pp->here == NULL) {
720		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
721		return (-1);
722	}
723
724	return (0);
725}
726
/*ARGSUSED*/
static pnode_t
drmach_node_ddi_get_dnode(drmach_node_t *np)
{
	/* nodes probed in-kernel (post-boot) have no OBP node id */
	return ((pnode_t)NULL);
}
733
734static drmach_node_t *
735drmach_node_new(void)
736{
737	drmach_node_t *np;
738
739	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
740
741	if (drmach_initialized) {
742		np->get_dnode = drmach_node_ddi_get_dnode;
743		np->walk = drmach_node_ddi_walk;
744		np->n_getdip = drmach_node_ddi_get_dip;
745		np->n_getproplen = drmach_node_ddi_get_proplen;
746		np->n_getprop = drmach_node_ddi_get_prop;
747		np->get_parent = drmach_node_ddi_get_parent;
748	} else {
749		np->get_dnode = drmach_node_obp_get_dnode;
750		np->walk = drmach_node_obp_walk;
751		np->n_getdip = drmach_node_obp_get_dip;
752		np->n_getproplen = drmach_node_obp_get_proplen;
753		np->n_getprop = drmach_node_obp_get_prop;
754		np->get_parent = drmach_node_obp_get_parent;
755	}
756
757	return (np);
758}
759
/*
 * Free a node allocated by drmach_node_new() or drmach_node_dup().
 */
static void
drmach_node_dispose(drmach_node_t *np)
{
	kmem_free(np, sizeof (*np));
}
765
766/*
767 * Check if a CPU node is part of a CMP.
768 */
769static int
770drmach_is_cmp_child(dev_info_t *dip)
771{
772	dev_info_t *pdip;
773
774	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
775		return (0);
776	}
777
778	pdip = ddi_get_parent(dip);
779
780	ASSERT(pdip);
781
782	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
783		return (1);
784	}
785
786	return (0);
787}
788
/*
 * OBP flavor of n_getdip: map the node's PROM node id to its dip.
 * Returns NULL if the node id is invalid or no dip exists for it.
 */
static dev_info_t *
drmach_node_obp_get_dip(drmach_node_t *np)
{
	pnode_t		nodeid;
	dev_info_t	*dip;

	nodeid = np->get_dnode(np);
	if (nodeid == OBP_NONODE)
		return (NULL);

	dip = e_ddi_nodeid_to_dip(nodeid);
	if (dip) {
		/*
		 * The branch rooted at dip will have been previously
		 * held, or it will be the child of a CMP. In either
		 * case, the hold acquired in e_ddi_nodeid_to_dip()
		 * is not needed.
		 */
		ddi_release_devi(dip);
		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
	}

	return (dip);
}
813
/*
 * DDI flavor of n_getdip: the dip is stored directly in np->here
 * (set during the walk; see drmach_node_ddi_walk_cb()).
 */
static dev_info_t *
drmach_node_ddi_get_dip(drmach_node_t *np)
{
	return ((dev_info_t *)np->here);
}
819
/*
 * Walk the device tree via the node's bound walk op (OBP or DDI,
 * chosen in drmach_node_new()), invoking cb for each visited node.
 */
static int
drmach_node_walk(drmach_node_t *np, void *param,
		int (*cb)(drmach_node_walk_args_t *args))
{
	return (np->walk(np, param, cb));
}
826
/*
 * DDI flavor of n_getprop: copy the named (non-PROM) property into the
 * caller-supplied buffer of size len.  Returns 0 on success, -1 on
 * failure.  Note len is passed by value, so the caller does not learn
 * the actual property length; use n_getproplen for that.
 */
static int
drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
{
	int		rv = 0;
	dev_info_t	*ndip;
	static char	*fn = "drmach_node_ddi_get_prop";

	ndip = np->n_getdip(np);
	if (ndip == NULL) {
		cmn_err(CE_WARN, "%s: NULL dip", fn);
		rv = -1;
	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
		rv = -1;
	}

	return (rv);
}
846
/*
 * OBP flavor of n_getprop: copy the named PROM property into buf.
 * Returns 0 on success, -1 if the node or property is invalid.
 *
 * The len argument is unused (ARGSUSED): prom_getprop() writes the
 * full property, so the caller is assumed to have sized buf from a
 * prior n_getproplen call — NOTE(review): confirm callers do this.
 */
static int
drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
{
	int		rv = 0;
	pnode_t		nodeid;
	static char	*fn = "drmach_node_obp_get_prop";

	nodeid = np->get_dnode(np);
	if (nodeid == OBP_NONODE) {
		cmn_err(CE_WARN, "%s: invalid dnode", fn);
		rv = -1;
	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
		rv = -1;
	} else {
		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
	}

	return (rv);
}
867
868static int
869drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
870{
871	int		rv = 0;
872	dev_info_t	*ndip;
873
874	ndip = np->n_getdip(np);
875	if (ndip == NULL) {
876		rv = -1;
877	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
878	    name, len) != DDI_PROP_SUCCESS) {
879		rv = -1;
880	}
881
882	return (rv);
883}
884
885static int
886drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
887{
888	pnode_t	 nodeid;
889	int	 rv;
890
891	nodeid = np->get_dnode(np);
892	if (nodeid == OBP_NONODE)
893		rv = -1;
894	else {
895		*len = prom_getproplen(nodeid, (caddr_t)name);
896		rv = (*len < 0 ? -1 : 0);
897	}
898
899	return (rv);
900}
901
902static drmachid_t
903drmach_node_dup(drmach_node_t *np)
904{
905	drmach_node_t *dup;
906
907	dup = drmach_node_new();
908	dup->here = np->here;
909	dup->get_dnode = np->get_dnode;
910	dup->walk = np->walk;
911	dup->n_getdip = np->n_getdip;
912	dup->n_getproplen = np->n_getproplen;
913	dup->n_getprop = np->n_getprop;
914	dup->get_parent = np->get_parent;
915
916	return (dup);
917}
918
919/*
920 * drmach_array provides convenient array construction, access,
921 * bounds checking and array destruction logic.
922 */
923
924static drmach_array_t *
925drmach_array_new(int min_index, int max_index)
926{
927	drmach_array_t *arr;
928
929	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
930
931	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
932	if (arr->arr_sz > 0) {
933		arr->min_index = min_index;
934		arr->max_index = max_index;
935
936		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
937		return (arr);
938	} else {
939		kmem_free(arr, sizeof (*arr));
940		return (0);
941	}
942}
943
944static int
945drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
946{
947	if (idx < arr->min_index || idx > arr->max_index)
948		return (-1);
949	else {
950		arr->arr[idx - arr->min_index] = val;
951		return (0);
952	}
953	/*NOTREACHED*/
954}
955
956static int
957drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
958{
959	if (idx < arr->min_index || idx > arr->max_index)
960		return (-1);
961	else {
962		*val = arr->arr[idx - arr->min_index];
963		return (0);
964	}
965	/*NOTREACHED*/
966}
967
968static int
969drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
970{
971	int rv;
972
973	*idx = arr->min_index;
974	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
975		*idx += 1;
976
977	return (rv);
978}
979
980static int
981drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
982{
983	int rv;
984
985	*idx += 1;
986	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
987		*idx += 1;
988
989	return (rv);
990}
991
992static void
993drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
994{
995	drmachid_t	val;
996	int		idx;
997	int		rv;
998
999	rv = drmach_array_first(arr, &idx, &val);
1000	while (rv == 0) {
1001		(*disposer)(val);
1002
1003		/* clear the array entry */
1004		rv = drmach_array_set(arr, idx, NULL);
1005		ASSERT(rv == 0);
1006
1007		rv = drmach_array_next(arr, &idx, &val);
1008	}
1009
1010	kmem_free(arr->arr, arr->arr_sz);
1011	kmem_free(arr, sizeof (*arr));
1012}
1013
1014
1015static gdcd_t *
1016drmach_gdcd_new()
1017{
1018	gdcd_t *gdcd;
1019
1020	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1021
1022	/* read the gdcd, bail if magic or ver #s are not what is expected */
1023	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1024bail:
1025		kmem_free(gdcd, sizeof (gdcd_t));
1026		return (NULL);
1027	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1028		goto bail;
1029	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1030		goto bail;
1031	}
1032
1033	return (gdcd);
1034}
1035
/*
 * Free a GDCD copy obtained from drmach_gdcd_new().
 */
static void
drmach_gdcd_dispose(gdcd_t *gdcd)
{
	kmem_free(gdcd, sizeof (gdcd_t));
}
1041
/*
 * Configure (attach drivers for) the DDI branch rooted at the given
 * device id.  CPU ids are a no-op (no CPU driver on Starcat).  Memory
 * ids form a linked list of memory-controller devices, each of whose
 * branches is configured in turn.  Returns NULL on success or the
 * first configuration error encountered; configuration continues past
 * that first failure, but later failures are not recorded.  The flags
 * argument is unused (ARGSUSED).
 */
/*ARGSUSED*/
sbd_error_t *
drmach_configure(drmachid_t id, int flags)
{
	drmach_device_t	*dp;
	dev_info_t	*rdip;
	sbd_error_t	*err = NULL;

	/*
	 * On Starcat, there is no CPU driver, so it is
	 * not necessary to configure any CPU nodes.
	 */
	if (DRMACH_IS_CPU_ID(id)) {
		return (NULL);
	}

	for (; id; ) {
		dev_info_t	*fdip = NULL;

		if (!DRMACH_IS_DEVICE_ID(id))
			return (drerr_new(0, ESTC_INAPPROP, NULL));
		dp = id;

		rdip = dp->node->n_getdip(dp->node);

		/*
		 * We held this branch earlier, so at a minimum its
		 * root should still be present in the device tree.
		 */
		ASSERT(rdip);

		DRMACH_PR("drmach_configure: configuring DDI branch");

		ASSERT(e_ddi_branch_held(rdip));
		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
			if (err == NULL) {
				/*
				 * Record the first failure but don't stop;
				 * fdip, if set, names the failing dip.
				 */
				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;

				(void) ddi_pathname(dip, path);
				err = drerr_new(1, ESTC_DRVFAIL, path);

				kmem_free(path, MAXPATHLEN);
			}

			/*
			 * If non-NULL, fdip is returned held and must be
			 * released.
			 */
			if (fdip != NULL) {
				ddi_release_devi(fdip);
			}
		}

		/* memory ids chain together; all other types are single */
		if (DRMACH_IS_MEM_ID(id)) {
			drmach_mem_t	*mp = id;
			id = mp->next;
		} else {
			id = NULL;
		}
	}

	return (err);
}
1109
/*
 * Construct a drmach device object for the given device-tree node on
 * board bp.  Looks up the node's "name" property in drmach_name2type[]
 * and dispatches to the type-specific constructor.  On success *idp
 * receives the new object (or NULL for nodes that are recognized but
 * not of interest to DR, e.g. "cmp") and NULL is returned; otherwise
 * an sbd_error_t is returned.
 */
static sbd_error_t *
drmach_device_new(drmach_node_t *node,
	drmach_board_t *bp, int portid, drmachid_t *idp)
{
	int		i, rv, device_id, unum;
	char		name[OBP_MAXDRVNAME];
	drmach_device_t	proto;

	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
	if (rv) {
		sbd_error_t *err;

		/* every node is expected to have a name */
		err = drerr_new(1, ESTC_GETPROP,
		    "dip: 0x%p: property %s",
		    node->n_getdip(node), OBP_NAME);

		return (err);
	}

	i = drmach_name2type_idx(name);

	if (i < 0 || strcmp(name, "cmp") == 0) {
		/*
		 * Not a node of interest to dr - including "cmp",
		 * but it is in drmach_name2type[], which lets the
		 * gptwocfg driver check whether the node is OBP-created.
		 */
		*idp = (drmachid_t)0;
		return (NULL);
	}

	/*
	 * Derive a best-guess unit number from the portid value.
	 * Some drmach_*_new constructors (drmach_pci_new, for example)
	 * will overwrite the prototype unum value with one that is more
	 * appropriate for the device.
	 */
	device_id = portid & 0x1f;
	if (device_id < 4)
		unum = device_id;
	else if (device_id == 8) {
		unum = 0;
	} else if (device_id == 9) {
		unum = 1;
	} else if (device_id == 0x1c) {
		unum = 0;
	} else if (device_id == 0x1d) {
		unum = 1;
	} else {
		/* unexpected device id: not a mappable Safari port */
		return (DRMACH_INTERNAL_ERROR());
	}

	/* build a prototype; the constructor copies what it needs */
	bzero(&proto, sizeof (proto));
	proto.type = drmach_name2type[i].type;
	proto.bp = bp;
	proto.node = node;
	proto.portid = portid;
	proto.unum = unum;

	return (drmach_name2type[i].new(&proto, idp));
}
1172
1173static void
1174drmach_device_dispose(drmachid_t id)
1175{
1176	drmach_device_t *self = id;
1177
1178	self->cm.dispose(id);
1179}
1180
1181static drmach_board_t *
1182drmach_board_new(int bnum)
1183{
1184	static sbd_error_t *drmach_board_release(drmachid_t);
1185	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1186
1187	drmach_board_t	*bp;
1188
1189	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1190
1191	bp->cm.isa = (void *)drmach_board_new;
1192	bp->cm.release = drmach_board_release;
1193	bp->cm.status = drmach_board_status;
1194
1195	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1196
1197	bp->bnum = bnum;
1198	bp->devices = NULL;
1199	bp->tree = drmach_node_new();
1200
1201	(void) drmach_array_set(drmach_boards, bnum, bp);
1202	return (bp);
1203}
1204
1205static void
1206drmach_board_dispose(drmachid_t id)
1207{
1208	drmach_board_t *bp;
1209
1210	ASSERT(DRMACH_IS_BOARD_ID(id));
1211	bp = id;
1212
1213	if (bp->tree)
1214		drmach_node_dispose(bp->tree);
1215
1216	if (bp->devices)
1217		drmach_array_dispose(bp->devices, drmach_device_dispose);
1218
1219	kmem_free(bp, sizeof (*bp));
1220}
1221
/*
 * Common status routine for a board.  Fills in 'stat' either from the
 * cached board state (when the board is already connected) or by
 * querying the SC with a SHOWBOARD mailbox request, then folds in the
 * busy/configured state of every device on the board.
 */
static sbd_error_t *
drmach_board_status(drmachid_t id, drmach_status_t *stat)
{
	sbd_error_t	*err = NULL;
	drmach_board_t	*bp;
	caddr_t		obufp;
	dr_showboard_t	shb;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	bp = id;

	/*
	 * we need to know if the board's connected before
	 * issuing a showboard message.  If it's connected, we just
	 * reply with status composed of cached info
	 */

	if (!bp->connected) {
		/* request buffer carries only the protocol header */
		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
		    sizeof (dr_showboard_t));

		kmem_free(obufp, sizeof (dr_proto_hdr_t));
		if (err)
			return (err);

		/* a board counts as connected only if assigned and active */
		bp->connected = (shb.bd_assigned && shb.bd_active);
		/*
		 * NOTE(review): strncpy() does not guarantee NUL termination
		 * when the source fills the destination — assumes board_type
		 * always fits within bp->type; confirm the field sizes.
		 */
		(void) strncpy(bp->type, shb.board_type, sizeof (bp->type));
		stat->assigned = bp->assigned = shb.bd_assigned;
		stat->powered = bp->powered = shb.power_on;
		stat->empty = bp->empty = shb.slot_empty;

		/* map the SC's test status onto an sbd condition code */
		switch (shb.test_status) {
			case DR_TEST_STATUS_UNKNOWN:
			case DR_TEST_STATUS_IPOST:
			case DR_TEST_STATUS_ABORTED:
				stat->cond = bp->cond = SBD_COND_UNKNOWN;
				break;
			case DR_TEST_STATUS_PASSED:
				stat->cond = bp->cond = SBD_COND_OK;
				break;
			case DR_TEST_STATUS_FAILED:
				stat->cond = bp->cond = SBD_COND_FAILED;
				break;
			default:
				stat->cond = bp->cond = SBD_COND_UNKNOWN;
				DRMACH_PR("Unknown test status=0x%x from SC\n",
				    shb.test_status);
				break;

		}

		(void) strncpy(stat->type, shb.board_type, sizeof (stat->type));
		(void) snprintf(stat->info, sizeof (stat->info),
		    "Test Level=%d", shb.test_level);
	} else {
		/* connected: reply from the cached board state */
		stat->assigned = bp->assigned;
		stat->powered = bp->powered;
		stat->empty = bp->empty;
		stat->cond = bp->cond;
		(void) strncpy(stat->type, bp->type, sizeof (stat->type));
	}

	stat->busy = 0;			/* assume not busy */
	stat->configured = 0;		/* assume not configured */
	/* accumulate busy/configured status across all board devices */
	if (bp->devices) {
		int		 rv;
		int		 d_idx;
		drmachid_t	 d_id;

		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
		while (rv == 0) {
			drmach_status_t	d_stat;

			err = drmach_i_status(d_id, &d_stat);
			if (err)
				break;

			stat->busy |= d_stat.busy;
			stat->configured |= d_stat.configured;

			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
		}
	}

	return (err);
}
1312
/*
 * Entry in the doubly-linked list of outstanding mailbox transactions.
 * Each request sent to the SC is tracked by one of these until the reply
 * arrives or the transaction fails.  The s_cv/s_lock pair synchronizes
 * the sending side (waiting for mboxsc_putmsg to complete); the
 * g_cv/g_lock pair synchronizes the receiving side (waiting for the
 * matching reply).  'link' ties a transaction to a related one (used for
 * ABORT_TEST waiting on its TESTBOARD reply).
 */
typedef struct drmach_msglist {
	kcondvar_t		s_cv; 		/* condvar for sending msg */
	kmutex_t		s_lock;		/* mutex for sending */
	kcondvar_t		g_cv;		/* condvar for getting reply */
	kmutex_t		g_lock;		/* mutex for getting reply */
	struct drmach_msglist	*prev;		/* link to previous entry */
	struct drmach_msglist	*next;		/* link to next entry */
	struct drmach_msglist	*link;		/* link to related entry */
	caddr_t			o_buf;		/* address of output buffer */
	caddr_t			i_buf; 		/* address of input buffer */
	uint32_t		o_buflen;	/* output buffer length */
	uint32_t		i_buflen;	/* input buffer length */
	uint32_t		msgid;		/* message identifier */
	int			o_nretry;	/* number of sending retries */
	int			f_error;	/* mailbox framework error */
	uint8_t			e_code;		/* error code returned by SC */
	uint8_t			p_flag	:1,	/* successfully putmsg */
				m_reply	:1,	/* msg reply received */
				unused	:6;
} drmach_msglist_t;
1333
/*
 * Global state for the DR mailbox layer: the pending-transaction list,
 * its synchronization primitives, the helper threads that exchange
 * messages with the SC, and the tunable timeout values.
 */
kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
uint32_t		drmach_msgid;		/* current message id */
kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
int			drmach_mbox_istate;	/* mailbox init state */
int			drmach_mbox_iflag;	/* set if init'd with SC */
int			drmach_mbox_ipending;	/* set if reinit scheduled */

/*
 * Timeout values (in seconds) used when waiting for replies (from the SC) to
 * requests that we sent.  Since we only receive boardevent messages, and they
 * are events rather than replies, there is no boardevent timeout.
 */
int	drmach_to_mbxinit	= 60;		/* 1 minute */
int	drmach_to_assign	= 60;		/* 1 minute */
int	drmach_to_unassign	= 60;		/* 1 minute */
int	drmach_to_claim		= 3600;		/* 1 hour */
int	drmach_to_unclaim	= 3600;		/* 1 hour */
int	drmach_to_poweron	= 480;		/* 8 minutes */
int	drmach_to_poweroff	= 480;		/* 8 minutes */
int	drmach_to_testboard	= 43200;	/* 12 hours */
int	drmach_to_aborttest	= 180;		/* 3 minutes */
int	drmach_to_showboard	= 180;		/* 3 minutes */
int	drmach_to_unconfig	= 180;		/* 3 minutes */

/*
 * Delay (in seconds) used after receiving a non-transient error indication from
 * an mboxsc_getmsg call in the thread that loops waiting for incoming messages.
 */
int	drmach_mbxerr_delay	= 15;		/* 15 seconds */

/*
 * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
 */
clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */

/*
 * Normally, drmach_to_putmsg is set dynamically during initialization in
 * drmach_mbox_init.  This has the potentially undesirable side effect of
 * clobbering any value that might have been set in /etc/system.  To prevent
 * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
 * /etc/system), set drmach_use_tuned_putmsg_to to 1.
 */
int	drmach_use_tuned_putmsg_to	= 0;


/* maximum conceivable message size for future mailbox protocol versions */
#define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1391
1392/*ARGSUSED*/
1393void
1394drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1395{
1396	int		i, j;
1397	dr_memregs_t	*memregs;
1398	dr_proto_hdr_t	*php = &mbp->p_hdr;
1399	dr_msg_t	*mp = &mbp->msgdata;
1400
1401#ifdef DEBUG
1402	switch (php->command) {
1403		case DRMSG_BOARDEVENT:
1404			if (dir) {
1405				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1406			} else {
1407				DRMACH_PR("BOARDEVENT received:\n");
1408				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1409				    mp->dm_be.initialized,
1410				    mp->dm_be.board_insertion,
1411				    mp->dm_be.board_removal,
1412				    mp->dm_be.slot_assign);
1413				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1414				    mp->dm_be.slot_unassign,
1415				    mp->dm_be.slot_avail,
1416				    mp->dm_be.slot_unavail);
1417			}
1418			break;
1419		case DRMSG_MBOX_INIT:
1420			if (dir) {
1421				DRMACH_PR("MBOX_INIT Request:\n");
1422			} else {
1423				DRMACH_PR("MBOX_INIT Reply:\n");
1424			}
1425			break;
1426		case DRMSG_ASSIGN:
1427			if (dir) {
1428				DRMACH_PR("ASSIGN Request:\n");
1429			} else {
1430				DRMACH_PR("ASSIGN Reply:\n");
1431			}
1432			break;
1433		case DRMSG_UNASSIGN:
1434			if (dir) {
1435				DRMACH_PR("UNASSIGN Request:\n");
1436			} else {
1437				DRMACH_PR("UNASSIGN Reply:\n");
1438			}
1439			break;
1440		case DRMSG_CLAIM:
1441			if (!dir) {
1442				DRMACH_PR("CLAIM Reply:\n");
1443				break;
1444			}
1445
1446			DRMACH_PR("CLAIM Request:\n");
1447			for (i = 0; i < 18; ++i) {
1448				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1449				    mp->dm_cr.mem_slice[i].valid,
1450				    mp->dm_cr.mem_slice[i].slice);
1451				memregs = &(mp->dm_cr.mem_regs[i]);
1452				for (j = 0; j < S0_LPORT_COUNT; j++) {
1453					DRMACH_PR("  MC %2d: "
1454					    "MADR[%d] = 0x%lx, "
1455					    "MADR[%d] = 0x%lx\n", j,
1456					    0, DRMACH_MCREG_TO_U64(
1457					    memregs->madr[j][0]),
1458					    1, DRMACH_MCREG_TO_U64(
1459					    memregs->madr[j][1]));
1460					DRMACH_PR("       : "
1461					    "MADR[%d] = 0x%lx, "
1462					    "MADR[%d] = 0x%lx\n",
1463					    2, DRMACH_MCREG_TO_U64(
1464					    memregs->madr[j][2]),
1465					    3, DRMACH_MCREG_TO_U64(
1466					    memregs->madr[j][3]));
1467				}
1468			}
1469			break;
1470		case DRMSG_UNCLAIM:
1471			if (!dir) {
1472				DRMACH_PR("UNCLAIM Reply:\n");
1473				break;
1474			}
1475
1476			DRMACH_PR("UNCLAIM Request:\n");
1477			for (i = 0; i < 18; ++i) {
1478				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1479				    mp->dm_ur.mem_slice[i].valid,
1480				    mp->dm_ur.mem_slice[i].slice);
1481				memregs = &(mp->dm_ur.mem_regs[i]);
1482				for (j = 0; j < S0_LPORT_COUNT; j++) {
1483					DRMACH_PR("  MC %2d: "
1484					    "MADR[%d] = 0x%lx, "
1485					    "MADR[%d] = 0x%lx\n", j,
1486					    0, DRMACH_MCREG_TO_U64(
1487					    memregs->madr[j][0]),
1488					    1, DRMACH_MCREG_TO_U64(
1489					    memregs->madr[j][1]));
1490					DRMACH_PR("       : "
1491					    "MADR[%d] = 0x%lx, "
1492					    "MADR[%d] = 0x%lx\n",
1493					    2, DRMACH_MCREG_TO_U64(
1494					    memregs->madr[j][2]),
1495					    3, DRMACH_MCREG_TO_U64(
1496					    memregs->madr[j][3]));
1497				}
1498			}
1499			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1500			break;
1501		case DRMSG_UNCONFIG:
1502			if (!dir) {
1503				DRMACH_PR("UNCONFIG Reply:\n");
1504				break;
1505			}
1506
1507			DRMACH_PR("UNCONFIG Request:\n");
1508			for (i = 0; i < 18; ++i) {
1509				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1510				    mp->dm_uc.mem_slice[i].valid,
1511				    mp->dm_uc.mem_slice[i].slice);
1512				memregs = &(mp->dm_uc.mem_regs[i]);
1513				for (j = 0; j < S0_LPORT_COUNT; j++) {
1514					DRMACH_PR("  MC %2d: "
1515					    "MADR[%d] = 0x%lx, "
1516					    "MADR[%d] = 0x%lx\n", j,
1517					    0, DRMACH_MCREG_TO_U64(
1518					    memregs->madr[j][0]),
1519					    1, DRMACH_MCREG_TO_U64(
1520					    memregs->madr[j][1]));
1521					DRMACH_PR("       : "
1522					    "MADR[%d] = 0x%lx, "
1523					    "MADR[%d] = 0x%lx\n",
1524					    2, DRMACH_MCREG_TO_U64(
1525					    memregs->madr[j][2]),
1526					    3, DRMACH_MCREG_TO_U64(
1527					    memregs->madr[j][3]));
1528				}
1529			}
1530			break;
1531		case DRMSG_POWERON:
1532			if (dir) {
1533				DRMACH_PR("POWERON Request:\n");
1534			} else {
1535				DRMACH_PR("POWERON Reply:\n");
1536			}
1537			break;
1538		case DRMSG_POWEROFF:
1539			if (dir) {
1540				DRMACH_PR("POWEROFF Request:\n");
1541			} else {
1542				DRMACH_PR("POWEROFF Reply:\n");
1543			}
1544			break;
1545		case DRMSG_TESTBOARD:
1546			if (dir) {
1547				DRMACH_PR("TESTBOARD Request:\n");
1548				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1549				    mp->dm_tb.memaddrhi,
1550				    mp->dm_tb.memaddrlo);
1551				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1552				    mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1553				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1554				    mp->dm_tb.force, mp->dm_tb.immediate);
1555			} else {
1556				DRMACH_PR("TESTBOARD Reply:\n");
1557				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1558				    mp->dm_tr.memaddrhi,
1559				    mp->dm_tr.memaddrlo);
1560				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1561				    mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1562				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1563				    mp->dm_tr.cpu_recovered,
1564				    mp->dm_tr.test_status);
1565
1566			}
1567			break;
1568		case DRMSG_ABORT_TEST:
1569			if (dir) {
1570				DRMACH_PR("ABORT_TEST Request:\n");
1571			} else {
1572				DRMACH_PR("ABORT_TEST Reply:\n");
1573			}
1574
1575			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1576			    mp->dm_ta.memaddrhi,
1577			    mp->dm_ta.memaddrlo);
1578			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1579			    mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1580			break;
1581		case DRMSG_SHOWBOARD:
1582			if (dir) {
1583				DRMACH_PR("SHOWBOARD Request:\n");
1584			} else {
1585				DRMACH_PR("SHOWBOARD Reply:\n");
1586
1587				DRMACH_PR(": empty=%d power=%d assigned=%d",
1588				    mp->dm_sb.slot_empty,
1589				    mp->dm_sb.power_on,
1590				    mp->dm_sb.bd_assigned);
1591				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1592				    mp->dm_sb.bd_active,
1593				    mp->dm_sb.test_status,
1594				    mp->dm_sb.test_level);
1595				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1596			}
1597			break;
1598		default:
1599			DRMACH_PR("Unknown message type\n");
1600			break;
1601	}
1602
1603	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1604	    php->message_id, php->drproto_version, php->command,
1605	    php->expbrd, php->slot);
1606#endif
1607	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1608	    php->error_code);
1609}
1610
1611/*
1612 * Callback function passed to taskq_dispatch when a mailbox reinitialization
1613 * handshake needs to be scheduled.  The handshake can't be performed by the
1614 * thread that determines it is needed, in most cases, so this function is
1615 * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1616 * otherwise ignored, since any situation that requires a mailbox initialization
1617 * handshake will continue to request the handshake until it succeeds.
1618 */
1619static void
1620drmach_mbox_reinit(void *unused)
1621{
1622	_NOTE(ARGUNUSED(unused))
1623
1624	caddr_t		obufp = NULL;
1625	sbd_error_t	*serr = NULL;
1626
1627	DRMACH_PR("scheduled mailbox reinit running\n");
1628
1629	mutex_enter(&drmach_ri_mbox_mutex);
1630	mutex_enter(&drmach_g_mbox_mutex);
1631	if (drmach_mbox_iflag == 0) {
1632		/* need to initialize the mailbox */
1633		mutex_exit(&drmach_g_mbox_mutex);
1634
1635		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1636		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1637		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1638		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1639		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1640
1641		if (serr) {
1642			cmn_err(CE_WARN,
1643			    "mbox_init: MBOX_INIT failed ecode=0x%x",
1644			    serr->e_code);
1645			sbd_err_clear(&serr);
1646		}
1647		mutex_enter(&drmach_g_mbox_mutex);
1648		if (!serr) {
1649			drmach_mbox_iflag = 1;
1650		}
1651	}
1652	drmach_mbox_ipending = 0;
1653	mutex_exit(&drmach_g_mbox_mutex);
1654	mutex_exit(&drmach_ri_mbox_mutex);
1655}
1656
1657/*
1658 * To ensure sufficient compatibility with future versions of the DR mailbox
1659 * protocol, we use a buffer that is large enough to receive the largest message
1660 * that could possibly be sent to us.  However, since that ends up being fairly
1661 * large, allocating it on the stack is a bad idea.  Fortunately, this function
1662 * does not need to be MT-safe since it is only invoked by the mailbox
1663 * framework, which will never invoke it multiple times concurrently.  Since
1664 * that is the case, we can use a static buffer.
1665 */
1666void
1667drmach_mbox_event(void)
1668{
1669	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1670	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1671	int		err;
1672	uint32_t	type = MBOXSC_MSG_EVENT;
1673	uint32_t	command = DRMSG_BOARDEVENT;
1674	uint64_t	transid = 0;
1675	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1676	char		*hint = "";
1677	int		logsys = 0;
1678
1679	do {
1680		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1681		    &length, (void *)msg, 0);
1682	} while (err == EAGAIN);
1683
1684	/* don't try to interpret anything with the wrong version number */
1685	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1686		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1687		    msg->p_hdr.drproto_version, DRMBX_VERSION);
1688		mutex_enter(&drmach_g_mbox_mutex);
1689		drmach_mbox_iflag = 0;
1690		/* schedule a reinit handshake if one isn't pending */
1691		if (!drmach_mbox_ipending) {
1692			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1693			    NULL, TQ_NOSLEEP) != NULL) {
1694				drmach_mbox_ipending = 1;
1695			} else {
1696				cmn_err(CE_WARN,
1697				    "failed to schedule mailbox reinit");
1698			}
1699		}
1700		mutex_exit(&drmach_g_mbox_mutex);
1701		return;
1702	}
1703
1704	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1705		cmn_err(CE_WARN,
1706		    "Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1707		    err, msg->p_hdr.error_code);
1708	} else {
1709		dr_boardevent_t	*be;
1710		be = (dr_boardevent_t *)&msg->msgdata;
1711
1712		/* check for initialization event */
1713		if (be->initialized) {
1714			mutex_enter(&drmach_g_mbox_mutex);
1715			drmach_mbox_iflag = 0;
1716			/* schedule a reinit handshake if one isn't pending */
1717			if (!drmach_mbox_ipending) {
1718				if (taskq_dispatch(system_taskq,
1719				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1720				    != NULL) {
1721					drmach_mbox_ipending = 1;
1722				} else {
1723					cmn_err(CE_WARN, "failed to schedule "
1724					    "mailbox reinit");
1725				}
1726			}
1727			mutex_exit(&drmach_g_mbox_mutex);
1728			cmn_err(CE_NOTE, "!Mailbox Init event received");
1729		}
1730
1731		/* anything else will be a log_sysevent call */
1732
1733		if (be->board_insertion) {
1734			DRMACH_PR("Board Insertion event received");
1735			hint = DR_HINT_INSERT;
1736			logsys++;
1737	}
1738		if (be->board_removal) {
1739			DRMACH_PR("Board Removal event received");
1740			hint = DR_HINT_REMOVE;
1741			logsys++;
1742		}
1743		if (be->slot_assign) {
1744			DRMACH_PR("Slot Assign event received");
1745			logsys++;
1746		}
1747		if (be->slot_unassign) {
1748			DRMACH_PR("Slot Unassign event received");
1749			logsys++;
1750		}
1751		if (be->slot_avail) {
1752			DRMACH_PR("Slot Available event received");
1753			logsys++;
1754		}
1755		if (be->slot_unavail) {
1756			DRMACH_PR("Slot Unavailable event received");
1757			logsys++;
1758		}
1759		if (be->power_on) {
1760			DRMACH_PR("Power ON event received");
1761			logsys++;
1762		}
1763		if (be->power_off) {
1764			DRMACH_PR("Power OFF event received");
1765			logsys++;
1766		}
1767
1768		if (logsys)
1769			(void) drmach_log_sysevent(
1770			    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1771			    msg->p_hdr.slot), hint, SE_NOSLEEP, 1);
1772	}
1773}
1774
1775static uint32_t
1776drmach_get_msgid()
1777{
1778	uint32_t	rv;
1779	mutex_enter(&drmach_msglist_mutex);
1780	if (!(++drmach_msgid))
1781		++drmach_msgid;
1782	rv = drmach_msgid;
1783	mutex_exit(&drmach_msglist_mutex);
1784	return (rv);
1785}
1786
1787/*
1788 *	unlink an entry from the message transaction list
1789 *
1790 *	caller must hold drmach_msglist_mutex
1791 */
1792void
1793drmach_msglist_unlink(drmach_msglist_t *entry)
1794{
1795	ASSERT(mutex_owned(&drmach_msglist_mutex));
1796	if (entry->prev) {
1797		entry->prev->next = entry->next;
1798		if (entry->next)
1799			entry->next->prev = entry->prev;
1800	} else {
1801		drmach_msglist_first = entry->next;
1802		if (entry->next)
1803			entry->next->prev = NULL;
1804	}
1805	if (entry == drmach_msglist_last) {
1806		drmach_msglist_last = entry->prev;
1807	}
1808}
1809
1810void
1811drmach_msglist_link(drmach_msglist_t *entry)
1812{
1813	mutex_enter(&drmach_msglist_mutex);
1814	if (drmach_msglist_last) {
1815		entry->prev = drmach_msglist_last;
1816		drmach_msglist_last->next = entry;
1817		drmach_msglist_last = entry;
1818	} else {
1819		drmach_msglist_last = drmach_msglist_first = entry;
1820	}
1821	mutex_exit(&drmach_msglist_mutex);
1822}
1823
1824void
1825drmach_mbox_getmsg()
1826{
1827	int			err;
1828	register int		msgid;
1829	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
1830	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
1831	dr_proto_hdr_t		*php;
1832	drmach_msglist_t	*found, *entry;
1833	uint32_t		type = MBOXSC_MSG_REPLY;
1834	uint32_t		command;
1835	uint64_t		transid;
1836	uint32_t		length;
1837
1838	php = &msg->p_hdr;
1839
1840	while (drmach_getmsg_thread_run != 0) {
1841		/* get a reply message */
1842		command = 0;
1843		transid = 0;
1844		length = DRMACH_MAX_MBOX_MSG_SIZE;
1845		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1846		    &length, (void *)msg, drmach_to_getmsg);
1847
1848		if (err) {
1849			/*
1850			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
1851			 * the "error" is really just a normal, transient
1852			 * condition and we can retry the operation right away.
1853			 * Any other error suggests a more serious problem,
1854			 * ranging from a message being too big for our buffer
1855			 * (EMSGSIZE) to total failure of the mailbox layer.
1856			 * This second class of errors is much less "transient",
1857			 * so rather than retrying over and over (and getting
1858			 * the same error over and over) as fast as we can,
1859			 * we'll sleep for a while before retrying.
1860			 */
1861			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
1862				cmn_err(CE_WARN,
1863				    "mboxsc_getmsg failed, err=0x%x", err);
1864				delay(drmach_mbxerr_delay * hz);
1865			}
1866			continue;
1867		}
1868
1869		drmach_mbox_prmsg(msg, 0);
1870
1871		if (php->drproto_version != DRMBX_VERSION) {
1872			cmn_err(CE_WARN,
1873			    "mailbox version mismatch 0x%x vs 0x%x",
1874			    php->drproto_version, DRMBX_VERSION);
1875
1876			mutex_enter(&drmach_g_mbox_mutex);
1877			drmach_mbox_iflag = 0;
1878			/* schedule a reinit handshake if one isn't pending */
1879			if (!drmach_mbox_ipending) {
1880				if (taskq_dispatch(system_taskq,
1881				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1882				    != NULL) {
1883					drmach_mbox_ipending = 1;
1884				} else {
1885					cmn_err(CE_WARN, "failed to schedule "
1886					    "mailbox reinit");
1887				}
1888			}
1889			mutex_exit(&drmach_g_mbox_mutex);
1890
1891			continue;
1892		}
1893
1894		msgid = php->message_id;
1895		found = NULL;
1896		mutex_enter(&drmach_msglist_mutex);
1897		entry = drmach_msglist_first;
1898		while (entry != NULL) {
1899			if (entry->msgid == msgid) {
1900				found = entry;
1901				drmach_msglist_unlink(entry);
1902				entry = NULL;
1903			} else
1904				entry = entry->next;
1905		}
1906
1907		if (found) {
1908			mutex_enter(&found->g_lock);
1909
1910			found->e_code = php->error_code;
1911			if (found->i_buflen > 0)
1912				bcopy((caddr_t)&msg->msgdata, found->i_buf,
1913				    found->i_buflen);
1914			found->m_reply = 1;
1915
1916			cv_signal(&found->g_cv);
1917			mutex_exit(&found->g_lock);
1918		} else {
1919			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
1920			    msgid);
1921			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
1922			    php->command, php->expbrd, php->slot);
1923		}
1924
1925		mutex_exit(&drmach_msglist_mutex);
1926	}
1927	cmn_err(CE_WARN, "mbox_getmsg: exiting");
1928	mutex_enter(&drmach_msglist_mutex);
1929	entry = drmach_msglist_first;
1930	while (entry != NULL) {
1931		if (entry->p_flag == 1) {
1932			entry->f_error = -1;
1933			mutex_enter(&entry->g_lock);
1934			cv_signal(&entry->g_cv);
1935			mutex_exit(&entry->g_lock);
1936			drmach_msglist_unlink(entry);
1937		}
1938		entry = entry->next;
1939	}
1940	mutex_exit(&drmach_msglist_mutex);
1941	drmach_getmsg_thread_run = -1;
1942	thread_exit();
1943}
1944
/*
 * Body of the request-sender thread.  Scans the transaction list for
 * entries not yet sent (p_flag == 0), transmits each via mboxsc_putmsg,
 * and wakes the requester on success or permanent failure.  Transient
 * putmsg errors are retried; the entry's s_lock is held across retries
 * (the 'retry' flag prevents re-entering it).  On shutdown, all unsent
 * entries are failed with -1 and the sendmsg cv/mutex are destroyed.
 */
void
drmach_mbox_sendmsg()
{
	int		err, retry;
	drmach_msglist_t *entry;
	dr_mbox_msg_t   *mp;
	dr_proto_hdr_t  *php;

	while (drmach_sendmsg_thread_run != 0) {
		/*
		 * Search through the list to find entries awaiting
		 * transmission to the SC
		 */
		mutex_enter(&drmach_msglist_mutex);
		entry = drmach_msglist_first;
		retry = 0;
		while (entry != NULL) {
			/* skip entries that have already been put */
			if (entry->p_flag == 1) {
				entry = entry->next;
				continue;
			}

			/* drop the list lock while putmsg may block */
			mutex_exit(&drmach_msglist_mutex);

			/* first attempt for this entry: take its send lock */
			if (!retry)
				mutex_enter(&entry->s_lock);
			mp = (dr_mbox_msg_t *)entry->o_buf;
			php = &mp->p_hdr;

			drmach_mbox_prmsg(mp, 1);

			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
			    php->command, NULL, entry->o_buflen, (void *)mp,
			    drmach_to_putmsg);

			if (err) {
				switch (err) {

				/* transient: retry same entry, s_lock held */
				case EAGAIN:
				case EBUSY:
					++retry;
					mutex_enter(&drmach_msglist_mutex);
					continue;

				case ETIMEDOUT:
					/* give up after o_nretry timeouts */
					if (--entry->o_nretry <= 0) {
						mutex_enter(
						    &drmach_msglist_mutex);
						drmach_msglist_unlink(entry);
						mutex_exit(
						    &drmach_msglist_mutex);
						entry->f_error = err;
						entry->p_flag = 1;
						cv_signal(&entry->s_cv);
					} else {
						++retry;
						mutex_enter(
						    &drmach_msglist_mutex);
						continue;
					}
					break;
				default:
					/* permanent failure: fail the entry */
					mutex_enter(&drmach_msglist_mutex);
					drmach_msglist_unlink(entry);
					mutex_exit(&drmach_msglist_mutex);
					entry->f_error = err;
					entry->p_flag = 1;
					cv_signal(&entry->s_cv);
					break;
				}
			} else {
				/* sent: mark done and wake the requester */
				entry->p_flag = 1;
				cv_signal(&entry->s_cv);
			}

			mutex_exit(&entry->s_lock);
			retry = 0;
			/* restart the scan from the head of the list */
			mutex_enter(&drmach_msglist_mutex);
			entry = drmach_msglist_first;
		}
		mutex_exit(&drmach_msglist_mutex);

		/* wait (up to 5 seconds) for new work to be queued */
		mutex_enter(&drmach_sendmsg_mutex);
		(void) cv_reltimedwait(&drmach_sendmsg_cv,
		    &drmach_sendmsg_mutex, (5 * hz), TR_CLOCK_TICK);
		mutex_exit(&drmach_sendmsg_mutex);
	}
	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
	mutex_enter(&drmach_msglist_mutex);
	entry = drmach_msglist_first;
	while (entry != NULL) {
		if (entry->p_flag == 0) {
			entry->f_error = -1;
			mutex_enter(&entry->s_lock);
			cv_signal(&entry->s_cv);
			mutex_exit(&entry->s_lock);
			drmach_msglist_unlink(entry);
		}
		entry = entry->next;
	}
	mutex_exit(&drmach_msglist_mutex);
	/*
	 * NOTE(review): destroying the cv/mutex here assumes no other
	 * thread will signal drmach_sendmsg_cv after shutdown — confirm
	 * against the callers that set drmach_sendmsg_thread_run.
	 */
	cv_destroy(&drmach_sendmsg_cv);
	mutex_destroy(&drmach_sendmsg_mutex);

	drmach_sendmsg_thread_run = -1;
	thread_exit();
}
2052
2053void
2054drmach_msglist_destroy(drmach_msglist_t *listp)
2055{
2056	if (listp != NULL) {
2057		drmach_msglist_t	*entry;
2058
2059		mutex_enter(&drmach_msglist_mutex);
2060		entry = drmach_msglist_first;
2061		while (entry) {
2062			if (listp == entry) {
2063				drmach_msglist_unlink(listp);
2064				entry = NULL;
2065			} else
2066				entry = entry->next;
2067		}
2068
2069		mutex_destroy(&listp->s_lock);
2070		cv_destroy(&listp->s_cv);
2071		mutex_destroy(&listp->g_lock);
2072		cv_destroy(&listp->g_cv);
2073		kmem_free(listp, sizeof (drmach_msglist_t));
2074
2075		mutex_exit(&drmach_msglist_mutex);
2076	}
2077}
2078
2079static drmach_msglist_t	*
2080drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2081	uint32_t olen, int nrtry)
2082{
2083	drmach_msglist_t	*listp;
2084
2085	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2086	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2087	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2088	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2089	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2090	listp->o_buf = (caddr_t)hdrp;
2091	listp->o_buflen = olen;
2092	listp->i_buf = ibufp;
2093	listp->i_buflen = ilen;
2094	listp->o_nretry = nrtry;
2095	listp->msgid = hdrp->message_id;
2096
2097	return (listp);
2098}
2099
/*
 * Queue a request for the sendmsg thread and wait for its reply.
 * Returns the transaction entry (caller inspects/destroys it).  The
 * sequence is: link a new entry, kick the sendmsg thread, wait on s_cv
 * until the message has been put (or failed), then wait on g_cv up to
 * 'timeout' seconds for the reply.  'nosig' selects an uninterruptible
 * wait.  'link', when non-NULL, names a related transaction whose reply
 * must also be awaited (used by ABORT_TEST to collect the TESTBOARD
 * reply).
 */
static drmach_msglist_t *
drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
	uint32_t ilen, int timeout, int nrtry, int nosig,
	drmach_msglist_t *link)
{
	int		crv;
	drmach_msglist_t *listp;
	clock_t		to_val;
	dr_proto_hdr_t	*php;

	/* setup transaction list entry */
	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);

	/* send mailbox message, await reply */
	mutex_enter(&listp->s_lock);
	mutex_enter(&listp->g_lock);

	listp->link = link;
	drmach_msglist_link(listp);

	/* wake the sendmsg thread to transmit the new entry */
	mutex_enter(&drmach_sendmsg_mutex);
	cv_signal(&drmach_sendmsg_cv);
	mutex_exit(&drmach_sendmsg_mutex);

	/* wait until the sendmsg thread has put (or failed) the message */
	while (listp->p_flag == 0) {
		cv_wait(&listp->s_cv, &listp->s_lock);
	}

	/* absolute deadline for the reply wait (cv_timedwait semantics) */
	to_val = ddi_get_lbolt() + (timeout * hz);

	if (listp->f_error) {
		/* the put itself failed; report and skip the reply wait */
		listp->p_flag = 0;
		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x", listp->f_error);
		php = (dr_proto_hdr_t *)listp->o_buf;
		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
		    php->command, php->expbrd, php->slot);
	} else {
		/* wait for the getmsg thread to deliver the reply */
		while (listp->m_reply == 0 && listp->f_error == 0) {
			if (nosig)
				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
				    to_val);
			else
				crv = cv_timedwait_sig(&listp->g_cv,
				    &listp->g_lock, to_val);
			switch (crv) {
				case -1: /* timed out */
					cmn_err(CE_WARN,
					    "!msgid=0x%x reply timed out",
					    hdrp->message_id);
					php = (dr_proto_hdr_t *)listp->o_buf;
					cmn_err(CE_WARN, "!    cmd = 0x%x, "
					    "exb = %d, slot = %d", php->command,
					    php->expbrd, php->slot);
					listp->f_error = ETIMEDOUT;
					break;
				case 0: /* signal received */
					cmn_err(CE_WARN,
					    "operation interrupted by signal");
					listp->f_error = EINTR;
					break;
				default:
					break;
				}
		}

		/*
		 * If link is set for this entry, check to see if
		 * the linked entry has been replied to.  If not,
		 * wait for the response.
		 * Currently, this is only used for ABORT_TEST functionality,
		 * wherein a check is made for the TESTBOARD reply when
		 * the ABORT_TEST reply is received.
		 */

		if (link) {
			mutex_enter(&link->g_lock);
			/*
			 * If the reply to the linked entry hasn't been
			 * received, clear the existing link->f_error,
			 * and await the reply.
			 */
			if (link->m_reply == 0) {
				link->f_error = 0;
			}
			to_val =  ddi_get_lbolt() + (timeout * hz);
			while (link->m_reply == 0 && link->f_error == 0) {
				crv = cv_timedwait(&link->g_cv, &link->g_lock,
				    to_val);
				switch (crv) {
				case -1: /* timed out */
					cmn_err(CE_NOTE,
					    "!link msgid=0x%x reply timed out",
					    link->msgid);
					link->f_error = ETIMEDOUT;
					break;
				default:
					break;
				}
			}
			mutex_exit(&link->g_lock);
		}
	}
	mutex_exit(&listp->g_lock);
	mutex_exit(&listp->s_lock);
	return (listp);
}
2206
2207static sbd_error_t *
2208drmach_mbx2sbderr(drmach_msglist_t *mlp)
2209{
2210	char		a_pnt[MAXNAMELEN];
2211	dr_proto_hdr_t	*php;
2212	int		bnum;
2213
2214	if (mlp->f_error) {
2215		/*
2216		 * If framework failure is due to signal, return "no error"
2217		 * error.
2218		 */
2219		if (mlp->f_error == EINTR)
2220			return (drerr_new(0, ESTC_NONE, NULL));
2221
2222		mutex_enter(&drmach_g_mbox_mutex);
2223		drmach_mbox_iflag = 0;
2224		mutex_exit(&drmach_g_mbox_mutex);
2225		if (!mlp->p_flag)
2226			return (drerr_new(1, ESTC_MBXRQST, NULL));
2227		else
2228			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2229	}
2230	php = (dr_proto_hdr_t *)mlp->o_buf;
2231	bnum = 2 * php->expbrd + php->slot;
2232	a_pnt[0] = '\0';
2233	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2234
2235	switch (mlp->e_code) {
2236		case 0:
2237			return (NULL);
2238		case DRERR_NOACL:
2239			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2240		case DRERR_NOT_ASSIGNED:
2241			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2242		case DRERR_NOT_ACTIVE:
2243			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2244		case DRERR_EMPTY_SLOT:
2245			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2246		case DRERR_POWER_OFF:
2247			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2248		case DRERR_TEST_IN_PROGRESS:
2249			return (drerr_new(0, ESTC_TEST_IN_PROGRESS, "%s",
2250			    a_pnt));
2251		case DRERR_TESTING_BUSY:
2252			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2253		case DRERR_TEST_REQUIRED:
2254			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2255		case DRERR_UNAVAILABLE:
2256			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2257		case DRERR_RECOVERABLE:
2258			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE, "%s",
2259			    a_pnt));
2260		case DRERR_UNRECOVERABLE:
2261			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE, "%s",
2262			    a_pnt));
2263		default:
2264			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2265	}
2266}
2267
/*
 * Transact one DR protocol message with the SC: initialize the protocol
 * header at the front of obufp, send the request, and wait for the
 * matching reply (payload copied to ibufp when one is expected).
 * Returns NULL on success or an sbd_error_t describing the failure.
 *
 *	msgtype	DRMSG_* command to issue
 *	bnum	target board number (encoded into expander/slot fields)
 *	obufp	outgoing message buffer beginning with a dr_proto_hdr_t
 *	olen	length of outgoing buffer
 *	ibufp	reply payload buffer, or NULL when no payload is expected
 *	ilen	length of reply buffer
 */
static sbd_error_t *
drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
	caddr_t ibufp, int ilen)
{
	int			timeout = 0;
	int			ntries = 0;
	int			nosignals = 0;
	dr_proto_hdr_t 		*hdrp;
	drmach_msglist_t 	*mlp;
	sbd_error_t		*err = NULL;

	/*
	 * For anything other than an MBOX_INIT, the mailbox must already
	 * be initialized.  drmach_mbox_iflag is cleared whenever a
	 * framework failure is detected (see drmach_mbx2sbderr), in which
	 * case an MBOX_INIT is resent here before the caller's request.
	 * drmach_ri_mbox_mutex serializes these re-init attempts.
	 */
	if (msgtype != DRMSG_MBOX_INIT) {
		mutex_enter(&drmach_ri_mbox_mutex);
		mutex_enter(&drmach_g_mbox_mutex);
		if (drmach_mbox_iflag == 0) {
			/* need to initialize the mailbox */
			dr_proto_hdr_t	imsg;

			mutex_exit(&drmach_g_mbox_mutex);

			imsg.command = DRMSG_MBOX_INIT;

			imsg.message_id = drmach_get_msgid();
			imsg.drproto_version = DRMBX_VERSION;
			imsg.expbrd = 0;
			imsg.slot = 0;

			cmn_err(CE_WARN, "!reinitializing DR mailbox");
			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
			    10, 5, 0, NULL);
			err = drmach_mbx2sbderr(mlp);
			/*
			 * If framework failure incoming is encountered on
			 * the MBOX_INIT [timeout on SMS reply], the error
			 * type must be changed before returning to caller.
			 * This is to prevent drmach_board_connect() and
			 * drmach_board_disconnect() from marking boards
			 * UNUSABLE based on MBOX_INIT failures.
			 */
			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
				cmn_err(CE_WARN,
				    "!Changed mbox incoming to outgoing"
				    " failure on reinit");
				sbd_err_clear(&err);
				err = drerr_new(0, ESTC_MBXRQST, NULL);
			}
			drmach_msglist_destroy(mlp);
			if (err) {
				mutex_exit(&drmach_ri_mbox_mutex);
				return (err);
			}
			mutex_enter(&drmach_g_mbox_mutex);
			drmach_mbox_iflag = 1;
		}
		mutex_exit(&drmach_g_mbox_mutex);
		mutex_exit(&drmach_ri_mbox_mutex);
	}

	hdrp = (dr_proto_hdr_t *)obufp;

	/* setup outgoing mailbox header */
	hdrp->command = msgtype;
	hdrp->message_id = drmach_get_msgid();
	hdrp->drproto_version = DRMBX_VERSION;
	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
	hdrp->slot = DRMACH_BNUM2SLOT(bnum);

	/*
	 * Each message type selects its own reply timeout (drmach_to_*
	 * tunables), retry count, and whether the reply wait may be
	 * interrupted by a signal (nosignals).  CLAIM and UNCLAIM are
	 * issued with signals ignored.
	 */
	switch (msgtype) {

		case DRMSG_MBOX_INIT:
			timeout = drmach_to_mbxinit;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_ASSIGN:
			timeout = drmach_to_assign;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_UNASSIGN:
			timeout = drmach_to_unassign;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_POWERON:
			timeout = drmach_to_poweron;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_POWEROFF:
			timeout = drmach_to_poweroff;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_SHOWBOARD:
			timeout = drmach_to_showboard;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_CLAIM:
			timeout = drmach_to_claim;
			ntries = 1;
			nosignals = 1;
			break;

		case DRMSG_UNCLAIM:
			timeout = drmach_to_unclaim;
			ntries = 1;
			nosignals = 1;
			break;

		case DRMSG_UNCONFIG:
			timeout = drmach_to_unconfig;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_TESTBOARD:
			timeout = drmach_to_testboard;
			ntries = 1;
			nosignals = 0;
			break;

		default:
			cmn_err(CE_WARN, "Unknown outgoing message type 0x%x",
			    msgtype);
			err = DRMACH_INTERNAL_ERROR();
			break;
	}

	if (err == NULL) {
		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen, timeout,
		    ntries, nosignals, NULL);
		err = drmach_mbx2sbderr(mlp);

		/*
		 * For DRMSG_TESTBOARD attempts which have timed out, or
		 * been aborted due to a signal received after mboxsc_putmsg()
		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
		 * must be sent.
		 */
		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
		    (mlp->p_flag != 0)))) {
			drmach_msglist_t	*abmlp;
			dr_abort_test_t		abibuf;

			/* reuse the request header for the abort message */
			hdrp->command = DRMSG_ABORT_TEST;
			hdrp->message_id = drmach_get_msgid();
			abmlp = drmach_mbox_req_rply(hdrp,
			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
			cmn_err(CE_WARN, "test aborted");
			drmach_msglist_destroy(abmlp);
		}

		drmach_msglist_destroy(mlp);
	}

	return (err);
}
2435
/*
 * Set up the DR<->SC mailbox layer: register the outgoing (DR->SC) and
 * incoming (SC->DR) mailboxes, tune the putmsg timeout, initialize the
 * mailbox locks and message list, start the sendmsg/getmsg service
 * threads, and finally send the initial DRMSG_MBOX_INIT.
 *
 * drmach_mbox_istate records how far setup progressed (0..3) so that
 * drmach_mbox_fini() can unwind a partial initialization.  Returns 0
 * on success, -1 on failure.
 */
static int
drmach_mbox_init()
{
	int			err;
	caddr_t			obufp;
	sbd_error_t		*serr = NULL;
	mboxsc_timeout_range_t	mbxtoz;

	drmach_mbox_istate = 0;
	/* register the outgoing mailbox */
	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
	    NULL)) != 0) {
		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
		return (-1);
	}
	drmach_mbox_istate = 1;

	/*
	 * Setup the mboxsc_putmsg timeout value: either a hand-tuned
	 * override, or six times the mboxsc default; on mboxsc_ctrl
	 * failure fall back to a fixed value of 60000.
	 */
	if (drmach_use_tuned_putmsg_to) {
		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
		    drmach_to_putmsg);
	} else {
		if ((err = mboxsc_ctrl(KEY_DRSC,
		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
			drmach_to_putmsg = 60000;
		} else {
			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
			    " is 0x%lx\n", mbxtoz.min_timeout,
			    mbxtoz.max_timeout, drmach_to_putmsg);
		}
	}

	/* register the incoming mailbox; drmach_mbox_event handles events */
	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
	    drmach_mbox_event)) != 0) {
		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
		return (-1);
	}
	drmach_mbox_istate = 2;

	/* initialize mutex for mailbox globals */
	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);

	/* initialize mutex for mailbox re-init */
	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);

	/* initialize mailbox message list elements */
	drmach_msglist_first = drmach_msglist_last = NULL;
	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);

	drmach_mbox_istate = 3;

	/* start mailbox sendmsg thread (create only if not already running) */
	drmach_sendmsg_thread_run = 1;
	if (drmach_sendmsg_thread == NULL)
		drmach_sendmsg_thread = thread_create(NULL, 0,
		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

	/* start mailbox getmsg thread (create only if not already running) */
	drmach_getmsg_thread_run = 1;
	if (drmach_getmsg_thread == NULL)
		drmach_getmsg_thread = thread_create(NULL, 0,
		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

	/* send the initial MBOX_INIT handshake to the SC */
	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
	kmem_free(obufp, sizeof (dr_proto_hdr_t));
	if (serr) {
		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
		    serr->e_code);
		sbd_err_clear(&serr);
		return (-1);
	}
	/* mark the mailbox initialized under the globals lock */
	mutex_enter(&drmach_g_mbox_mutex);
	drmach_mbox_iflag = 1;
	drmach_mbox_ipending = 0;
	mutex_exit(&drmach_g_mbox_mutex);

	return (0);
}
2524
/*
 * Tear down the mailbox layer set up by drmach_mbox_init(), unwinding
 * only the stages recorded in drmach_mbox_istate.  Returns 0 on
 * success, -1 if either mailbox fails to de-register.
 */
static int
drmach_mbox_fini()
{
	int err, rv = 0;

	if (drmach_mbox_istate > 2) {
		/*
		 * Ask the getmsg/sendmsg service threads to exit by
		 * clearing their _run flags, then busy-wait until each
		 * thread has changed its flag to a nonzero value again
		 * (NOTE(review): presumably -1 as an exit acknowledgment
		 * -- confirm against drmach_mbox_getmsg/sendmsg).
		 */
		drmach_getmsg_thread_run = 0;
		drmach_sendmsg_thread_run = 0;
		cmn_err(CE_WARN,
		    "drmach_mbox_fini: waiting for mbox threads...");
		while ((drmach_getmsg_thread_run == 0) ||
		    (drmach_sendmsg_thread_run == 0)) {
			continue;
		}
		cmn_err(CE_WARN, "drmach_mbox_fini: mbox threads done.");
		mutex_destroy(&drmach_msglist_mutex);

	}
	if (drmach_mbox_istate) {
		/* de-register the outgoing mailbox */
		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
			    err);
			rv = -1;
		}
	}
	if (drmach_mbox_istate > 1) {
		/* de-register the incoming mailbox */
		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
			    err);
			rv = -1;
		}
	}
	mutex_destroy(&drmach_g_mbox_mutex);
	mutex_destroy(&drmach_ri_mbox_mutex);
	return (rv);
}
2563
/*
 * Translate a Safari agent port id to a board number.  The low 5 bits
 * of the portid identify the device within an expander and determine
 * the slot; the upper bits carry the expander number.
 *
 * Fix: the original left `slot' uninitialized when portid fell into
 * the default case -- ASSERT(0) compiles away on non-DEBUG kernels,
 * so the return value was derived from an indeterminate `slot' (UB).
 * Default to slot 0 so the result is at least well-defined.
 */
static int
drmach_portid2bnum(int portid)
{
	int slot;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		slot = 0;		/* well-defined fallback, non-DEBUG */
		break;
	}

	/* board number = (expander number << 1) | slot */
	return (((portid >> 4) & 0x7e) | slot);
}
2587
2588extern int axq_suspend_iopause;
2589
2590static int
2591hold_rele_branch(dev_info_t *rdip, void *arg)
2592{
2593	int	i;
2594	int	*holdp	= (int *)arg;
2595	char	*name = ddi_node_name(rdip);
2596
2597	/*
2598	 * For Starcat, we must be children of the root devinfo node
2599	 */
2600	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2601
2602	i = drmach_name2type_idx(name);
2603
2604	/*
2605	 * Only children of the root devinfo node need to be
2606	 * held/released since they are the only valid targets
2607	 * of tree operations. This corresponds to the node types
2608	 * listed in the drmach_name2type array.
2609	 */
2610	if (i < 0) {
2611		/* Not of interest to us */
2612		return (DDI_WALK_PRUNECHILD);
2613	}
2614
2615	if (*holdp) {
2616		ASSERT(!e_ddi_branch_held(rdip));
2617		e_ddi_branch_hold(rdip);
2618	} else {
2619		ASSERT(e_ddi_branch_held(rdip));
2620		e_ddi_branch_rele(rdip);
2621	}
2622
2623	return (DDI_WALK_PRUNECHILD);
2624}
2625
/*
 * One-time machine-dependent DR initialization: build the board array
 * from OBP "portid" properties, map the per-CPU SRAM page, set up the
 * I/O cage test area described by the GDCD, scrub the cage memory,
 * initialize mailbox communication with the SC, and place holds on all
 * DR-capable devinfo branches.  Returns 0 on success (or if already
 * initialized), -1 on failure.
 */
static int
drmach_init(void)
{
	pnode_t 	nodeid;
	gdcd_t		*gdcd;
	int		bnum;
	dev_info_t	*rdip;
	int		hold, circ;

	mutex_enter(&drmach_i_lock);
	if (drmach_initialized) {
		mutex_exit(&drmach_i_lock);
		return (0);
	}

	/* snapshot the golden DCD for slot and testcage information */
	gdcd = drmach_gdcd_new();
	if (gdcd == NULL) {
		mutex_exit(&drmach_i_lock);
		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
		return (-1);
	}

	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);

	/* discover boards by scanning OBP nodes with a "portid" property */
	nodeid = prom_childnode(prom_rootnode());
	do {
		int		 len;
		int		 portid;
		drmachid_t	 id;

		len = prom_getproplen(nodeid, "portid");
		if (len != sizeof (portid))
			continue;

		portid = -1;
		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
		if (portid == -1)
			continue;

		bnum = drmach_portid2bnum(portid);

		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
			/* portid translated to an invalid board number */
			cmn_err(CE_WARN, "OBP node 0x%x has"
			    " invalid property value, %s=%u",
			    nodeid, "portid", portid);

			/* clean up */
			drmach_array_dispose(drmach_boards,
			    drmach_board_dispose);
			drmach_gdcd_dispose(gdcd);
			mutex_exit(&drmach_i_lock);
			return (-1);
		} else if (id == NULL) {
			/* first node seen for this board: create it */
			drmach_board_t	*bp;
			l1_slot_stat_t	*dcd;
			int		exp, slot;

			bp = drmach_board_new(bnum);
			/*
			 * drmach_initialized is still 0 here, so boards
			 * present at init time start assigned and powered.
			 */
			bp->assigned = !drmach_initialized;
			bp->powered = !drmach_initialized;

			exp = DRMACH_BNUM2EXP(bnum);
			slot = DRMACH_BNUM2SLOT(bnum);
			dcd = &gdcd->dcd_slot[exp][slot];
			/* << 3: xword (8-byte) units to a byte offset */
			bp->stardrb_offset =
			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
			    bp->stardrb_offset);

			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
				bp->flags |= DRMACH_NULL_PROC_LPA;
				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
			}
		}
	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);

	/* reserve a VA page for temporary CPU SRAM mappings */
	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	/* map the I/O cage test area described by the GDCD, if enabled */
	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
		    gdcd->dcd_testcage_log2_mbytes_align);
		drmach_iocage_paddr =
		    (uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
		drmach_iocage_size =
		    1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);

		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
		    drmach_iocage_size, VM_SLEEP);
		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
		    mmu_btop(drmach_iocage_paddr),
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);

		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
		    gdcd->dcd_testcage_log2_mbytes_size,
		    gdcd->dcd_testcage_log2_mbytes_align,
		    gdcd->dcd_testcage_mbyte_PA);
		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
		    drmach_iocage_size, drmach_iocage_paddr,
		    (void *)drmach_iocage_vaddr);
	}

	/* DR cannot operate without an I/O cage; fail and undo the setup */
	if (drmach_iocage_size == 0) {
		drmach_array_dispose(drmach_boards, drmach_board_dispose);
		drmach_boards = NULL;
		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
		drmach_gdcd_dispose(gdcd);
		mutex_exit(&drmach_i_lock);
		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
		return (-1);
	}

	drmach_gdcd_dispose(gdcd);

	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Scrub the iocage memory, following the iocage busy protocol
	 * (is_busy flag + cv) while holding cpu_lock.
	 */
	mutex_enter(&cpu_lock);
	mutex_enter(&drmach_iocage_lock);
	ASSERT(drmach_iocage_is_busy == 0);
	drmach_iocage_is_busy = 1;
	drmach_iocage_mem_scrub(drmach_iocage_size);
	drmach_iocage_is_busy = 0;
	cv_signal(&drmach_iocage_cv);
	mutex_exit(&drmach_iocage_lock);
	mutex_exit(&cpu_lock);


	if (drmach_mbox_init() == -1) {
		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
	}

	/*
	 * Walk immediate children of devinfo root node and hold
	 * all devinfo branches of interest.
	 */
	hold = 1;
	rdip = ddi_root_node();

	ndi_devi_enter(rdip, &circ);
	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
	ndi_devi_exit(rdip, circ);

	drmach_initialized = 1;

	/*
	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
	 * rev introducing the axq_iopause_*_all interfaces should not regress
	 * when installed without the DR rev using those interfaces. The default
	 * is for iopause to be enabled/disabled during axq suspend/resume. By
	 * setting the following axq flag to zero, axq will not enable iopause
	 * during suspend/resume, instead DR will call the axq_iopause_*_all
	 * interfaces during drmach_copy_rename.
	 */
	axq_suspend_iopause = 0;

	mutex_exit(&drmach_i_lock);

	return (0);
}
2791
2792static void
2793drmach_fini(void)
2794{
2795	dev_info_t	*rdip;
2796	int		hold, circ;
2797
2798	if (drmach_initialized) {
2799		rw_enter(&drmach_boards_rwlock, RW_WRITER);
2800		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2801		drmach_boards = NULL;
2802		rw_exit(&drmach_boards_rwlock);
2803
2804		mutex_destroy(&drmach_slice_table_lock);
2805		mutex_destroy(&drmach_xt_mb_lock);
2806		mutex_destroy(&drmach_bus_sync_lock);
2807		cv_destroy(&drmach_iocage_cv);
2808		mutex_destroy(&drmach_iocage_lock);
2809
2810		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2811
2812		/*
2813		 * Walk immediate children of the root devinfo node
2814		 * releasing holds acquired on branches in drmach_init()
2815		 */
2816		hold = 0;
2817		rdip = ddi_root_node();
2818
2819		ndi_devi_enter(rdip, &circ);
2820		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2821		ndi_devi_exit(rdip, circ);
2822
2823		drmach_initialized = 0;
2824	}
2825
2826	(void) drmach_mbox_fini();
2827	if (drmach_xt_mb != NULL) {
2828		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
2829		    drmach_xt_mb_size);
2830	}
2831	rw_destroy(&drmach_boards_rwlock);
2832	mutex_destroy(&drmach_i_lock);
2833}
2834
2835static void
2836drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2837{
2838	kpreempt_disable();
2839
2840	/* get register address, read madr value */
2841	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2842		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2843	} else {
2844		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2845	}
2846
2847	kpreempt_enable();
2848}
2849
2850
2851static uint64_t *
2852drmach_prep_mc_rename(uint64_t *p, int local,
2853	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2854{
2855	int bank;
2856
2857	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2858		uint64_t madr, bank_offset;
2859
2860		/* fetch mc's bank madr register value */
2861		drmach_mem_read_madr(mp, bank, &madr);
2862		if (madr & DRMACH_MC_VALID_MASK) {
2863			uint64_t bankpa;
2864
2865			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2866			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2867			bankpa = new_basepa + bank_offset;
2868
2869			/* encode new base pa into madr */
2870			madr &= ~DRMACH_MC_UM_MASK;
2871			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2872			madr &= ~DRMACH_MC_LM_MASK;
2873			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2874
2875			if (local)
2876				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2877			else
2878				*p++ = DRMACH_MC_ADDR(mp, bank);
2879
2880			*p++ = madr;
2881		}
2882	}
2883
2884	return (p);
2885}
2886
2887static uint64_t *
2888drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
2889{
2890	drmach_board_t	*bp;
2891	int		 rv;
2892	int		 idx;
2893	drmachid_t	 id;
2894	uint64_t	 last_scsr_pa = 0;
2895
2896	/* memory is always in slot 0 */
2897	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2898
2899	/* look up slot 1 board on same expander */
2900	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
2901	rv = drmach_array_get(drmach_boards, idx, &id);
2902	bp = id; /* bp will be NULL if board not found */
2903
2904	/* look up should never be out of bounds */
2905	ASSERT(rv == 0);
2906
2907	/* nothing to do when board is not found or has no devices */
2908	if (rv == -1 || bp == NULL || bp->devices == NULL)
2909		return (p);
2910
2911	rv = drmach_array_first(bp->devices, &idx, &id);
2912	while (rv == 0) {
2913		if (DRMACH_IS_IO_ID(id)) {
2914			drmach_io_t *io = id;
2915
2916			/*
2917			 * Skip all non-Schizo IO devices (only IO nodes
2918			 * that are Schizo devices have non-zero scsr_pa).
2919			 * Filter out "other" leaf to avoid writing to the
2920			 * same Schizo Control/Status Register twice.
2921			 */
2922			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
2923				uint64_t scsr;
2924
2925				scsr  = lddphysio(io->scsr_pa);
2926				scsr &= ~(DRMACH_LPA_BASE_MASK |
2927				    DRMACH_LPA_BND_MASK);
2928				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
2929				scsr |= DRMACH_PA_TO_LPA_BND(
2930				    new_basepa + DRMACH_MEM_SLICE_SIZE);
2931
2932				*p++ = io->scsr_pa;
2933				*p++ = scsr;
2934
2935				last_scsr_pa = io->scsr_pa;
2936			}
2937		}
2938		rv = drmach_array_next(bp->devices, &idx, &id);
2939	}
2940
2941	return (p);
2942}
2943
2944/*
2945 * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
2946 * The latter is returned when drmach_rename fails to idle a Panther MC and
2947 * is used to identify the MC for error reporting.
2948 */
2949static uint64_t *
2950drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
2951{
2952	/* only slot 0 has memory */
2953	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2954	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
2955
2956	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
2957		ASSERT(DRMACH_IS_MEM_ID(mp));
2958
2959		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
2960			if (local) {
2961				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
2962				*p++ = (uintptr_t)mp;
2963			}
2964		} else if (!local) {
2965			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
2966			*p++ = (uintptr_t)mp;
2967		}
2968	}
2969
2970	return (p);
2971}
2972
/*
 * Build the copy-rename script that drmach_rename() executes with the
 * system quiesced.  The script is written into buf as a sequence of
 * zero-terminated sections, in this order:
 *
 *   1. MC bank base addresses used for bus syncing
 *   2. idle-reg addr/id tuples for Panther MCs local to this CPU
 *   3. idle-reg addr/id tuples for all other Panther MCs on the
 *      source/target boards
 *   4. MADR addr/value rename tuples for MCs local to this CPU
 *   5. MADR rename tuples for all other MCs, plus Schizo
 *      LPA_BASE/LPA_BND tuples
 *   6. AXQ CASM rename tuples
 *
 * followed by a final terminator.  Returns NULL on success, or an
 * internal error if the AXQ script cannot be generated.
 */
static sbd_error_t *
drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
	uint64_t t_slice_offset, caddr_t buf, int buflen)
{
	_NOTE(ARGUNUSED(buflen))

	uint64_t		*p = (uint64_t *)buf, *q;
	sbd_error_t		*err;
	int			 rv;
	drmach_mem_t		*mp, *skip_mp;
	uint64_t		 s_basepa, t_basepa;
	uint64_t		 s_new_basepa, t_new_basepa;

	/* verify supplied buffer space is adequate */
	ASSERT(buflen >=
	    /* addr for all possible MC banks */
	    (sizeof (uint64_t) * 4 * 4 * 18) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/id tuple for local Panther MC idle reg */
	    (sizeof (uint64_t) * 2) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/id tuple for 2 boards with 4 Panther MC idle regs */
	    (sizeof (uint64_t) * 2 * 2 * 4) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/val tuple for 1 proc with 4 MC banks */
	    (sizeof (uint64_t) * 2 * 4) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/val tuple for 2 boards w/ 2 schizos each */
	    (sizeof (uint64_t) * 2 * 2 * 2) +
	    /* addr/val tuple for 2 boards w/ 16 MC banks each */
	    (sizeof (uint64_t) * 2 * 2 * 16) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/val tuple for 18 AXQs w/ two slots each */
	    (sizeof (uint64_t) * 2 * 2 * 18) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* list terminator */
	    (sizeof (uint64_t) * 1));

	/* copy bank list to rename script */
	mutex_enter(&drmach_bus_sync_lock);
	for (q = drmach_bus_sync_list; *q; q++, p++)
		*p = *q;
	mutex_exit(&drmach_bus_sync_lock);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write idle script for MC on this processor.  A script will be
	 * produced only if this is a Panther processor on the source or
	 * target board.
	 */
	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, s_mp, 1);

	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, t_mp, 1);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write idle script for all other MCs on source and target
	 * Panther boards.
	 */
	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, s_mp, 0);

	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, t_mp, 0);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Step 1:	Write source base address to target MC
	 *		with present bit off.
	 * Step 2:	Now rewrite target reg with present bit on.
	 */
	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
	ASSERT(err == NULL);
	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
	ASSERT(err == NULL);

	/* exchange base pa. include slice offset in new target base pa */
	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
	    t_slice_offset;

	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);

	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
	    CPU->cpu_id);

	/*
	 * Write rename script for MC on this processor.  A script will
	 * be produced only if this processor is on the source or target
	 * board.  skip_mp remembers the MC handled here so the loops
	 * below do not emit it a second time.
	 */

	skip_mp = NULL;
	mp = s_mp->dev.bp->mem;
	while (mp != NULL && skip_mp == NULL) {
		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
			skip_mp = mp;
			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
			    s_new_basepa);
		}

		mp = mp->next;
	}

	mp = t_mp->dev.bp->mem;
	while (mp != NULL && skip_mp == NULL) {
		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
			skip_mp = mp;
			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
			    t_new_basepa);
		}

		mp = mp->next;
	}

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write rename script for all other MCs on source and target
	 * boards.
	 */

	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
		if (mp == skip_mp)
			continue;
		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
	}

	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
		if (mp == skip_mp)
			continue;
		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
	}

	/* Write rename script for Schizo LPA_BASE/LPA_BND */
	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);

	/* list section terminator */
	*p++ = 0;

	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
	    DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
	    DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));

	rv = axq_do_casm_rename_script(&p,
	    DRMACH_PA_TO_SLICE(s_new_basepa),
	    DRMACH_PA_TO_SLICE(t_new_basepa));
	if (rv == DDI_FAILURE)
		return (DRMACH_INTERNAL_ERROR());

	/* list section & final terminator */
	*p++ = 0;
	*p++ = 0;

#ifdef DEBUG
	{
		uint64_t *q = (uint64_t *)buf;

		/* paranoia */
		ASSERT((caddr_t)p <= buf + buflen);

		/* dump each script section for debugging */
		DRMACH_PR("MC bank base pa list:\n");
		while (*q) {
			uint64_t a = *q++;

			DRMACH_PR("0x%lx\n", a);
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
		while (*q) {
			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
		while (*q) {
			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
		while (*q) {
			uint64_t r = *q++;	/* register address */
			uint64_t v = *q++;	/* new register value */

			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
			    r, v, (long)(DRMACH_MC_UM_TO_PA(v)|
			    DRMACH_MC_LM_TO_PA(v)));
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
		while (*q) {
			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("AXQ reprogramming script:\n");
		while (*q) {
			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* verify final terminator is present */
		ASSERT(*(q + 1) == 0);

		DRMACH_PR("copy-rename script 0x%p, len %d\n",
		    (void *)buf, (int)((intptr_t)p - (intptr_t)buf));

		if (drmach_debug)
			DELAY(10000000);
	}
#endif

	return (NULL);
}
3220
/*
 * Record the new memory slice for every CPU on board bp in the
 * drmach_xt_mb cross-trap mailbox.  For a slot 0 board, recurse to
 * the slot 1 board on the same expander (bnum + 1) so its CPUs are
 * updated as well.  Caller must hold drmach_xt_mb_lock.
 */
static void
drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
{
	int		 rv;

	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));

	if (bp->devices) {
		int		 d_idx;
		drmachid_t	 d_id;

		/* walk the board's device list looking for CPUs */
		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
		while (rv == 0) {
			if (DRMACH_IS_CPU_ID(d_id)) {
				drmach_cpu_t	*cp = d_id;
				processorid_t	 cpuid = cp->cpuid;

				/*
				 * Only CPUs present with nonzero
				 * cpu_flags get an entry.  NOTE(review):
				 * 0x80 presumably marks the entry valid
				 * for drmach_rename -- confirm against
				 * the consumer of drmach_xt_mb.
				 */
				mutex_enter(&cpu_lock);
				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
					drmach_xt_mb[cpuid] = 0x80 | slice;
				mutex_exit(&cpu_lock);
			}
			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
		}
	}
	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
		drmach_board_t	*s1bp = NULL;

		/* also update CPUs on the slot 1 board of this expander */
		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
		    (void *) &s1bp);
		if (rv == 0 && s1bp != NULL) {
			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
		}
	}
}
3258
/*
 * Prepare a copy-rename operation between the source (s_id) and target
 * (t_id) memory units.  This routine:
 *
 *  - computes the source/target copy base PAs and rewrites the copy
 *    memlist c_ml in place to be relative to the source copy base PA;
 *  - maps this cpu's SRAM page into the I/D TLBs and stages a relocated
 *    copy of the drmach_rename() text there, followed by the rename
 *    script built by drmach_prep_rename_script();
 *  - disables and flushes the CDC, marks both memory units busy, and
 *    allocates the drmach_copy_rename_t control structure returned
 *    through cr_id.
 *
 * On success, drmach_slice_table_lock and drmach_xt_mb_lock are left
 * HELD; drmach_copy_rename_fini() releases them.  On failure the SRAM
 * TLB entries are demapped and an error is returned with no locks held.
 */
sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
{
	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
	extern void drmach_rename_end(void);

	drmach_mem_t	*s_mp, *t_mp;
	struct memlist	*x_ml;
	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
	int		 len;
	caddr_t		 bp, wp;
	uint_t		*p, *q;
	sbd_error_t	*err;
	tte_t		*tte;
	drmach_copy_rename_t *cr;

	if (!DRMACH_IS_MEM_ID(s_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	if (!DRMACH_IS_MEM_ID(t_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	s_mp = s_id;
	t_mp = t_id;

	/* get starting physical address of target memory */
	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
	if (err)
		return (err);

	/* calculate slice offset mask from slice size */
	off_mask = DRMACH_MEM_SLICE_SIZE - 1;

	/* calculate source and target base pa */
	s_copybasepa = c_ml->ml_address;
	t_copybasepa =
	    t_basepa + ((c_ml->ml_address & off_mask) - t_slice_offset);

	/* paranoia */
	ASSERT((c_ml->ml_address & off_mask) >= t_slice_offset);

	/* adjust copy memlist addresses to be relative to copy base pa */
	x_ml = c_ml;
	while (x_ml != NULL) {
		x_ml->ml_address -= s_copybasepa;
		x_ml = x_ml->ml_next;
	}

#ifdef DEBUG
	{
	uint64_t s_basepa, s_size, t_size;

	/* walk to the last memlist entry to compute each copy span's end */
	x_ml = c_ml;
	while (x_ml->ml_next != NULL)
		x_ml = x_ml->ml_next;

	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
	    s_copybasepa,
	    s_copybasepa + x_ml->ml_address + x_ml->ml_size);

	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
	    t_copybasepa,
	    t_copybasepa + x_ml->ml_address + x_ml->ml_size);

	DRMACH_PR("copy memlist (relative to copy base pa):\n");
	DRMACH_MEMLIST_DUMP(c_ml);

	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(s_id, &s_size);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(t_id, &t_size);
	ASSERT(err == NULL);

	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
	    s_basepa, s_size);
	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
	    t_basepa, t_size);
	}
#endif /* DEBUG */

	/* Map in appropriate cpu sram page */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	bp = wp = drmach_cpu_sram_va;

	/* Make sure the rename routine will fit */
	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
	ASSERT(wp + len < bp + PAGESIZE);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)wp;
	q = (uint_t *)drmach_rename;
	while (q < (uint_t *)drmach_rename_end)
		*p++ = *q++;

	/* zero remainder. standard bzero not designed to work in nc space */
	while (p < (uint_t *)(bp + PAGESIZE))
		*p++ = 0;

	DRMACH_PR("drmach_rename function 0x%p, len %d\n", (void *)wp, len);
	/* advance write pointer past the text, rounded up to 16 bytes */
	wp += (len + 15) & ~15;

	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset, wp,
	    PAGESIZE - (wp - bp));
	if (err) {
cleanup:
		/* demap the cpu sram page loaded above */
		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
		    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
		return (err);
	}

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
		goto cleanup;
	}

	/* mark both memory units busy */
	t_mp->dev.busy++;
	s_mp->dev.busy++;

	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
	    VM_SLEEP);
	cr->isa = (void *)drmach_copy_rename_init;
	cr->data = wp;
	cr->c_ml = c_ml;
	cr->s_mp = s_mp;
	cr->t_mp = t_mp;
	cr->s_copybasepa = s_copybasepa;
	cr->t_copybasepa = t_copybasepa;
	cr->ecode = DRMACH_CR_OK;

	/* both locks remain held until drmach_copy_rename_fini() */
	mutex_enter(&drmach_slice_table_lock);

	mutex_enter(&drmach_xt_mb_lock);
	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

	/* stage new LPA slice values for cpus on boards whose memory moves */
	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
		    DRMACH_PA_TO_SLICE(t_copybasepa));
	}
	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
		    DRMACH_PA_TO_SLICE(s_copybasepa));
	}

	*cr_id = cr;
	return (NULL);
}
3415
/* Debug observability: cpus xcalled and wait iterations used (see below). */
int drmach_rename_count;
int drmach_rename_ntries;
3418
/*
 * Tear down a copy-rename operation started by drmach_copy_rename_init()
 * and executed by drmach_copy_rename().  Re-enables the CDC, demaps the
 * cpu SRAM page, converts cr->ecode into an sbd_error_t, and releases
 * drmach_xt_mb_lock and drmach_slice_table_lock acquired by init.  On
 * success, refreshes the cached CASM shadow and bus sync list and sends
 * a best-effort UNCONFIG message to the SC.  Always frees the control
 * structure and drops the busy counts on both memory units.
 */
sbd_error_t *
drmach_copy_rename_fini(drmachid_t id)
{
	drmach_copy_rename_t	*cr = id;
	sbd_error_t		*err = NULL;
	dr_mbox_msg_t		*obufp;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);

	axq_cdc_enable_all();

	/* demap the cpu SRAM page mapped by drmach_copy_rename_init() */
	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
	    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);

	/* translate the copy-rename error code into an sbd error */
	switch (cr->ecode) {
	case DRMACH_CR_OK:
		break;
	case DRMACH_CR_MC_IDLE_ERR: {
		dev_info_t	*dip = NULL;
		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		ASSERT(DRMACH_IS_MEM_ID(mp));

		err = drmach_get_dip(mp, &dip);

		ASSERT(err == NULL);
		ASSERT(dip != NULL);

		err = drerr_new(0, ESBD_MEMFAIL, NULL);
		(void) ddi_pathname(dip, path);
		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
		    "copy-rename aborted", path, mp->dev.bp->cm.name);
		kmem_free(path, MAXPATHLEN);
		break;
	}
	case DRMACH_CR_IOPAUSE_ERR:
		/*
		 * NOTE(review): the >= 0 half of this check is vacuously
		 * true for an unsigned (uintptr_t) value.
		 */
		ASSERT((uintptr_t)cr->earg >= 0 &&
		    (uintptr_t)cr->earg < AXQ_MAX_EXP);

		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
		    " to copy-rename", (uintptr_t)cr->earg);
		break;
	case DRMACH_CR_ONTRAP_ERR:
		err = drerr_new(0, ESBD_MEMFAIL, NULL);
		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
		    "memory error");
		break;
	default:
		err = DRMACH_INTERNAL_ERROR();
		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
		    cr->ecode);
		break;
	}

#ifdef DEBUG
	/* report any cpus that never consumed their staged LPA update */
	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
		int	i;
		for (i = 0; i < NCPU; i++) {
			if (drmach_xt_mb[i])
				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
		}
	}
#endif
	mutex_exit(&drmach_xt_mb_lock);

	if (cr->c_ml != NULL)
		memlist_delete(cr->c_ml);

	cr->t_mp->dev.busy--;
	cr->s_mp->dev.busy--;

	if (err) {
		mutex_exit(&drmach_slice_table_lock);
		goto done;
	}

	/* update casm shadow for target and source board */
	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
	mutex_exit(&drmach_slice_table_lock);

	mutex_enter(&drmach_bus_sync_lock);
	drmach_bus_sync_list_update();
	mutex_exit(&drmach_bus_sync_lock);

	/*
	 * Make a good-faith effort to notify the SC about the copy-rename, but
	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
	 * will duplicate the update.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
	mutex_exit(&drmach_slice_table_lock);
	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
	    (caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

done:
	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));

	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
	    drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);

	return (err);
}
3529
/* Debug knob: non-zero inserts a delay loop per 32-byte copy (see below). */
int drmach_slow_copy = 0;
3531
/*
 * Execute the copy-rename operation prepared by drmach_copy_rename_init().
 * Runs on the "master" cpu with cpu_lock held (asserted below).  Pauses
 * slot 1 IO activity, parks every other cpu via an xcall to
 * drmach_rename_wait, then — with interrupts and CE reporting disabled
 * and under on_trap() protection — copies the source memory to the
 * target 32 bytes at a time, flushes the E$/I$/D$/P$, and jumps to the
 * relocated rename routine staged in the cpu SRAM page.  On completion
 * (or abort) the parked cpus are released via drmach_rename_done or
 * drmach_rename_abort.  Errors are reported through cr->ecode/earg and
 * translated later by drmach_copy_rename_fini().
 */
void
drmach_copy_rename(drmachid_t id)
{
	extern uint_t		 getpstate(void);
	extern void		 setpstate(uint_t);

	extern xcfunc_t		 drmach_rename_wait;
	extern xcfunc_t		 drmach_rename_done;
	extern xcfunc_t		 drmach_rename_abort;

	drmach_copy_rename_t	*cr = id;
	uint64_t		 neer;
	struct memlist		*ml;
	int			 i, count;
	int			 csize, lnsize;
	uint64_t		 caddr;
	cpuset_t		 cpuset;
	uint_t			 pstate;
	uint32_t		 exp = 0;
	on_trap_data_t		 otd;
	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cr->ecode == DRMACH_CR_OK);

	/*
	 * Prevent slot1 IO from accessing Safari memory bus.
	 */
	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
		cr->earg = (void *)(uintptr_t)exp;
		return;
	}

	/* park every ready cpu except this one */
	cpuset = cpu_ready_set;
	CPUSET_DEL(cpuset, CPU->cpu_id);
	count = ncpus - 1;
	drmach_rename_count = count;	/* for debug */

	drmach_xt_ready = 0;
	xt_some(cpuset, drmach_rename_wait, NULL, NULL);

	/* wait (bounded) for all parked cpus to check in */
	for (i = 0; i < drmach_cpu_ntries; i++) {
		if (drmach_xt_ready == count)
			break;
		DELAY(drmach_cpu_delay);
	}

	drmach_rename_ntries = i;	/* for debug */

	drmach_xt_ready = 0;		/* steal the line back */
	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
		drmach_xt_mb[i] = drmach_xt_mb[i];
	/*
	 * NOTE: the self-assignment above is intentional — the stores
	 * regain cache-line ownership for this cpu without altering the
	 * staged mailbox values.
	 */

	caddr = drmach_iocage_paddr;
	csize = cpunodes[CPU->cpu_id].ecache_size;
	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;

	/* disable CE reporting */
	neer = get_error_enable();
	set_error_enable(neer & ~EN_REG_CEEN);

	/* disable interrupts (paranoia) */
	pstate = getpstate();
	setpstate(pstate & ~PSTATE_IE);

	/*
	 * Execute copy-rename under on_trap to protect against a panic due
	 * to an uncorrectable error. Instead, DR will abort the copy-rename
	 * operation and rely on the OS to do the error reporting.
	 *
	 * In general, trap handling on any cpu once the copy begins
	 * can result in an inconsistent memory image on the target.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		cr->ecode = DRMACH_CR_ONTRAP_ERR;
		goto copy_rename_end;
	}

	/*
	 * DO COPY.
	 */
	for (ml = cr->c_ml; ml; ml = ml->ml_next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are relative to the copy base PAs */
		s_pa = cr->s_copybasepa + ml->ml_address;
		t_pa = cr->t_copybasepa + ml->ml_address;
		nbytes = ml->ml_size;

		while (nbytes != 0ull) {
			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));

			if (drmach_slow_copy) {	/* for debug */
				uint64_t i = 13 * 50;
				while (i--)
					;
			}
		}
	}

	/*
	 * XXX CHEETAH SUPPORT
	 * For cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 *
	 * NOTE: This code block is dangerous at this point in the
	 * copy-rename operation. It modifies memory after the copy
	 * has taken place which means that any persistent state will
	 * be abandoned after the rename operation. The code is also
	 * performing thread synchronization at a time when all but
	 * one processors are paused. This is a potential deadlock
	 * situation.
	 *
	 * This code block must be moved to drmach_copy_rename_init.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	/*
	 * bcopy32_il is implemented as a series of ldxa/stxa via
	 * ASI_MEM instructions. Following the copy loop, the E$
	 * of the master (this) processor will have lines in state
	 * O that correspond to lines of home memory in state gI.
	 * An E$ flush is necessary to commit these lines before
	 * proceeding with the rename operation.
	 *
	 * Flushing the E$ will automatically flush the W$, but
	 * the D$ and I$ must be flushed separately and explicitly.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * Each line of home memory is now in state gM, except in
	 * the case of a cheetah processor when the E$ flush area
	 * is included within the copied region. In such a case,
	 * the lines of home memory for the upper half of the
	 * flush area are in state gS.
	 *
	 * Each line of target memory is in state gM.
	 *
	 * Each line of this processor's E$ is in state I, except
	 * those of a cheetah processor. All lines of a cheetah
	 * processor's E$ are in state S and correspond to the lines
	 * in upper half of the E$ flush area.
	 *
	 * It is vital at this point that none of the lines in the
	 * home or target memories are in state gI and that none
	 * of the lines in this processor's E$ are in state O or Os.
	 * A single instance of such a condition will cause loss of
	 * coherency following the rename operation.
	 */

	/*
	 * Rename: call the relocated routine staged in the cpu SRAM page
	 * by drmach_copy_rename_init().
	 */
	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);

	/*
	 * Rename operation complete. The physical address space
	 * of the home and target memories have been swapped, the
	 * routing data in the respective CASM entries have been
	 * swapped, and LPA settings in the processor and schizo
	 * devices have been reprogrammed accordingly.
	 *
	 * In the case of a cheetah processor, the E$ remains
	 * populated with lines in state S that correspond to the
	 * lines in the former home memory. Now that the physical
	 * addresses have been swapped, these E$ lines correspond
	 * to lines in the new home memory which are in state gM.
	 * This combination is invalid. An additional E$ flush is
	 * necessary to restore coherency. The E$ flush will cause
	 * the lines of the new home memory for the flush region
	 * to transition from state gM to gS. The former home memory
	 * remains unmodified. This additional E$ flush has no effect
	 * on a cheetah+ processor.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * The D$ and I$ must be flushed to ensure that coherency is
	 * maintained. Any line in a cache that is in the valid
	 * state has its corresponding line of the new home memory
	 * in the gM state. This is an invalid condition. When the
	 * flushes are complete the cache line states will be
	 * resynchronized with those in the new home memory.
	 */
	flush_icache_il();			/* inline version */
	flush_dcache_il();			/* inline version */
	flush_pcache_il();			/* inline version */

copy_rename_end:

	no_trap();

	/* enable interrupts */
	setpstate(pstate);

	/* enable CE reporting */
	set_error_enable(neer);

	/* on error, release parked cpus with the abort handler instead */
	if (cr->ecode != DRMACH_CR_OK)
		drmach_end_wait_xcall = drmach_rename_abort;

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	axq_iopause_disable_all();

	/* release the parked cpus */
	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
}
3767
3768static void drmach_io_dispose(drmachid_t);
3769static sbd_error_t *drmach_io_release(drmachid_t);
3770static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3771
3772static sbd_error_t *
3773drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3774{
3775	drmach_node_t	*node = proto->node;
3776	sbd_error_t	*err;
3777	drmach_reg_t	 regs[3];
3778	int		 rv;
3779	int		 len = 0;
3780
3781	rv = node->n_getproplen(node, "reg", &len);
3782	if (rv != 0 || len != sizeof (regs)) {
3783		sbd_error_t *err;
3784
3785		/* pci nodes are expected to have regs */
3786		err = drerr_new(1, ESTC_GETPROP,
3787		    "Device Node 0x%x: property %s",
3788		    (uint_t)node->get_dnode(node), "reg");
3789		return (err);
3790	}
3791
3792	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3793	if (rv) {
3794		sbd_error_t *err;
3795
3796		err = drerr_new(1, ESTC_GETPROP,
3797		    "Device Node 0x%x: property %s",
3798		    (uint_t)node->get_dnode(node), "reg");
3799
3800		return (err);
3801	}
3802
3803	/*
3804	 * Fix up unit number so that Leaf A has a lower unit number
3805	 * than Leaf B.
3806	 */
3807	if ((proto->portid % 2) != 0) {
3808		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3809			proto->unum = 0;
3810		else
3811			proto->unum = 1;
3812	} else {
3813		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3814			proto->unum = 2;
3815		else
3816			proto->unum = 3;
3817	}
3818
3819	err = drmach_io_new(proto, idp);
3820	if (err == NULL) {
3821		drmach_io_t *self = *idp;
3822
3823		/* reassemble 64-bit base address */
3824		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3825		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3826	}
3827
3828	return (err);
3829}
3830
3831static sbd_error_t *
3832drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3833{
3834	drmach_io_t	*ip;
3835
3836	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3837	bcopy(proto, &ip->dev, sizeof (ip->dev));
3838	ip->dev.node = drmach_node_dup(proto->node);
3839	ip->dev.cm.isa = (void *)drmach_io_new;
3840	ip->dev.cm.dispose = drmach_io_dispose;
3841	ip->dev.cm.release = drmach_io_release;
3842	ip->dev.cm.status = drmach_io_status;
3843
3844	(void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3845	    ip->dev.type, ip->dev.unum);
3846
3847	*idp = (drmachid_t)ip;
3848	return (NULL);
3849}
3850
3851static void
3852drmach_io_dispose(drmachid_t id)
3853{
3854	drmach_io_t *self;
3855
3856	ASSERT(DRMACH_IS_IO_ID(id));
3857
3858	self = id;
3859	if (self->dev.node)
3860		drmach_node_dispose(self->dev.node);
3861
3862	kmem_free(self, sizeof (*self));
3863}
3864
3865/*ARGSUSED*/
3866sbd_error_t *
3867drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3868{
3869	drmach_board_t	*bp = (drmach_board_t *)id;
3870	sbd_error_t	*err = NULL;
3871
3872	if (id && DRMACH_IS_BOARD_ID(id)) {
3873		switch (cmd) {
3874			case SBD_CMD_TEST:
3875			case SBD_CMD_STATUS:
3876			case SBD_CMD_GETNCM:
3877				break;
3878			case SBD_CMD_CONNECT:
3879				if (bp->connected)
3880					err = drerr_new(0, ESBD_STATE, NULL);
3881
3882				if (bp->cond == SBD_COND_UNUSABLE)
3883					err = drerr_new(0,
3884					    ESBD_FATAL_STATE, NULL);
3885				break;
3886			case SBD_CMD_DISCONNECT:
3887				if (!bp->connected)
3888					err = drerr_new(0, ESBD_STATE, NULL);
3889
3890				if (bp->cond == SBD_COND_UNUSABLE)
3891					err = drerr_new(0,
3892					    ESBD_FATAL_STATE, NULL);
3893				break;
3894			default:
3895				if (bp->cond == SBD_COND_UNUSABLE)
3896					err = drerr_new(0,
3897					    ESBD_FATAL_STATE, NULL);
3898				break;
3899
3900		}
3901	}
3902
3903	return (err);
3904}
3905
/*ARGSUSED*/
/*
 * Platform hook invoked after a DR state-change operation.  No
 * post-processing is required on this platform; always succeeds.
 */
sbd_error_t *
drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
	return (NULL);
}
3912
/*
 * Assign board bnum to this domain.  Performs lazy drmach layer
 * initialization, sends an ASSIGN message to the SC, and creates the
 * board object if it does not already exist.  Returns NULL on success
 * with *id set to the board object, otherwise an sbd_error_t.
 */
sbd_error_t *
drmach_board_assign(int bnum, drmachid_t *id)
{
	sbd_error_t	*err = NULL;
	caddr_t		obufp;

	/* lazy one-time initialization of the drmach layer */
	if (!drmach_initialized && drmach_init() == -1) {
		err = DRMACH_INTERNAL_ERROR();
	}

	rw_enter(&drmach_boards_rwlock, RW_WRITER);

	if (!err) {
		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
		} else {
			drmach_board_t	*bp;

			/*
			 * Board object already exists, so the write lock
			 * (needed only to create one) can be downgraded to
			 * a read lock for the duration of the SC exchange.
			 */
			if (*id)
				rw_downgrade(&drmach_boards_rwlock);

			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
			kmem_free(obufp, sizeof (dr_proto_hdr_t));

			if (!err) {
				bp = *id;
				/* create the board object on first assign */
				if (!*id)
					bp = *id  =
					    (drmachid_t)drmach_board_new(bnum);
				bp->assigned = 1;
			}
		}
	}
	rw_exit(&drmach_boards_rwlock);
	return (err);
}
3951
3952static uint_t
3953drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
3954{
3955	uint_t	port, port_start, port_end;
3956	uint_t	non_panther_cpus = 0;
3957	uint_t	impl;
3958
3959	ASSERT(gdcd != NULL);
3960
3961	/*
3962	 * Determine PRD port indices based on slot location.
3963	 */
3964	switch (slot) {
3965	case 0:
3966		port_start = 0;
3967		port_end = 3;
3968		break;
3969	case 1:
3970		port_start = 4;
3971		port_end = 5;
3972		break;
3973	default:
3974		ASSERT(0);
3975		/* check all */
3976		port_start = 0;
3977		port_end = 5;
3978		break;
3979	}
3980
3981	for (port = port_start; port <= port_end; port++) {
3982		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
3983		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
3984			/*
3985			 * This Safari port passed POST and represents a
3986			 * cpu, so check the implementation.
3987			 */
3988			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
3989			    & 0xffff;
3990
3991			switch (impl) {
3992			case CHEETAH_IMPL:
3993			case CHEETAH_PLUS_IMPL:
3994			case JAGUAR_IMPL:
3995				non_panther_cpus++;
3996				break;
3997			case PANTHER_IMPL:
3998				break;
3999			default:
4000				ASSERT(0);
4001				non_panther_cpus++;
4002				break;
4003			}
4004		}
4005	}
4006
4007	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4008	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4009
4010	return (non_panther_cpus);
4011}
4012
/*
 * Connect (claim) a board into the domain.  Sends a CLAIM message with
 * current CASM info to the SC, reads per-board data from the GDCD (cpu
 * SRAM DR buffer offset, LPA flags), enforces the large-page restriction
 * against mixing non-Panther cpus into an all-Panther domain, and runs
 * the saf configurator probe.  On failure after CLAIM, the board is
 * UNCLAIMed (best effort) and its connected flag cleared.  Returns NULL
 * on success, otherwise an sbd_error_t the caller owns.
 */
sbd_error_t *
drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
{
	_NOTE(ARGUNUSED(opts))

	drmach_board_t		*bp = (drmach_board_t *)id;
	sbd_error_t		*err;
	dr_mbox_msg_t		*obufp;
	gdcd_t			*gdcd = NULL;
	uint_t			exp, slot;
	sc_gptwocfg_cookie_t	scc;
	int			panther_pages_enabled;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	/*
	 * Build the casm info portion of the CLAIM message.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
	mutex_exit(&drmach_slice_table_lock);
	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
	    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	if (err) {
		/*
		 * if mailbox timeout or unrecoverable error from SC,
		 * board cannot be touched.  Mark the status as
		 * unusable.
		 */
		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
		    (err->e_code == ESTC_MBXRPLY))
			bp->cond = SBD_COND_UNUSABLE;
		return (err);
	}

	gdcd = drmach_gdcd_new();
	if (gdcd == NULL) {
		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
		    bp->cm.name);
		return (DRMACH_INTERNAL_ERROR());
	}

	/*
	 * Read CPU SRAM DR buffer offset from GDCD.
	 */
	exp = DRMACH_BNUM2EXP(bp->bnum);
	slot = DRMACH_BNUM2SLOT(bp->bnum);
	bp->stardrb_offset =
	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
	    bp->stardrb_offset);

	/*
	 * Read board LPA setting from GDCD.
	 */
	bp->flags &= ~DRMACH_NULL_PROC_LPA;
	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
		bp->flags |= DRMACH_NULL_PROC_LPA;
		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
	}

	/*
	 * XXX Until the Solaris large pages support heterogeneous cpu
	 * domains, DR needs to prevent the addition of non-Panther cpus
	 * to an all-Panther domain with large pages enabled.
	 */
	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
	    panther_pages_enabled && drmach_large_page_restriction) {
		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
		err = drerr_new(0, ESTC_SUPPORT, NULL);
	}

	if (err == NULL) {
		/* do saf configurator stuff */
		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
		scc = sc_probe_board(bp->bnum);
		if (scc == NULL)
			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
	}

	if (err) {
		/* flush CDC srams */
		if (axq_cdc_flush_all() != DDI_SUCCESS) {
			goto out;
		}

		/*
		 * Build the casm info portion of the UNCLAIM message.
		 */
		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
		mutex_enter(&drmach_slice_table_lock);
		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
		mutex_exit(&drmach_slice_table_lock);
		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
		    (caddr_t)obufp, sizeof (dr_mbox_msg_t),
		    (caddr_t)NULL, 0);

		kmem_free(obufp, sizeof (dr_mbox_msg_t));

		/*
		 * we clear the connected flag just in case it would have
		 * been set by a concurrent drmach_board_status() thread
		 * before the UNCLAIM completed.
		 */
		bp->connected = 0;
		goto out;
	}

	/*
	 * Now that the board has been successfully attached, obtain
	 * platform-specific DIMM serial id information for the board.
	 */
	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
	}

out:
	if (gdcd != NULL)
		drmach_gdcd_dispose(gdcd);

	return (err);
}
4145
/*
 * Refresh (or invalidate) the cached CASM slice entry for the expander
 * of board bp.  A slot 0 AXQ instance is located (and cached in the
 * function-local statics axq_dip/axq_exp/axq_slot, with a devinfo hold)
 * and its CASM is scanned for a slice that routes to this expander.
 * When invalidate is non-zero, the cached entry — and the cached AXQ
 * handle if it belongs to the same expander — are cleared instead.
 *
 * Caller must hold drmach_slice_table_lock (asserted below), which also
 * serializes access to the static cache.
 */
static void
drmach_slice_table_update(drmach_board_t *bp, int invalidate)
{
	static char		*axq_name = "address-extender-queue";
	static dev_info_t	*axq_dip = NULL;
	static int		 axq_exp = -1;
	static int		 axq_slot;
	int			 e, s, slice;

	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));

	e = DRMACH_BNUM2EXP(bp->bnum);
	if (invalidate) {
		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);

		/* invalidate cached casm value */
		drmach_slice_table[e] = 0;

		/* invalidate cached axq info if for same exp */
		if (e == axq_exp && axq_dip) {
			ndi_rele_devi(axq_dip);
			axq_dip = NULL;
		}
	}

	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
		int i, portid;

		/* search for an attached slot0 axq instance */
		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
			if (axq_dip)
				ndi_rele_devi(axq_dip);
			axq_dip = ddi_find_devinfo(axq_name, i, 0);
			if (axq_dip && DDI_CF2(axq_dip)) {
				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
				    DDI_PROP_DONTPASS, "portid", -1);
				if (portid == -1) {
					DRMACH_PR("cant get portid of axq "
					    "instance %d\n", i);
					continue;
				}

				/* portid encodes expander and slot */
				axq_exp = (portid >> 5) & 0x1f;
				axq_slot = portid & 1;

				/* don't cache an axq on the exp going away */
				if (invalidate && axq_exp == e)
					continue;

				if (axq_slot == 0)
					break;	/* found */
			}
		}

		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
			/* search exhausted without a usable slot0 axq */
			if (axq_dip) {
				ndi_rele_devi(axq_dip);
				axq_dip = NULL;
			}
			DRMACH_PR("drmach_slice_table_update: failed to "
			    "update axq dip\n");
			return;
		}

	}

	ASSERT(axq_dip);
	ASSERT(axq_slot == 0);

	if (invalidate)
		return;

	s = DRMACH_BNUM2SLOT(bp->bnum);
	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n", axq_exp, axq_slot,
	    e, s);

	/* invalidate entry */
	drmach_slice_table[e] &= ~0x20;

	/*
	 * find a slice that routes to expander e. If no match
	 * is found, drmach_slice_table[e] will remain invalid.
	 *
	 * The CASM is a routing table indexed by slice number.
	 * Each element in the table contains permission bits,
	 * a destination expander number and a valid bit. The
	 * valid bit must true for the element to be meaningful.
	 *
	 * CASM entry structure
	 *   Bits 15..6 ignored
	 *   Bit  5	valid
	 *   Bits 0..4	expander number
	 *
	 * NOTE: the for loop is really enumerating the range of slices,
	 * which is ALWAYS equal to the range of expanders. Hence,
	 * AXQ_MAX_EXP is okay to use in this loop.
	 */
	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);

		if ((casm & 0x20) && (casm & 0x1f) == e)
			drmach_slice_table[e] = 0x20 | slice;
	}
}
4249
4250/*
4251 * Get base and bound PAs for slot 1 board lpa programming
4252 * If a cpu/mem board is present in the same expander, use slice
4253 * information corresponding to the CASM.  Otherwise, set base and
4254 * bound PAs to 0.
4255 */
4256static void
4257drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4258{
4259	drmachid_t s0id;
4260
4261	ASSERT(mutex_owned(&drmach_slice_table_lock));
4262	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4263
4264	*basep = *boundp = 0;
4265	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4266	    s0id != 0) {
4267
4268		uint32_t slice;
4269		if ((slice = drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4270		    & 0x20) {
4271			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4272			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4273		}
4274	}
4275}
4276
4277
4278/*
4279 * Reprogram slot 1 lpa's as required.
4280 * The purpose of this routine is maintain the LPA settings of the devices
4281 * in slot 1. To date we know Schizo and Cheetah are the only devices that
4282 * require this attention. The LPA setting must match the slice field in the
4283 * CASM element for the local expander. This field is guaranteed to be
4284 * programmed in accordance with the cacheable address space on the slot 0
4285 * board of the local expander. If no memory is present on the slot 0 board,
4286 * there is no cacheable address space and, hence, the CASM slice field will
4287 * be zero or its valid bit will be false (or both).
4288 */
4289
/*
 * Program the local physical address (LPA) registers for the slot 1
 * board of the expander containing 'bp'.  'bp' may be either the slot 1
 * board itself or its slot 0 companion.  Schizo IO leaves have their
 * LPA base/bound fields rewritten in the Schizo Control/Status Register;
 * on MAXCAT boards (slot 1 boards with no IO devices) each ready cpu is
 * instead cross-called with drmach_set_lpa to reprogram itself.
 * Returns silently when the slot 1 board is absent or has no devices.
 */
static void
drmach_slot1_lpa_set(drmach_board_t *bp)
{
	drmachid_t	id;
	drmach_board_t	*s1bp = NULL;
	int		rv, idx, is_maxcat = 1;
	uint64_t	last_scsr_pa = 0;
	uint64_t	new_basepa, new_boundpa;

	/* resolve the slot 1 board; bp may already be it */
	if (DRMACH_BNUM2SLOT(bp->bnum)) {
		s1bp = bp;
		if (s1bp->devices == NULL) {
			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
			    bp->bnum);
			return;
		}
	} else {
		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
		/* nothing to do when board is not found or has no devices */
		s1bp = id;
		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
			    bp->bnum + 1);
			return;
		}
		ASSERT(DRMACH_IS_BOARD_ID(id));
	}
	mutex_enter(&drmach_slice_table_lock);
	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
	    s1bp->bnum, new_basepa, new_boundpa);

	/*
	 * Walk the slot 1 board's devices.  Seeing any IO device means
	 * this is not a MAXCAT board; Schizo IO leaves get their LPA
	 * base/bound rewritten here.
	 */
	rv = drmach_array_first(s1bp->devices, &idx, &id);
	while (rv == 0) {
		if (DRMACH_IS_IO_ID(id)) {
			drmach_io_t *io = id;

			is_maxcat = 0;

			/*
			 * Skip all non-Schizo IO devices (only IO nodes
			 * that are Schizo devices have non-zero scsr_pa).
			 * Filter out "other" leaf to avoid writing to the
			 * same Schizo Control/Status Register twice.
			 */
			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
				uint64_t scsr;

				scsr  = lddphysio(io->scsr_pa);
				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
				    scsr);
				scsr &= ~(DRMACH_LPA_BASE_MASK |
				    DRMACH_LPA_BND_MASK);
				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);

				stdphysio(io->scsr_pa, scsr);
				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
				    scsr);

				last_scsr_pa = io->scsr_pa;
			}
		}
		rv = drmach_array_next(s1bp->devices, &idx, &id);
	}

	/*
	 * MAXCAT case: cross-call each ready cpu on the board so it
	 * reprograms its own LPA from the mailbox drmach_xt_mb.
	 */
	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
		extern xcfunc_t	drmach_set_lpa;

		DRMACH_PR("reprogramming maxcat lpa's");

		mutex_enter(&cpu_lock);
		rv = drmach_array_first(s1bp->devices, &idx, &id);
		while (rv == 0 && id != NULL) {
			if (DRMACH_IS_CPU_ID(id)) {
				int ntries;
				processorid_t cpuid;

				cpuid = ((drmach_cpu_t *)id)->cpuid;

				/*
				 * Check for unconfigured or powered-off
				 * MCPUs.  If CPU_READY flag is clear, the
				 * MCPU cannot be xcalled.
				 */
				if ((cpu[cpuid] == NULL) ||
				    (cpu[cpuid]->cpu_flags &
				    CPU_READY) == 0) {

					rv = drmach_array_next(s1bp->devices,
					    &idx, &id);
					continue;
				}

				/*
				 * XXX CHEETAH SUPPORT
				 * for cheetah, we need to clear iocage
				 * memory since it will be used for e$ flush
				 * in drmach_set_lpa.
				 */
				if (drmach_is_cheetah) {
					mutex_enter(&drmach_iocage_lock);
					while (drmach_iocage_is_busy)
						cv_wait(&drmach_iocage_cv,
						    &drmach_iocage_lock);
					drmach_iocage_is_busy = 1;
					drmach_iocage_mem_scrub(ecache_size *
					    2);
					mutex_exit(&drmach_iocage_lock);
				}

				/*
				 * drmach_slice_table[*]
				 *	bit 5	valid
				 *	bit 0:4	slice number
				 *
				 * drmach_xt_mb[*] format for drmach_set_lpa
				 *	bit 7	valid
				 *	bit 6	set null LPA
				 *			(overrides bits 0:4)
				 *	bit 0:4	slice number
				 *
				 * drmach_set_lpa derives processor CBASE and
				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
				 * If bit 6 is set, then CBASE = CBND = 0.
				 * Otherwise, CBASE = slice number;
				 * CBND = slice number + 1.
				 * No action is taken if bit 7 is zero.
				 */

				mutex_enter(&drmach_xt_mb_lock);
				bzero((void *)drmach_xt_mb,
				    drmach_xt_mb_size);

				if (new_basepa == 0 && new_boundpa == 0)
					drmach_xt_mb[cpuid] = 0x80 | 0x40;
				else
					drmach_xt_mb[cpuid] = 0x80 |
					    DRMACH_PA_TO_SLICE(new_basepa);

				drmach_xt_ready = 0;

				xt_one(cpuid, drmach_set_lpa, NULL, NULL);

				/* bounded busy-wait for the target's ack */
				ntries = drmach_cpu_ntries;
				while (!drmach_xt_ready && ntries) {
					DELAY(drmach_cpu_delay);
					ntries--;
				}
				mutex_exit(&drmach_xt_mb_lock);
				drmach_xt_ready = 0;

				/*
				 * XXX CHEETAH SUPPORT
				 * for cheetah, we need to clear iocage
				 * memory since it was used for e$ flush
				 * in performed drmach_set_lpa.
				 */
				if (drmach_is_cheetah) {
					mutex_enter(&drmach_iocage_lock);
					drmach_iocage_mem_scrub(ecache_size *
					    2);
					drmach_iocage_is_busy = 0;
					cv_signal(&drmach_iocage_cv);
					mutex_exit(&drmach_iocage_lock);
				}
			}
			rv = drmach_array_next(s1bp->devices, &idx, &id);
		}
		mutex_exit(&cpu_lock);
	}
	mutex_exit(&drmach_slice_table_lock);
}
4463
4464/*
4465 * Return the number of connected Panther boards in the domain.
4466 */
4467static int
4468drmach_panther_boards(void)
4469{
4470	int		rv;
4471	int		b_idx;
4472	drmachid_t	b_id;
4473	drmach_board_t	*bp;
4474	int		npanther = 0;
4475
4476	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4477	while (rv == 0) {
4478		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4479		bp = b_id;
4480
4481		if (IS_PANTHER(bp->cpu_impl))
4482			npanther++;
4483
4484		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4485	}
4486
4487	return (npanther);
4488}
4489
/*
 * Disconnect a board from the domain.  Builds the CASM portion of the
 * UNCLAIM mailbox message, deprobes the board from the saf
 * configurator, and then sends the UNCLAIM to the SC.  On an
 * unrecoverable mailbox failure the board is marked unusable; on other
 * UNCLAIM failures the board is re-probed to restore its prior state.
 * Returns NULL on success or an sbd_error_t describing the failure.
 * 'opts' is unused.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
{
	drmach_board_t	*bp;
	dr_mbox_msg_t	*obufp;
	sbd_error_t	*err = NULL;

	sc_gptwocfg_cookie_t	scc;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	bp = id;

	/*
	 * Build the casm info portion of the UNCLAIM message.
	 * This must be done prior to calling for saf configurator
	 * deprobe, to ensure that the associated axq instance
	 * is not detached.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);

	/*
	 * If disconnecting slot 0 board, update the casm slice table
	 * info now, for use by drmach_slot1_lpa_set()
	 */
	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
		drmach_slice_table_update(bp, 1);

	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
	mutex_exit(&drmach_slice_table_lock);

	/*
	 * Update LPA information for slot1 board
	 */
	drmach_slot1_lpa_set(bp);

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
	}

	/*
	 * call saf configurator for deprobe
	 * It's done now before sending an UNCLAIM message because
	 * IKP will probe boards it doesn't know about <present at boot>
	 * prior to unprobing them.  If this happens after sending the
	 * UNCLAIM, it will cause a dstop for domain transgression error.
	 */

	if (!err) {
		scc = sc_unprobe_board(bp->bnum);
		axq_cdc_enable_all();
		if (scc != NULL) {
			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
		}
	}

	/*
	 * If disconnecting a board from a Panther domain, wait a fixed-
	 * time delay for pending Safari transactions to complete on the
	 * disconnecting board's processors.  The bus sync list read used
	 * in drmach_shutdown_asm to synchronize with outstanding Safari
	 * transactions assumes no read-bypass-write mode for all memory
	 * controllers.  Since Panther supports read-bypass-write, a
	 * delay is used that is slightly larger than the maximum Safari
	 * timeout value in the Safari/Fireplane Config Reg.
	 */
	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
		clock_t	stime = ddi_get_lbolt();

		delay(drv_usectohz(drmach_unclaim_usec_delay));

		stime = ddi_get_lbolt() - stime;
		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
	}

	if (!err) {
		obufp->msgdata.dm_ur.mem_clear = 0;

		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
		    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);

		if (err) {
			/*
			 * if mailbox timeout or unrecoverable error from SC,
			 * board cannot be touched.  Mark the status as
			 * unusable.
			 */
			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
			    (err->e_code == ESTC_MBXRPLY))
				bp->cond = SBD_COND_UNUSABLE;
			else {
				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
				    bp->bnum);
				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
				    bp->bnum);
				scc = sc_probe_board(bp->bnum);
				if (scc == NULL) {
					cmn_err(CE_WARN,
					"sc_probe_board failed for bnum=%d",
					    bp->bnum);
				} else {
					/*
					 * Re-probe succeeded: restore the
					 * slice table and slot 1 LPA state
					 * that the disconnect attempt changed.
					 */
					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
						mutex_enter(
						    &drmach_slice_table_lock);
						drmach_slice_table_update(bp,
						    0);
						mutex_exit(
						    &drmach_slice_table_lock);
					}
					drmach_slot1_lpa_set(bp);
				}
			}
		} else {
			bp->connected = 0;
			/*
			 * Now that the board has been successfully detached,
			 * discard platform-specific DIMM serial id information
			 * for the board.
			 */
			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
			    plat_ecc_capability_sc_get(
			    PLAT_ECC_DIMM_SID_MESSAGE)) {
				(void) plat_discard_mem_sids(
				    DRMACH_BNUM2EXP(bp->bnum));
			}
		}
	}
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	return (err);
}
4627
4628static int
4629drmach_get_portid(drmach_node_t *np)
4630{
4631	drmach_node_t	pp;
4632	int		portid;
4633	char		type[OBP_MAXPROPNAME];
4634
4635	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4636		return (portid);
4637
4638	/*
4639	 * Get the device_type property to see if we should
4640	 * continue processing this node.
4641	 */
4642	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4643		return (-1);
4644
4645	/*
4646	 * If the device is a CPU without a 'portid' property,
4647	 * it is a CMP core. For such cases, the parent node
4648	 * has the portid.
4649	 */
4650	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4651		if (np->get_parent(np, &pp) != 0)
4652			return (-1);
4653
4654		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4655			return (portid);
4656	}
4657
4658	return (-1);
4659}
4660
4661/*
4662 * This is a helper function to determine if a given
4663 * node should be considered for a dr operation according
4664 * to predefined dr type nodes and the node's name.
4665 * Formal Parameter : The name of a device node.
4666 * Return Value: -1, name does not map to a valid dr type.
4667 *		 A value greater or equal to 0, name is a valid dr type.
4668 */
4669static int
4670drmach_name2type_idx(char *name)
4671{
4672	int 	index, ntypes;
4673
4674	if (name == NULL)
4675		return (-1);
4676
4677	/*
4678	 * Determine how many possible types are currently supported
4679	 * for dr.
4680	 */
4681	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4682
4683	/* Determine if the node's name correspond to a predefined type. */
4684	for (index = 0; index < ntypes; index++) {
4685		if (strcmp(drmach_name2type[index].name, name) == 0)
4686			/* The node is an allowed type for dr. */
4687			return (index);
4688	}
4689
4690	/*
4691	 * If the name of the node does not map to any of the
4692	 * types in the array drmach_name2type then the node is not of
4693	 * interest to dr.
4694	 */
4695	return (-1);
4696}
4697
/*
 * Per-node callback for drmach_board_find_devices().  Creates a
 * drmach device for each walked node that has a portid mapping to the
 * target board and a name that is a recognized dr type, appends it to
 * the board's device array, and reports it via the caller-supplied
 * 'found' callback.  Returns 0 to continue the walk; -1 to abort the
 * walk with data->err set.
 */
static int
drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
{
	drmach_node_t			*node = args->node;
	drmach_board_cb_data_t		*data = args->data;
	drmach_board_t			*obj = data->obj;

	int		rv, portid;
	drmachid_t	id;
	drmach_device_t	*device;
	char	name[OBP_MAXDRVNAME];

	portid = drmach_get_portid(node);
	if (portid == -1) {
		/*
		 * if the node does not have a portid property, then
		 * by that information alone it is known that drmach
		 * is not interested in it.
		 */
		return (0);
	}
	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);

	/* The node must have a name */
	if (rv)
		return (0);

	/*
	 * Ignore devices whose portid do not map to this board,
	 * or that their name property is not mapped to a valid
	 * dr device name.
	 */
	if ((drmach_portid2bnum(portid) != obj->bnum) ||
	    (drmach_name2type_idx(name) < 0))
		return (0);

	/*
	 * Create a device data structure from this node data.
	 * The call may yield nothing if the node is not of interest
	 * to drmach.
	 */
	data->err = drmach_device_new(node, obj, portid, &id);
	if (data->err)
		return (-1);
	else if (!id) {
		/*
		 * drmach_device_new examined the node we passed in
		 * and determined that it was either one not of
		 * interest to drmach or the PIM dr layer.
		 * So, it is skipped.
		 */
		return (0);
	}

	/* record the new device in the board's device array */
	rv = drmach_array_set(obj->devices, data->ndevs++, id);
	if (rv) {
		data->err = DRMACH_INTERNAL_ERROR();
		return (-1);
	}

	device = id;

#ifdef DEBUG
	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
	if (DRMACH_IS_IO_ID(id))
		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
#endif

	/* hand the device to the caller; a non-NULL error aborts the walk */
	data->err = (*data->found)(data->a, device->type, device->unum, id);
	return (data->err == NULL ? 0 : -1);
}
4769
/*
 * Discover all devices on a board by walking its device tree, creating
 * a drmach device for each node of interest and reporting each one via
 * the 'found' callback.  Also refreshes the memory slice table, bus
 * sync list, and slot 1 LPA registers as side effects.  On failure the
 * partially-built device list is disposed and an error is returned.
 */
sbd_error_t *
drmach_board_find_devices(drmachid_t id, void *a,
	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
{
	drmach_board_t		*bp = (drmach_board_t *)id;
	sbd_error_t		*err;
	int			 max_devices;
	int			 rv;
	drmach_board_cb_data_t	data;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	/* size the device array for the worst case board population */
	max_devices  = plat_max_cpu_units_per_board();
	max_devices += plat_max_mem_units_per_board();
	max_devices += plat_max_io_units_per_board();

	bp->devices = drmach_array_new(0, max_devices);

	if (bp->tree == NULL)
		bp->tree = drmach_node_new();

	data.obj = bp;
	data.ndevs = 0;
	data.found = found;
	data.a = a;
	data.err = NULL;

	/* lock order: slice table, then bus sync list */
	mutex_enter(&drmach_slice_table_lock);
	mutex_enter(&drmach_bus_sync_lock);

	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);

	drmach_slice_table_update(bp, 0);
	drmach_bus_sync_list_update();

	mutex_exit(&drmach_bus_sync_lock);
	mutex_exit(&drmach_slice_table_lock);

	if (rv == 0) {
		err = NULL;
		drmach_slot1_lpa_set(bp);
	} else {
		/* walk aborted; undo the partial device list */
		drmach_array_dispose(bp->devices, drmach_device_dispose);
		bp->devices = NULL;

		if (data.err)
			err = data.err;
		else
			err = DRMACH_INTERNAL_ERROR();
	}

	return (err);
}
4824
/*
 * Look up the board numbered 'bnum', creating its drmach_board_t on
 * first encounter, and refresh its state from the SC via a SHOWBOARD
 * mailbox transaction.  Returns 0 with *id set on success; -1 with
 * *id cleared when drmach cannot initialize, the board number is out
 * of range, or the SC reports the board unavailable.
 */
int
drmach_board_lookup(int bnum, drmachid_t *id)
{
	int	rv = 0;

	if (!drmach_initialized && drmach_init() == -1) {
		*id = 0;
		return (-1);
	}
	rw_enter(&drmach_boards_rwlock, RW_WRITER);
	if (drmach_array_get(drmach_boards, bnum, id)) {
		*id = 0;
		rv = -1;
	} else {
		caddr_t		obufp;
		dr_showboard_t	shb;
		sbd_error_t	*err = NULL;
		drmach_board_t	*bp;

		bp = *id;

		/*
		 * If the board object already exists, a read lock
		 * suffices for the refresh below.  Otherwise keep the
		 * write lock so drmach_board_new() can safely insert.
		 */
		if (bp)
			rw_downgrade(&drmach_boards_rwlock);

		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
		    sizeof (dr_showboard_t));
		kmem_free(obufp, sizeof (dr_proto_hdr_t));

		if (err) {
			if (err->e_code == ESTC_UNAVAILABLE) {
				*id = 0;
				rv = -1;
			}
			sbd_err_clear(&err);
		} else {
			if (!bp)
				bp = *id  = (drmachid_t)drmach_board_new(bnum);
			/* refresh cached board state from the SC's reply */
			bp->connected = (shb.bd_assigned && shb.bd_active);
			bp->empty = shb.slot_empty;

			switch (shb.test_status) {
				case DR_TEST_STATUS_UNKNOWN:
				case DR_TEST_STATUS_IPOST:
				case DR_TEST_STATUS_ABORTED:
					bp->cond = SBD_COND_UNKNOWN;
					break;
				case DR_TEST_STATUS_PASSED:
					bp->cond = SBD_COND_OK;
					break;
				case DR_TEST_STATUS_FAILED:
					bp->cond = SBD_COND_FAILED;
					break;
				default:
					bp->cond = SBD_COND_UNKNOWN;
				DRMACH_PR("Unknown test status=0x%x from SC\n",
				    shb.test_status);
					break;
			}
			(void) strncpy(bp->type, shb.board_type,
			    sizeof (bp->type));
			bp->assigned = shb.bd_assigned;
			bp->powered = shb.power_on;
		}
	}
	rw_exit(&drmach_boards_rwlock);
	return (rv);
}
4894
4895sbd_error_t *
4896drmach_board_name(int bnum, char *buf, int buflen)
4897{
4898	(void) snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4899	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4900
4901	return (NULL);
4902}
4903
4904sbd_error_t *
4905drmach_board_poweroff(drmachid_t id)
4906{
4907	drmach_board_t	*bp;
4908	sbd_error_t	*err;
4909	drmach_status_t	 stat;
4910
4911	if (!DRMACH_IS_BOARD_ID(id))
4912		return (drerr_new(0, ESTC_INAPPROP, NULL));
4913	bp = id;
4914
4915	err = drmach_board_status(id, &stat);
4916	if (!err) {
4917		if (stat.configured || stat.busy)
4918			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4919		else {
4920			caddr_t	obufp;
4921
4922			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4923			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4924			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4925			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4926			if (!err)
4927				bp->powered = 0;
4928		}
4929	}
4930	return (err);
4931}
4932
4933sbd_error_t *
4934drmach_board_poweron(drmachid_t id)
4935{
4936	drmach_board_t	*bp;
4937	caddr_t		obufp;
4938	sbd_error_t	*err;
4939
4940	if (!DRMACH_IS_BOARD_ID(id))
4941		return (drerr_new(0, ESTC_INAPPROP, NULL));
4942	bp = id;
4943
4944	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4945	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
4946	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4947	if (!err)
4948		bp->powered = 1;
4949
4950	kmem_free(obufp, sizeof (dr_proto_hdr_t));
4951
4952	return (err);
4953}
4954
4955static sbd_error_t *
4956drmach_board_release(drmachid_t id)
4957{
4958	if (!DRMACH_IS_BOARD_ID(id))
4959		return (drerr_new(0, ESTC_INAPPROP, NULL));
4960	return (NULL);
4961}
4962
/*
 * Request a board test from the SC via a TESTBOARD mailbox
 * transaction.  For slot 1 (I/O or MAXCAT) boards an I/O cage is set
 * up first, donating a cpu to hpost for the test; afterwards the cpu
 * and cage memory are recovered.  The board condition is updated from
 * the reported test status and an error is returned for any result
 * other than PASSED.
 */
sbd_error_t *
drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
{
	drmach_board_t		*bp;
	drmach_device_t		*dp[MAX_CORES_PER_CMP];
	dr_mbox_msg_t		*obufp;
	sbd_error_t		*err;
	dr_testboard_reply_t	tbr;
	int			cpylen;
	char			*copts;
	int			is_io;
	cpu_flag_t		oflags[MAX_CORES_PER_CMP];

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	bp = id;

	/*
	 * If the board is an I/O or MAXCAT board, setup I/O cage for
	 * testing. Slot 1 indicates I/O or MAXCAT board.
	 */

	is_io = DRMACH_BNUM2SLOT(bp->bnum);

	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);

	if (force)
		obufp->msgdata.dm_tb.force = 1;

	obufp->msgdata.dm_tb.immediate = 1;

	/* pass any caller-supplied hpost options, truncated to fit */
	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
	}

	if (is_io) {
		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);

		if (err) {
			kmem_free(obufp, sizeof (dr_mbox_msg_t));
			return (err);
		}
	}

	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
	    sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));

	if (!err)
		bp->cond = SBD_COND_OK;
	else
		bp->cond = SBD_COND_UNKNOWN;

	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
		/* examine test status */
		switch (tbr.test_status) {
			case DR_TEST_STATUS_IPOST:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(0, ESTC_TEST_IN_PROGRESS, NULL);
				break;
			case DR_TEST_STATUS_UNKNOWN:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(1,
				    ESTC_TEST_STATUS_UNKNOWN, NULL);
				break;
			case DR_TEST_STATUS_FAILED:
				bp->cond = SBD_COND_FAILED;
				err = drerr_new(1, ESTC_TEST_FAILED, NULL);
				break;
			case DR_TEST_STATUS_ABORTED:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(1, ESTC_TEST_ABORTED, NULL);
				break;
			default:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(1, ESTC_TEST_RESULT_UNKNOWN,
				    NULL);
				break;
		}
	}

	/*
	 * If I/O cage test was performed, check for availability of the
	 * cpu used.  If cpu has been returned, it's OK to proceed with
	 * reconfiguring it for use.
	 */
	if (is_io) {
		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
		    tbr.cpu_recovered);
		DRMACH_PR("drmach_board_test: port id: %d",
		    tbr.cpu_portid);

		/*
		 * Check the cpu_recovered flag in the testboard reply, or
		 * if the testboard request message was not sent to SMS due
		 * to an mboxsc_putmsg() failure, it's OK to recover the
		 * cpu since hpost hasn't touched it.
		 */
		if ((tbr.cpu_recovered && tbr.cpu_portid ==
		    obufp->msgdata.dm_tb.cpu_portid) ||
		    ((err) && (err->e_code == ESTC_MBXRQST))) {

			int i;

			mutex_enter(&cpu_lock);
			for (i = 0; i < MAX_CORES_PER_CMP; i++) {
				if (dp[i] != NULL) {
					(void) drmach_iocage_cpu_return(dp[i],
					    oflags[i]);
				}
			}
			mutex_exit(&cpu_lock);
		} else {
			cmn_err(CE_WARN, "Unable to recover port id %d "
			    "after I/O cage test: cpu_recovered=%d, "
			    "returned portid=%d",
			    obufp->msgdata.dm_tb.cpu_portid,
			    tbr.cpu_recovered, tbr.cpu_portid);
		}
		(void) drmach_iocage_mem_return(&tbr);
	}
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	return (err);
}
5088
/*
 * Unassign a board from the domain via an UNASSIGN mailbox
 * transaction to the SC.  Refuses while the board is configured or
 * busy.  On success the board is removed from the board array and its
 * state disposed.  The boards rwlock is held as writer throughout.
 */
sbd_error_t *
drmach_board_unassign(drmachid_t id)
{
	drmach_board_t	*bp;
	sbd_error_t	*err;
	drmach_status_t	 stat;
	caddr_t		obufp;

	rw_enter(&drmach_boards_rwlock, RW_WRITER);

	if (!DRMACH_IS_BOARD_ID(id)) {
		rw_exit(&drmach_boards_rwlock);
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	}
	bp = id;

	err = drmach_board_status(id, &stat);
	if (err) {
		rw_exit(&drmach_boards_rwlock);
		return (err);
	}

	if (stat.configured || stat.busy) {
		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
	} else {

		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
		kmem_free(obufp, sizeof (dr_proto_hdr_t));
		if (!err) {
			/* drop the board from the array, then free it */
			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
				err = DRMACH_INTERNAL_ERROR();
			else
				drmach_board_dispose(bp);
		}
	}
	rw_exit(&drmach_boards_rwlock);
	return (err);
}
5129
5130static sbd_error_t *
5131drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5132{
5133	int		len;
5134	drmach_reg_t	reg;
5135	drmach_node_t	pp;
5136	drmach_node_t	*np = dp->node;
5137
5138	/*
5139	 * If the node does not have a portid property,
5140	 * it represents a CMP device. For a CMP, the reg
5141	 * property of the parent holds the information of
5142	 * interest.
5143	 */
5144	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5145
5146		if (dp->node->get_parent(dp->node, &pp) != 0) {
5147			return (DRMACH_INTERNAL_ERROR());
5148		}
5149		np = &pp;
5150	}
5151
5152	if (np->n_getproplen(np, "reg", &len) != 0)
5153		return (DRMACH_INTERNAL_ERROR());
5154
5155	if (len != sizeof (reg))
5156		return (DRMACH_INTERNAL_ERROR());
5157
5158	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5159		return (DRMACH_INTERNAL_ERROR());
5160
5161	/* reassemble 64-bit base address */
5162	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5163
5164	return (NULL);
5165}
5166
5167static void
5168drmach_cpu_read(uint64_t arg1, uint64_t arg2)
5169{
5170	uint64_t	*saf_config_reg = (uint64_t *)arg1;
5171	uint_t		*reg_read = (uint_t *)arg2;
5172
5173	*saf_config_reg = lddsafconfig();
5174	*reg_read = 0x1;
5175}
5176
5177/*
5178 * A return value of 1 indicates success and 0 indicates a failure
5179 */
5180static int
5181drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5182{
5183
5184	int 	rv = 0x0;
5185
5186	*scr = 0x0;
5187
5188	/*
5189	 * Confirm cpu was in ready set when xc was issued.
5190	 * This is done by verifying rv which is
5191	 * set to 0x1 when xc_one is successful.
5192	 */
5193	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5194	    (uint64_t)scr, (uint64_t)&rv);
5195
5196	return (rv);
5197
5198}
5199
5200static sbd_error_t *
5201drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5202{
5203	drmach_node_t	*np;
5204
5205	np = cp->dev.node;
5206
5207	/*
5208	 * If a CPU does not have a portid property, it must
5209	 * be a CMP device with a cpuid property.
5210	 */
5211	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5212
5213		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5214			return (DRMACH_INTERNAL_ERROR());
5215		}
5216	}
5217
5218	return (NULL);
5219}
5220
/* Starcat CMP core id is bit 2 of the cpuid */
#define	DRMACH_COREID_MASK	(1u << 2)
/*
 * Map a cpuid to its per-cpu 8KB page index within the shared STARDRB
 * SRAM buffer: bit 1 of the index is the CMP core id (cpuid bit 2
 * shifted down) and bit 0 is the even/odd agent (cpuid bit 0).
 */
#define	DRMACH_CPUID2SRAM_IDX(id) \
		((id & DRMACH_COREID_MASK) >> 1 | (id & 0x1))
5225
/*
 * Construct a drmach cpu object from the prototype device 'proto'.
 * Reads the cpu's Safari config register PA, cpuid and implementation,
 * then programs the locked TTE used to map this cpu's 8KB page of the
 * CPU SRAM STARDRB buffer (used later by drmach_cpu_stop_self).
 * Returns NULL with *idp set on success; on failure, frees any partial
 * state, clears *idp, and returns the error.
 */
static sbd_error_t *
drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
{
	static void drmach_cpu_dispose(drmachid_t);
	static sbd_error_t *drmach_cpu_release(drmachid_t);
	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	scr_pa;
	drmach_cpu_t	*cp = NULL;
	pfn_t		pfn;
	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
	int		idx;
	int		impl;
	processorid_t	cpuid;

	err = drmach_read_reg_addr(proto, &scr_pa);
	if (err) {
		goto fail;
	}

	/* clone the prototype and install the cpu class methods */
	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
	bcopy(proto, &cp->dev, sizeof (cp->dev));
	cp->dev.node = drmach_node_dup(proto->node);
	cp->dev.cm.isa = (void *)drmach_cpu_new;
	cp->dev.cm.dispose = drmach_cpu_dispose;
	cp->dev.cm.release = drmach_cpu_release;
	cp->dev.cm.status = drmach_cpu_status;
	cp->scr_pa = scr_pa;

	err = drmach_cpu_read_cpuid(cp, &cpuid);
	if (err) {
		goto fail;
	}

	err = drmach_cpu_get_impl(cp, &impl);
	if (err) {
		goto fail;
	}

	cp->cpuid = cpuid;
	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);

	/*
	 * Init the board cpu type.  Assumes all board cpus are the same type.
	 */
	if (cp->dev.bp->cpu_impl == 0) {
		cp->dev.bp->cpu_impl = impl;
	}
	ASSERT(cp->dev.bp->cpu_impl == impl);

	/*
	 * XXX CHEETAH SUPPORT
	 * determine if the domain uses Cheetah procs
	 */
	if (drmach_is_cheetah < 0) {
		drmach_is_cheetah = IS_CHEETAH(impl);
	}

	/*
	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
	 * pair. Each cpu uses 8KB according to the following layout:
	 *
	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
	 * Page 2:	even numbered Panther/Jaguar core 1's
	 * Page 3:	odd numbered Panther/Jaguar core 1's
	 */
	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
	pfn = cpu_sram_pa >> PAGESHIFT;

	/* the slot must be free; drmach_cpu_dispose() clears it */
	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
	    TTE_VALID_INT | TTE_SZ_INT(TTE8K);
	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;

	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);

	(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
	    cp->dev.type, cp->dev.unum);

	*idp = (drmachid_t)cp;
	return (NULL);

fail:
	if (cp) {
		drmach_node_dispose(cp->dev.node);
		kmem_free(cp, sizeof (*cp));
	}

	*idp = (drmachid_t)0;
	return (err);
}
5328
5329static void
5330drmach_cpu_dispose(drmachid_t id)
5331{
5332	drmach_cpu_t	*self;
5333	processorid_t	cpuid;
5334
5335	ASSERT(DRMACH_IS_CPU_ID(id));
5336
5337	self = id;
5338	if (self->dev.node)
5339		drmach_node_dispose(self->dev.node);
5340
5341	cpuid = self->cpuid;
5342	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5343	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5344	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5345	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5346	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5347	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5348
5349	kmem_free(self, sizeof (*self));
5350}
5351
/*
 * Start a powered-off cpu: register the hot-added cpu with the PROM,
 * restart it, then (when this board requires it) cross-call the new
 * cpu with drmach_set_lpa so it programs its LPA registers from the
 * slice table.  Finally flushes the cpu SRAM VA mapping from the new
 * cpu's TLB.  Always returns 0.  Caller must hold cpu_lock.
 */
static int
drmach_cpu_start(struct cpu *cp)
{
	extern xcfunc_t	drmach_set_lpa;
	extern void	restart_other_cpu(int);
	int		cpuid = cp->cpu_id;
	int		rv, bnum;
	drmach_board_t	*bp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);

	cp->cpu_flags &= ~CPU_POWEROFF;

	/*
	 * NOTE: restart_other_cpu pauses cpus during the
	 *	 slave cpu start.  This helps to quiesce the
	 *	 bus traffic a bit which makes the tick sync
	 *	 routine in the prom more robust.
	 */
	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);

	if (prom_hotaddcpu(cpuid) != 0) {
		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
		    cpuid);
	}

	restart_other_cpu(cpuid);

	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
	if (rv == -1 || bp == NULL) {
		DRMACH_PR("drmach_cpu_start: cannot read board info for "
		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, (void *)bp);
	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
		int exp;
		int ntries;

		mutex_enter(&drmach_xt_mb_lock);
		mutex_enter(&drmach_slice_table_lock);
		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

		/*
		 * drmach_slice_table[*]
		 *	bit 5	valid
		 *	bit 0:4	slice number
		 *
		 * drmach_xt_mb[*] format for drmach_set_lpa
		 *	bit 7	valid
		 *	bit 6	set null LPA (overrides bits 0:4)
		 *	bit 0:4	slice number
		 *
		 * drmach_set_lpa derives processor CBASE and CBND
		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
		 * number; CBND = slice number + 1.
		 * No action is taken if bit 7 is zero.
		 */
		exp = (cpuid >> 5) & 0x1f;
		if (drmach_slice_table[exp] & 0x20) {
			drmach_xt_mb[cpuid] = 0x80 |
			    (drmach_slice_table[exp] & 0x1f);
		} else {
			/* expander slice invalid; set a null LPA */
			drmach_xt_mb[cpuid] = 0x80 | 0x40;
		}

		drmach_xt_ready = 0;

		xt_one(cpuid, drmach_set_lpa, NULL, NULL);

		/* bounded busy-wait for the target cpu's ack */
		ntries = drmach_cpu_ntries;
		while (!drmach_xt_ready && ntries) {
			DELAY(drmach_cpu_delay);
			ntries--;
		}

		mutex_exit(&drmach_slice_table_lock);
		mutex_exit(&drmach_xt_mb_lock);

		DRMACH_PR(
		    "waited %d out of %d tries for drmach_set_lpa on cpu%d",
		    drmach_cpu_ntries - ntries, drmach_cpu_ntries,
		    cp->cpu_id);
	}

	/* flush the stale cpu SRAM VA translation on the new cpu */
	xt_one(cpuid, vtag_flushpage_tl1, (uint64_t)drmach_cpu_sram_va,
	    (uint64_t)ksfmmup);

	return (0);
}
5442
5443/*
5444 * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5445 * it has been offlined. The function of this routine is to get the cpu
5446 * spinning in a safe place. The requirement is that the system will not
5447 * reference anything on the detaching board (memory and i/o is detached
5448 * elsewhere) and that the CPU not reference anything on any other board
5449 * in the system.  This isolation is required during and after the writes
5450 * to the domain masks to remove the board from the domain.
5451 *
5452 * To accomplish this isolation the following is done:
5453 *	1) Create a locked mapping to the STARDRB data buffer located
5454 *	   in this cpu's sram. There is one TTE per cpu, initialized in
5455 *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5456 *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5457 *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5458 *	   boards. Each STARDRB buffer is logically divided by DR into one
5459 *	   8KB page per cpu (or Jaguar core).
5460 *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5461 *	3) Jump to function now in the cpu sram.
5462 *	   Function will:
5463 *	   3.1) Flush its Ecache (displacement).
5464 *	   3.2) Flush its Dcache with HW mechanism.
5465 *	   3.3) Flush its Icache with HW mechanism.
5466 *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5467 *	   3.5) Set LPA to NULL
5468 *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5469 *	        recovered by drmach_cpu_poweroff().
5470 *	4) Jump into an infinite loop.
5471 */
5472
static void
drmach_cpu_stop_self(void)
{
	extern void drmach_shutdown_asm(uint64_t, uint64_t, int, int, uint64_t);
	extern void drmach_shutdown_asm_end(void);

	tte_t		*tte;
	uint_t		*p, *q;
	uint64_t	 stack_pointer;

	/* the shutdown routine must fit within one 8KB sram page */
	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
	    (ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);

	/*
	 * Lock this cpu's pre-initialized sram TTE into both TLBs so the
	 * code and data referenced below never require a TLB miss that
	 * would touch board memory.  See the block comment above for the
	 * full isolation protocol; the TTE is set up in drmach_cpu_new().
	 */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) && TTE_IS_PRIVILEGED(tte) &&
	    TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)drmach_cpu_sram_va;
	q = (uint_t *)drmach_shutdown_asm;
	while (q < (uint_t *)drmach_shutdown_asm_end)
		*p++ = *q++;

	/* zero to assist debug */
	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
	while (p < q)
		*p++ = 0;

	/* a parking spot for the stack pointer */
	stack_pointer = (uint64_t)q;

	/* call copy of drmach_shutdown_asm */
	(*(void (*)())drmach_cpu_sram_va)(
	    stack_pointer,
	    drmach_iocage_paddr,
	    cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize,
	    va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
}
5514
/*
 * Final shutdown sequence for a detaching cpu: flush register windows,
 * raise the interrupt priority, mark the cpu powered off, then jump to
 * the sram-resident spin loop via drmach_cpu_stop_self().  This routine
 * is expected never to return; reaching the cmn_err() below means the
 * sram handoff failed.
 */
static void
drmach_cpu_shutdown_self(void)
{
	cpu_t		*cp = CPU;
	int		cpuid = cp->cpu_id;
	extern void	flush_windows(void);

	flush_windows();

	(void) spl8();

	/* cpu must be quiesced: no interrupt activity, idle/startup thread */
	ASSERT(cp->cpu_intr_actv == 0);
	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
	    cp->cpu_thread == cp->cpu_startup_thread);

	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;

	drmach_cpu_stop_self();

	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
}
5536
5537static sbd_error_t *
5538drmach_cpu_release(drmachid_t id)
5539{
5540	drmach_cpu_t	*cp;
5541	struct cpu	*cpu;
5542	sbd_error_t	*err;
5543
5544	if (!DRMACH_IS_CPU_ID(id))
5545		return (drerr_new(0, ESTC_INAPPROP, NULL));
5546	cp = id;
5547
5548	ASSERT(MUTEX_HELD(&cpu_lock));
5549
5550	cpu = cpu_get(cp->cpuid);
5551	if (cpu == NULL)
5552		err = DRMACH_INTERNAL_ERROR();
5553	else
5554		err = NULL;
5555
5556	return (err);
5557}
5558
5559static sbd_error_t *
5560drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5561{
5562	drmach_cpu_t	*cp;
5563	drmach_device_t	*dp;
5564
5565	ASSERT(DRMACH_IS_CPU_ID(id));
5566	cp = id;
5567	dp = &cp->dev;
5568
5569	stat->assigned = dp->bp->assigned;
5570	stat->powered = dp->bp->powered;
5571	mutex_enter(&cpu_lock);
5572	stat->configured = (cpu_get(cp->cpuid) != NULL);
5573	mutex_exit(&cpu_lock);
5574	stat->busy = dp->busy;
5575	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
5576	stat->info[0] = '\0';
5577
5578	return (NULL);
5579}
5580
5581sbd_error_t *
5582drmach_cpu_disconnect(drmachid_t id)
5583{
5584	if (!DRMACH_IS_CPU_ID(id))
5585		return (drerr_new(0, ESTC_INAPPROP, NULL));
5586
5587	return (NULL);
5588}
5589
5590sbd_error_t *
5591drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5592{
5593	drmach_cpu_t	*cpu;
5594
5595	if (!DRMACH_IS_CPU_ID(id))
5596		return (drerr_new(0, ESTC_INAPPROP, NULL));
5597	cpu = id;
5598
5599	*cpuid = cpu->cpuid;
5600	return (NULL);
5601}
5602
5603sbd_error_t *
5604drmach_cpu_get_impl(drmachid_t id, int *ip)
5605{
5606	drmach_node_t	*np;
5607	int		impl;
5608
5609	if (!DRMACH_IS_CPU_ID(id))
5610		return (drerr_new(0, ESTC_INAPPROP, NULL));
5611
5612	np = ((drmach_device_t *)id)->node;
5613
5614	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5615		return (DRMACH_INTERNAL_ERROR());
5616	}
5617
5618	*ip = impl;
5619
5620	return (NULL);
5621}
5622
5623/*
5624 * Flush this cpu's ecache, then ensure all outstanding safari
5625 * transactions have retired.
5626 */
5627void
5628drmach_cpu_flush_ecache_sync(void)
5629{
5630	uint64_t *p;
5631
5632	ASSERT(curthread->t_bound_cpu == CPU);
5633
5634	cpu_flush_ecache();
5635
5636	mutex_enter(&drmach_bus_sync_lock);
5637	for (p = drmach_bus_sync_list; *p; p++)
5638		(void) ldphys(*p);
5639	mutex_exit(&drmach_bus_sync_lock);
5640
5641	cpu_flush_ecache();
5642}
5643
5644sbd_error_t *
5645drmach_get_dip(drmachid_t id, dev_info_t **dip)
5646{
5647	drmach_device_t	*dp;
5648
5649	if (!DRMACH_IS_DEVICE_ID(id))
5650		return (drerr_new(0, ESTC_INAPPROP, NULL));
5651	dp = id;
5652
5653	*dip = dp->node->n_getdip(dp->node);
5654	return (NULL);
5655}
5656
5657sbd_error_t *
5658drmach_io_is_attached(drmachid_t id, int *yes)
5659{
5660	drmach_device_t *dp;
5661	dev_info_t	*dip;
5662	int state;
5663
5664	if (!DRMACH_IS_IO_ID(id))
5665		return (drerr_new(0, ESTC_INAPPROP, NULL));
5666	dp = id;
5667
5668	dip = dp->node->n_getdip(dp->node);
5669	if (dip == NULL) {
5670		*yes = 0;
5671		return (NULL);
5672	}
5673
5674	state = ddi_get_devstate(dip);
5675	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5676
5677	return (NULL);
5678}
5679
5680static int
5681drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5682{
5683	char			dtype[OBP_MAXPROPNAME];
5684	int			portid;
5685	uint_t			pci_csr_base;
5686	struct pci_phys_spec	*regbuf = NULL;
5687	int			rv, len;
5688
5689	ASSERT(dip != NULL);
5690	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5691	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5692		return (0);
5693
5694	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5695	    (caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5696
5697		if (strncmp(dtype, "pci", 3) == 0) {
5698
5699			/*
5700			 * Get safari portid. All schizo/xmits 0
5701			 * safari IDs end in 0x1C.
5702			 */
5703			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid",
5704			    &len);
5705
5706			if ((rv != DDI_PROP_SUCCESS) ||
5707			    (len > sizeof (portid)))
5708				return (0);
5709
5710			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5711			    "portid", (caddr_t)&portid, &len);
5712
5713			if (rv != DDI_PROP_SUCCESS)
5714				return (0);
5715
5716			if ((portid & 0x1F) != 0x1C)
5717				return (0);
5718
5719			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5720			    DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5721			    &len) == DDI_PROP_SUCCESS) {
5722
5723				pci_csr_base = regbuf[0].pci_phys_mid &
5724				    PCI_CONF_ADDR_MASK;
5725				kmem_free(regbuf, len);
5726				/*
5727				 * All PCI B-Leafs are at configspace 0x70.0000.
5728				 */
5729				if (pci_csr_base == 0x700000)
5730					return (1);
5731			}
5732		}
5733	}
5734	return (0);
5735}
5736
5737#define	SCHIZO_BINDING_NAME		"pci108e,8001"
5738#define	XMITS_BINDING_NAME		"pci108e,8002"
5739
5740/*
5741 * Verify if the dip is an instance of MAN 'eri'.
5742 */
5743static int
5744drmach_dip_is_man_eri(dev_info_t *dip)
5745{
5746	struct pci_phys_spec	*regbuf = NULL;
5747	dev_info_t		*parent_dip;
5748	char			*name;
5749	uint_t			pci_device;
5750	uint_t			pci_function;
5751	int			len;
5752
5753	if (dip == NULL)
5754		return (0);
5755	/*
5756	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5757	 */
5758	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5759	    ((name = ddi_binding_name(parent_dip)) == NULL))
5760		return (0);
5761	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5762		/*
5763		 * This RIO could be on XMITS, so get the dip to
5764		 * XMITS PCI Leaf.
5765		 */
5766		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5767			return (0);
5768		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5769		    (strcmp(name, XMITS_BINDING_NAME) != 0)) {
5770			return (0);
5771		}
5772	}
5773	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5774		return (0);
5775	/*
5776	 * Finally make sure it is the MAN eri.
5777	 */
5778	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5779	    "reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5780
5781		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5782		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5783		kmem_free(regbuf, len);
5784
5785		/*
5786		 * The network function of the RIO ASIC will always be
5787		 * device 3 and function 1 ("network@3,1").
5788		 */
5789		if ((pci_device == 3) && (pci_function == 1))
5790			return (1);
5791	}
5792	return (0);
5793}
5794
/*
 * Walk context for drmach_board_find_io_insts(); see that function for
 * how the fields direct and terminate the device tree walk.
 */
typedef struct {
	int		iosram_inst;	/* iosram instance; < 0 = wanted */
	dev_info_t	*eri_dip;	/* MAN eri dip (held); NULL = wanted */
	int		bnum;		/* board number to match */
} drmach_io_inst_t;
5800
/*
 * ddi_walk_devs() callback that locates two per-board io resources:
 * the board's iosram instance and the dip of the MAN 'eri' device.
 * ios->bnum selects the board via the device's safari portid.  The
 * walk terminates as soon as everything requested has been found;
 * callers that initialize iosram_inst to 0 (rather than -1) are asking
 * for the eri dip only.  On return, a non-NULL ios->eri_dip carries a
 * hold taken with ndi_hold_devi() that the caller must release.
 */
int
drmach_board_find_io_insts(dev_info_t *dip, void *args)
{
	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;

	int	rv;
	int	len;
	int	portid;
	char	name[OBP_MAXDRVNAME];

	/* devices without a portid cannot be matched to a board */
	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);

	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
		return (DDI_WALK_CONTINUE);
	}

	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
	    "portid", (caddr_t)&portid, &len);
	if (rv != DDI_PROP_SUCCESS)
		return (DDI_WALK_CONTINUE);

	/* ignore devices that are not on this board */
	if (drmach_portid2bnum(portid) != ios->bnum)
		return (DDI_WALK_CONTINUE);

	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "name", &len);
		if (rv == DDI_PROP_SUCCESS) {

			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
			    0, "name",
			    (caddr_t)name, &len);
			if (rv != DDI_PROP_SUCCESS)
				return (DDI_WALK_CONTINUE);

			if (strncmp("iosram", name, 6) == 0) {
				ios->iosram_inst = ddi_get_instance(dip);
				/* stop once the eri dip is also in hand */
				if (ios->eri_dip == NULL)
					return (DDI_WALK_CONTINUE);
				else
					return (DDI_WALK_TERMINATE);
			} else {
				if (drmach_dip_is_man_eri(dip)) {
					ASSERT(ios->eri_dip == NULL);
					/* hold is released by the caller */
					ndi_hold_devi(dip);
					ios->eri_dip = dip;
					if (ios->iosram_inst < 0)
						return (DDI_WALK_CONTINUE);
					else
						return (DDI_WALK_TERMINATE);
				}
			}
		}
	}
	return (DDI_WALK_CONTINUE);
}
5857
/*
 * Prepare an io board for release.  Walks the board's device subtree
 * to find its iosram instance (the eri dip located by the same walk is
 * not needed here, so its hold is dropped immediately) and, if an
 * iosram exists, asks the iosram driver to switch the tunnel away from
 * this board, retrying while iosram_switchfrom() returns EAGAIN.  A
 * persistent switch failure is reported as ESTC_IOSWITCH.
 */
sbd_error_t *
drmach_io_pre_release(drmachid_t id)
{
	drmach_io_inst_t	ios;
	drmach_board_t		*bp;
	int			rv = 0;
	sbd_error_t		*err = NULL;
	drmach_device_t		*dp;
	dev_info_t		*rdip;
	int			circ;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;
	bp = dp->bp;

	rdip = dp->node->n_getdip(dp->node);

	/* walk device tree to find iosram instance for the board */
	ios.iosram_inst = -1;
	ios.eri_dip = NULL;
	ios.bnum = bp->bnum;

	ndi_devi_enter(rdip, &circ);
	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
	    (void *)&ios);

	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
	    ios.bnum, ios.iosram_inst, (void *)ios.eri_dip);
	ndi_devi_exit(rdip, circ);

	if (ios.eri_dip) {
		/*
		 * Release hold acquired in drmach_board_find_io_insts()
		 */
		ndi_rele_devi(ios.eri_dip);
	}
	if (ios.iosram_inst >= 0) {
		/* call for tunnel switch */
		do {
			DRMACH_PR("calling iosram_switchfrom(%d)\n",
			    ios.iosram_inst);
			rv = iosram_switchfrom(ios.iosram_inst);
			if (rv)
				DRMACH_PR("iosram_switchfrom returned %d\n",
				    rv);
		} while (rv == EAGAIN);

		if (rv)
			err = drerr_new(0, ESTC_IOSWITCH, NULL);
	}
	return (err);
}
5911
/*
 * Undo a prior release of an io board.  Locates the board's MAN eri
 * device and, if the man driver's optional "man_dr_attach" symbol is
 * loaded, calls it to re-establish the MAN network on this board.
 * Returns ESTC_NWSWITCH if man_dr_attach fails; the absence of
 * man_dr_attach is not an error.  Compare drmach_io_release(), which
 * performs the mirror-image operation with "man_dr_detach".
 */
sbd_error_t *
drmach_io_unrelease(drmachid_t id)
{
	dev_info_t	*dip;
	sbd_error_t	*err = NULL;
	drmach_device_t	*dp;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);

	if (dip == NULL)
		err = DRMACH_INTERNAL_ERROR();
	else {
		int (*func)(dev_info_t *dip);

		/* man_dr_attach is optional; resolve it dynamically */
		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
		    0);

		if (func) {
			drmach_io_inst_t ios;
			dev_info_t	*pdip;
			int		circ;

			/*
			 * Walk device tree to find rio dip for the board
			 * Since we are not interested in iosram instance here,
			 * initialize it to 0, so that the walk terminates as
			 * soon as eri dip is found.
			 */
			ios.iosram_inst = 0;
			ios.eri_dip = NULL;
			ios.bnum = dp->bp->bnum;

			if (pdip = ddi_get_parent(dip)) {
				ndi_hold_devi(pdip);
				ndi_devi_enter(pdip, &circ);
			}
			/*
			 * Root node doesn't have to be held in any way.
			 */
			ddi_walk_devs(dip, drmach_board_find_io_insts,
			    (void *)&ios);

			if (pdip) {
				ndi_devi_exit(pdip, circ);
				ndi_rele_devi(pdip);
			}

			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
			    ios.bnum, (void *)ios.eri_dip);

			if (ios.eri_dip) {
				DRMACH_PR("calling man_dr_attach\n");
				if ((*func)(ios.eri_dip))
					err = drerr_new(0, ESTC_NWSWITCH, NULL);
				/*
				 * Release hold acquired in
				 * drmach_board_find_io_insts()
				 */
				ndi_rele_devi(ios.eri_dip);
			}
		} else
			DRMACH_PR("man_dr_attach NOT present\n");
	}
	return (err);
}
5981
/*
 * Release an io board's network presence.  Locates the board's MAN eri
 * device and, if the man driver's optional "man_dr_detach" symbol is
 * loaded, calls it to switch the MAN network away from this board.
 * Returns ESTC_NWSWITCH if man_dr_detach fails; the absence of
 * man_dr_detach is not an error.  This is the mirror image of
 * drmach_io_unrelease(), which calls "man_dr_attach".
 */
static sbd_error_t *
drmach_io_release(drmachid_t id)
{
	dev_info_t	*dip;
	sbd_error_t	*err = NULL;
	drmach_device_t	*dp;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);

	if (dip == NULL)
		err = DRMACH_INTERNAL_ERROR();
	else {
		int (*func)(dev_info_t *dip);

		/* man_dr_detach is optional; resolve it dynamically */
		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
		    0);

		if (func) {
			drmach_io_inst_t ios;
			dev_info_t	*pdip;
			int		circ;

			/*
			 * Walk device tree to find rio dip for the board
			 * Since we are not interested in iosram instance here,
			 * initialize it to 0, so that the walk terminates as
			 * soon as eri dip is found.
			 */
			ios.iosram_inst = 0;
			ios.eri_dip = NULL;
			ios.bnum = dp->bp->bnum;

			if (pdip = ddi_get_parent(dip)) {
				ndi_hold_devi(pdip);
				ndi_devi_enter(pdip, &circ);
			}
			/*
			 * Root node doesn't have to be held in any way.
			 */
			ddi_walk_devs(dip, drmach_board_find_io_insts,
			    (void *)&ios);

			if (pdip) {
				ndi_devi_exit(pdip, circ);
				ndi_rele_devi(pdip);
			}

			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
			    ios.bnum, (void *)ios.eri_dip);

			if (ios.eri_dip) {
				DRMACH_PR("calling man_dr_detach\n");
				if ((*func)(ios.eri_dip))
					err = drerr_new(0, ESTC_NWSWITCH, NULL);
				/*
				 * Release hold acquired in
				 * drmach_board_find_io_insts()
				 */
				ndi_rele_devi(ios.eri_dip);
			}
		} else
			DRMACH_PR("man_dr_detach NOT present\n");
	}
	return (err);
}
6051
6052sbd_error_t *
6053drmach_io_post_release(drmachid_t id)
6054{
6055	char 		*path;
6056	dev_info_t	*rdip;
6057	drmach_device_t	*dp;
6058
6059	if (!DRMACH_IS_DEVICE_ID(id))
6060		return (drerr_new(0, ESTC_INAPPROP, NULL));
6061	dp = id;
6062
6063	rdip = dp->node->n_getdip(dp->node);
6064
6065	/*
6066	 * Always called after drmach_unconfigure() which on Starcat
6067	 * unconfigures the branch but doesn't remove it so the
6068	 * dip must always exist.
6069	 */
6070	ASSERT(rdip);
6071
6072	ASSERT(e_ddi_branch_held(rdip));
6073#ifdef DEBUG
6074	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6075	(void) ddi_pathname(rdip, path);
6076	DRMACH_PR("post_release dip path is: %s\n", path);
6077	kmem_free(path, MAXPATHLEN);
6078#endif
6079
6080	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6081		if (schpc_remove_pci(rdip)) {
6082			DRMACH_PR("schpc_remove_pci failed\n");
6083			return (drerr_new(0, ESBD_OFFLINE, NULL));
6084		} else {
6085			DRMACH_PR("schpc_remove_pci succeeded\n");
6086		}
6087	}
6088
6089	return (NULL);
6090}
6091
/*
 * Post-attach handling for an io device.  Registers PCI leaves with
 * the schpc hot plug framework (failure is only logged), then locates
 * the board's MAN eri device and calls the man driver's optional
 * "man_dr_attach" symbol to plumb the MAN network.  Always returns
 * NULL.
 */
sbd_error_t *
drmach_io_post_attach(drmachid_t id)
{
	int		circ;
	dev_info_t	*dip;
	dev_info_t	*pdip;
	drmach_device_t	*dp;
	drmach_io_inst_t ios;

	if (!DRMACH_IS_DEVICE_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);

	/*
	 * We held the branch rooted at dip earlier, so at a minimum the
	 * root i.e. dip must be present in the device tree.
	 */
	ASSERT(dip);

	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
		if (schpc_add_pci(dip)) {
			DRMACH_PR("schpc_add_pci failed\n");
		} else {
			DRMACH_PR("schpc_add_pci succeeded\n");
		}
	}

	/*
	 * Walk device tree to find rio dip for the board
	 * Since we are not interested in iosram instance here,
	 * initialize it to 0, so that the walk terminates as
	 * soon as eri dip is found.
	 */
	ios.iosram_inst = 0;
	ios.eri_dip = NULL;
	ios.bnum = dp->bp->bnum;

	if (pdip = ddi_get_parent(dip)) {
		ndi_hold_devi(pdip);
		ndi_devi_enter(pdip, &circ);
	}
	/*
	 * Root node doesn't have to be held in any way.
	 */
	ddi_walk_devs(dip, drmach_board_find_io_insts, (void *)&ios);
	if (pdip) {
		ndi_devi_exit(pdip, circ);
		ndi_rele_devi(pdip);
	}

	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n",
	    ios.bnum, (void *)ios.eri_dip);

	if (ios.eri_dip) {
		int (*func)(dev_info_t *dip);

		/* man_dr_attach is optional; resolve it dynamically */
		func =
		    (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);

		if (func) {
			DRMACH_PR("calling man_dr_attach\n");
			(void) (*func)(ios.eri_dip);
		} else {
			DRMACH_PR("man_dr_attach NOT present\n");
		}

		/*
		 * Release hold acquired in drmach_board_find_io_insts()
		 */
		ndi_rele_devi(ios.eri_dip);

	}

	return (NULL);
}
6169
6170static sbd_error_t *
6171drmach_io_status(drmachid_t id, drmach_status_t *stat)
6172{
6173	drmach_device_t *dp;
6174	sbd_error_t	*err;
6175	int		 configured;
6176
6177	ASSERT(DRMACH_IS_IO_ID(id));
6178	dp = id;
6179
6180	err = drmach_io_is_attached(id, &configured);
6181	if (err)
6182		return (err);
6183
6184	stat->assigned = dp->bp->assigned;
6185	stat->powered = dp->bp->powered;
6186	stat->configured = (configured != 0);
6187	stat->busy = dp->busy;
6188	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
6189	stat->info[0] = '\0';
6190
6191	return (NULL);
6192}
6193
6194sbd_error_t *
6195drmach_mem_init_size(drmachid_t id)
6196{
6197	drmach_mem_t	*mp;
6198	sbd_error_t	*err;
6199	gdcd_t		*gdcd;
6200	mem_chunk_t	*chunk;
6201	uint64_t	 chunks, pa, mask, sz;
6202
6203	if (!DRMACH_IS_MEM_ID(id))
6204		return (drerr_new(0, ESTC_INAPPROP, NULL));
6205	mp = id;
6206
6207	err = drmach_mem_get_base_physaddr(id, &pa);
6208	if (err)
6209		return (err);
6210
6211	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6212	pa &= mask;
6213
6214	gdcd = drmach_gdcd_new();
6215	if (gdcd == NULL)
6216		return (DRMACH_INTERNAL_ERROR());
6217
6218	sz = 0;
6219	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6220	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6221	while (chunks-- != 0) {
6222		if ((chunk->mc_base_pa & mask) == pa) {
6223			sz += chunk->mc_mbytes * 1048576;
6224		}
6225
6226		++chunk;
6227	}
6228	mp->nbytes = sz;
6229
6230	drmach_gdcd_dispose(gdcd);
6231	return (NULL);
6232}
6233
6234/*
6235 * Hardware registers are organized into consecutively
6236 * addressed registers.  The reg property's hi and lo fields
6237 * together describe the base address of the register set for
6238 * this memory-controller.  Register descriptions and offsets
6239 * (from the base address) are as follows:
6240 *
6241 * Description				Offset	Size (bytes)
6242 * Memory Timing Control Register I	0x00	8
6243 * Memory Timing Control Register II	0x08	8
6244 * Memory Address Decoding Register I	0x10	8
6245 * Memory Address Decoding Register II	0x18	8
6246 * Memory Address Decoding Register III	0x20	8
6247 * Memory Address Decoding Register IV	0x28	8
6248 * Memory Address Control Register	0x30	8
6249 * Memory Timing Control Register III	0x38	8
6250 * Memory Timing Control Register IV	0x40	8
6251 * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6252 * EMU Activity Status Register		0x50	8 (Panther only)
6253 *
6254 * Only the Memory Address Decoding Register and EMU Activity Status
6255 * Register addresses are needed for DRMACH.
6256 */
/*
 * Construct a mem unit from the prototype device.  Only the first mem
 * unit encountered on a board is exposed to the PIM layer (via *idp);
 * subsequent units are linked behind it and hidden by returning a
 * drmachid of zero.  A mem-controller none of whose banks were enabled
 * by POST is discarded entirely.
 */
static sbd_error_t *
drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
{
	static void drmach_mem_dispose(drmachid_t);
	static sbd_error_t *drmach_mem_release(drmachid_t);
	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	 madr_pa;
	drmach_mem_t	*mp;
	int		 bank, count;

	err = drmach_read_reg_addr(proto, &madr_pa);
	if (err)
		return (err);

	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
	bcopy(proto, &mp->dev, sizeof (mp->dev));
	mp->dev.node = drmach_node_dup(proto->node);
	mp->dev.cm.isa = (void *)drmach_mem_new;
	mp->dev.cm.dispose = drmach_mem_dispose;
	mp->dev.cm.release = drmach_mem_release;
	mp->dev.cm.status = drmach_mem_status;
	mp->madr_pa = madr_pa;

	(void) snprintf(mp->dev.cm.name,
	    sizeof (mp->dev.cm.name), "%s", mp->dev.type);

	/*
	 * We only need to know whether any bank is valid, so the scan
	 * stops at the first valid madr it finds.
	 */
	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
		uint64_t madr;

		drmach_mem_read_madr(mp, bank, &madr);
		if (madr & DRMACH_MC_VALID_MASK) {
			count += 1;
			break;
		}
	}

	/*
	 * If none of the banks had their valid bit set, that means
	 * post did not configure this MC to participate in the
	 * domain.  So, pretend this node does not exist by returning
	 * a drmachid of zero.
	 */
	if (count == 0) {
		/* drmach_mem_dispose frees board mem list */
		drmach_node_dispose(mp->dev.node);
		kmem_free(mp, sizeof (*mp));
		*idp = (drmachid_t)0;
		return (NULL);
	}

	/*
	 * Only one mem unit per board is exposed to the
	 * PIM layer.  The first mem unit encountered during
	 * tree walk is used to represent all mem units on
	 * the same board.
	 */
	if (mp->dev.bp->mem == NULL) {
		/* start list of mem units on this board */
		mp->dev.bp->mem = mp;

		/*
		 * force unum to zero since this is the only mem unit
		 * that will be visible to the PIM layer.
		 */
		mp->dev.unum = 0;

		/*
		 * board memory size kept in this mem unit only
		 */
		err = drmach_mem_init_size(mp);
		if (err) {
			mp->dev.bp->mem = NULL;
			/* drmach_mem_dispose frees board mem list */
			drmach_node_dispose(mp->dev.node);
			kmem_free(mp, sizeof (*mp));
			*idp = (drmachid_t)0;
			return (NULL);
		}

		/*
		 * allow this instance (the first encountered on this board)
		 * to be visible to the PIM layer.
		 */
		*idp = (drmachid_t)mp;
	} else {
		drmach_mem_t *lp;

		/* hide this mem instance behind the first. */
		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
			;
		lp->next = mp;

		/*
		 * hide this instance from the caller.
		 * See drmach_board_find_devices_cb() for details.
		 */
		*idp = (drmachid_t)0;
	}

	return (NULL);
}
6360
6361static void
6362drmach_mem_dispose(drmachid_t id)
6363{
6364	drmach_mem_t *mp, *next;
6365	drmach_board_t *bp;
6366
6367	ASSERT(DRMACH_IS_MEM_ID(id));
6368
6369	mutex_enter(&drmach_bus_sync_lock);
6370
6371	mp = id;
6372	bp = mp->dev.bp;
6373
6374	do {
6375		if (mp->dev.node)
6376			drmach_node_dispose(mp->dev.node);
6377
6378		next = mp->next;
6379		kmem_free(mp, sizeof (*mp));
6380		mp = next;
6381	} while (mp);
6382
6383	bp->mem = NULL;
6384
6385	drmach_bus_sync_list_update();
6386	mutex_exit(&drmach_bus_sync_lock);
6387}
6388
6389sbd_error_t *
6390drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6391{
6392	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6393	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6394	int		rv;
6395
6396	ASSERT(size != 0);
6397
6398	if (!DRMACH_IS_MEM_ID(id))
6399		return (drerr_new(0, ESTC_INAPPROP, NULL));
6400
6401	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
6402	if (rv == ENOMEM) {
6403		cmn_err(CE_WARN, "%lu megabytes not available"
6404		    " to kernel cage", size >> 20);
6405	} else if (rv != 0) {
6406		/* catch this in debug kernels */
6407		ASSERT(0);
6408
6409		cmn_err(CE_WARN, "unexpected kcage_range_add"
6410		    " return value %d", rv);
6411	}
6412
6413	return (NULL);
6414}
6415
6416sbd_error_t *
6417drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6418{
6419	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6420	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6421	int		 rv;
6422
6423	if (!DRMACH_IS_MEM_ID(id))
6424		return (drerr_new(0, ESTC_INAPPROP, NULL));
6425
6426	if (size > 0) {
6427		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6428		if (rv != 0) {
6429			cmn_err(CE_WARN,
6430			    "unexpected kcage_range_delete_post_mem_del"
6431			    " return value %d", rv);
6432			return (DRMACH_INTERNAL_ERROR());
6433		}
6434	}
6435
6436	return (NULL);
6437}
6438
6439sbd_error_t *
6440drmach_mem_disable(drmachid_t id)
6441{
6442	if (!DRMACH_IS_MEM_ID(id))
6443		return (drerr_new(0, ESTC_INAPPROP, NULL));
6444	else
6445		return (NULL);
6446}
6447
6448sbd_error_t *
6449drmach_mem_enable(drmachid_t id)
6450{
6451	if (!DRMACH_IS_MEM_ID(id))
6452		return (drerr_new(0, ESTC_INAPPROP, NULL));
6453	else
6454		return (NULL);
6455}
6456
/*
 * Compute the physical-address alignment mask for a mem unit chain.
 * Each valid bank's uk field selects a segment size from uk2segsz; the
 * returned mask covers the largest segment size found.  An unknown uk
 * value makes the routine fall back to the full slice size and stop
 * scanning.
 */
sbd_error_t *
drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
{
#define	MB(mb) ((mb) * 1048576ull)

	/* uk bit pattern -> segment size mapping */
	static struct {
		uint_t		uk;
		uint64_t	segsz;
	}  uk2segsz[] = {
		{ 0x003,	MB(256)	  },
		{ 0x007,	MB(512)	  },
		{ 0x00f,	MB(1024)  },
		{ 0x01f,	MB(2048)  },
		{ 0x03f,	MB(4096)  },
		{ 0x07f,	MB(8192)  },
		{ 0x0ff,	MB(16384) },
		{ 0x1ff,	MB(32768) },
		{ 0x3ff,	MB(65536) },
		{ 0x7ff,	MB(131072) }
	};
	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);

#undef MB

	uint64_t	 largest_sz = 0;
	drmach_mem_t	*mp;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	/* prime the result with a default value */
	*mask = (DRMACH_MEM_SLICE_SIZE - 1);

	for (mp = id; mp; mp = mp->next) {
		int bank;

		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
			int		i;
			uint_t		uk;
			uint64_t	madr;

			/* get register value, extract uk and normalize */
			drmach_mem_read_madr(mp, bank, &madr);

			if (!(madr & DRMACH_MC_VALID_MASK))
				continue;

			uk = DRMACH_MC_UK(madr);

			/* match uk value */
			for (i = 0; i < len; i++)
				if (uk == uk2segsz[i].uk)
					break;

			if (i < len) {
				uint64_t sz = uk2segsz[i].segsz;

				/*
				 * remember largest segment size,
				 * update mask result
				 */
				if (sz > largest_sz) {
					largest_sz = sz;
					*mask = sz - 1;
				}
			} else {
				/*
				 * uk not in table, punt using
				 * entire slice size. no longer any
				 * reason to check other banks.
				 */
				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
				return (NULL);
			}
		}
	}

	return (NULL);
}
6536
6537sbd_error_t *
6538drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6539{
6540	drmach_mem_t *mp;
6541
6542	if (!DRMACH_IS_MEM_ID(id))
6543		return (drerr_new(0, ESTC_INAPPROP, NULL));
6544
6545	*base_addr = (uint64_t)-1;
6546	for (mp = id; mp; mp = mp->next) {
6547		int bank;
6548
6549		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6550			uint64_t addr, madr;
6551
6552			drmach_mem_read_madr(mp, bank, &madr);
6553			if (madr & DRMACH_MC_VALID_MASK) {
6554				addr = DRMACH_MC_UM_TO_PA(madr) |
6555				    DRMACH_MC_LM_TO_PA(madr);
6556
6557				if (addr < *base_addr)
6558					*base_addr = addr;
6559			}
6560		}
6561	}
6562
6563	/* should not happen, but ... */
6564	if (*base_addr == (uint64_t)-1)
6565		return (DRMACH_INTERNAL_ERROR());
6566
6567	return (NULL);
6568}
6569
/*
 * Rebuild drmach_bus_sync_list: one physical address for each valid
 * memory bank on every board, zero terminated.  The addresses are read
 * by drmach_cpu_flush_ecache_sync() to force outstanding safari
 * transactions to retire.  Caller must hold drmach_bus_sync_lock.
 *
 * NOTE(review): assumes drmach_bus_sync_list has capacity for every
 * valid bank plus the terminator -- confirm its sizing where declared.
 */
void
drmach_bus_sync_list_update(void)
{
	int		rv, idx, cnt = 0;
	drmachid_t	id;

	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));

	rv = drmach_array_first(drmach_boards, &idx, &id);
	while (rv == 0) {
		drmach_board_t		*bp = id;
		drmach_mem_t		*mp = bp->mem;

		while (mp) {
			int bank;

			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
				uint64_t madr;

				drmach_mem_read_madr(mp, bank, &madr);
				if (madr & DRMACH_MC_VALID_MASK) {
					uint64_t pa;

					pa  = DRMACH_MC_UM_TO_PA(madr);
					pa |= DRMACH_MC_LM_TO_PA(madr);

					/*
					 * The list is zero terminated.
					 * Offset the pa by a doubleword
					 * to avoid confusing a pa value
					 * of zero with the terminator.
					 */
					pa += sizeof (uint64_t);

					drmach_bus_sync_list[cnt++] = pa;
				}
			}

			mp = mp->next;
		}

		rv = drmach_array_next(drmach_boards, &idx, &id);
	}

	drmach_bus_sync_list[cnt] = 0;
}
6616
6617sbd_error_t *
6618drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
6619{
6620	sbd_error_t	*err;
6621	struct memlist	*mlist;
6622	gdcd_t		*gdcd;
6623	mem_chunk_t	*chunk;
6624	uint64_t	 chunks, pa, mask;
6625
6626	err = drmach_mem_get_base_physaddr(id, &pa);
6627	if (err)
6628		return (err);
6629
6630	gdcd = drmach_gdcd_new();
6631	if (gdcd == NULL)
6632		return (DRMACH_INTERNAL_ERROR());
6633
6634	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6635	pa &= mask;
6636
6637	mlist = NULL;
6638	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6639	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6640	while (chunks-- != 0) {
6641		if ((chunk->mc_base_pa & mask) == pa) {
6642			mlist = memlist_add_span(mlist, chunk->mc_base_pa,
6643			    chunk->mc_mbytes * 1048576);
6644		}
6645
6646		++chunk;
6647	}
6648
6649	drmach_gdcd_dispose(gdcd);
6650
6651#ifdef DEBUG
6652	DRMACH_PR("GDCD derived memlist:");
6653	memlist_dump(mlist);
6654#endif
6655
6656	*ml = mlist;
6657	return (NULL);
6658}
6659
6660sbd_error_t *
6661drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6662{
6663	drmach_mem_t	*mp;
6664
6665	if (!DRMACH_IS_MEM_ID(id))
6666		return (drerr_new(0, ESTC_INAPPROP, NULL));
6667	mp = id;
6668
6669	ASSERT(mp->nbytes != 0);
6670	*bytes = mp->nbytes;
6671
6672	return (NULL);
6673}
6674
6675sbd_error_t *
6676drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6677{
6678	sbd_error_t	*err;
6679	drmach_device_t	*mp;
6680
6681	if (!DRMACH_IS_MEM_ID(id))
6682		return (drerr_new(0, ESTC_INAPPROP, NULL));
6683	mp = id;
6684
6685	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6686		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6687			err = NULL;
6688			break;
6689
6690		case 1: *bytes = 0;
6691			err = NULL;
6692			break;
6693
6694		default:
6695			err = DRMACH_INTERNAL_ERROR();
6696			break;
6697	}
6698
6699	return (err);
6700}
6701
6702processorid_t drmach_mem_cpu_affinity_nail;
6703
6704processorid_t
6705drmach_mem_cpu_affinity(drmachid_t id)
6706{
6707	drmach_device_t	*mp;
6708	drmach_board_t	*bp;
6709	processorid_t	 cpuid;
6710
6711	if (!DRMACH_IS_MEM_ID(id))
6712		return (CPU_CURRENT);
6713
6714	if (drmach_mem_cpu_affinity_nail) {
6715		cpuid = drmach_mem_cpu_affinity_nail;
6716
6717		if (cpuid < 0 || cpuid > NCPU)
6718			return (CPU_CURRENT);
6719
6720		mutex_enter(&cpu_lock);
6721		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6722			cpuid = CPU_CURRENT;
6723		mutex_exit(&cpu_lock);
6724
6725		return (cpuid);
6726	}
6727
6728	/* try to choose a proc on the target board */
6729	mp = id;
6730	bp = mp->bp;
6731	if (bp->devices) {
6732		int		 rv;
6733		int		 d_idx;
6734		drmachid_t	 d_id;
6735
6736		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6737		while (rv == 0) {
6738			if (DRMACH_IS_CPU_ID(d_id)) {
6739				drmach_cpu_t	*cp = d_id;
6740
6741				mutex_enter(&cpu_lock);
6742				cpuid = cp->cpuid;
6743				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6744					mutex_exit(&cpu_lock);
6745					return (cpuid);
6746				} else {
6747					mutex_exit(&cpu_lock);
6748				}
6749			}
6750
6751			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6752		}
6753	}
6754
6755	/* otherwise, this proc, wherever it is */
6756	return (CPU_CURRENT);
6757}
6758
6759static sbd_error_t *
6760drmach_mem_release(drmachid_t id)
6761{
6762	if (!DRMACH_IS_MEM_ID(id))
6763		return (drerr_new(0, ESTC_INAPPROP, NULL));
6764	return (NULL);
6765}
6766
6767static sbd_error_t *
6768drmach_mem_status(drmachid_t id, drmach_status_t *stat)
6769{
6770	drmach_mem_t	*mp;
6771	sbd_error_t	*err;
6772	uint64_t	 pa, slice_size;
6773	struct memlist	*ml;
6774
6775	ASSERT(DRMACH_IS_MEM_ID(id));
6776	mp = id;
6777
6778	/* get starting physical address of target memory */
6779	err = drmach_mem_get_base_physaddr(id, &pa);
6780	if (err)
6781		return (err);
6782
6783	/* round down to slice boundary */
6784	slice_size = DRMACH_MEM_SLICE_SIZE;
6785	pa &= ~ (slice_size - 1);
6786
6787	/* stop at first span that is in slice */
6788	memlist_read_lock();
6789	for (ml = phys_install; ml; ml = ml->ml_next)
6790		if (ml->ml_address >= pa && ml->ml_address < pa + slice_size)
6791			break;
6792	memlist_read_unlock();
6793
6794	stat->assigned = mp->dev.bp->assigned;
6795	stat->powered = mp->dev.bp->powered;
6796	stat->configured = (ml != NULL);
6797	stat->busy = mp->dev.busy;
6798	(void) strncpy(stat->type, mp->dev.type, sizeof (stat->type));
6799	stat->info[0] = '\0';
6800
6801	return (NULL);
6802}
6803
6804sbd_error_t *
6805drmach_board_deprobe(drmachid_t id)
6806{
6807	drmach_board_t	*bp;
6808	sbd_error_t	*err = NULL;
6809
6810	if (!DRMACH_IS_BOARD_ID(id))
6811		return (drerr_new(0, ESTC_INAPPROP, NULL));
6812	bp = id;
6813
6814	if (bp->tree) {
6815		drmach_node_dispose(bp->tree);
6816		bp->tree = NULL;
6817	}
6818	if (bp->devices) {
6819		drmach_array_dispose(bp->devices, drmach_device_dispose);
6820		bp->devices = NULL;
6821		bp->mem = NULL;  /* TODO: still needed? */
6822	}
6823	return (err);
6824}
6825
6826/*ARGSUSED1*/
6827static sbd_error_t *
6828drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
6829{
6830	drmach_device_t	*dp;
6831	uint64_t	val;
6832	int		err = 1;
6833
6834	if (DRMACH_IS_CPU_ID(id)) {
6835		drmach_cpu_t *cp = id;
6836		if (drmach_cpu_read_scr(cp, &val))
6837			err = 0;
6838	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
6839		drmach_io_t *io = id;
6840		val = lddphysio(io->scsr_pa);
6841		err = 0;
6842	}
6843	if (err)
6844		return (drerr_new(0, ESTC_INAPPROP, NULL));
6845
6846	dp = id;
6847	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
6848	    dp->bp->cm.name,
6849	    dp->cm.name,
6850	    dp->portid,
6851	    (long)(DRMACH_LPA_BASE_TO_PA(val)),
6852	    (long)(DRMACH_LPA_BND_TO_PA(val)));
6853
6854	return (NULL);
6855}
6856
6857/*ARGSUSED*/
6858static sbd_error_t *
6859drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6860{
6861	drmach_board_t		*bp = (drmach_board_t *)id;
6862	sbd_error_t		*err;
6863	sc_gptwocfg_cookie_t	scc;
6864
6865	if (!DRMACH_IS_BOARD_ID(id))
6866		return (drerr_new(0, ESTC_INAPPROP, NULL));
6867
6868	/* do saf configurator stuff */
6869	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6870	scc = sc_probe_board(bp->bnum);
6871	if (scc == NULL) {
6872		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6873		return (err);
6874	}
6875
6876	return (err);
6877}
6878
6879/*ARGSUSED*/
6880static sbd_error_t *
6881drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6882{
6883	drmach_board_t	*bp;
6884	sbd_error_t	*err = NULL;
6885	sc_gptwocfg_cookie_t	scc;
6886
6887	if (!DRMACH_IS_BOARD_ID(id))
6888		return (drerr_new(0, ESTC_INAPPROP, NULL));
6889	bp = id;
6890
6891	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6892	scc = sc_unprobe_board(bp->bnum);
6893	if (scc != NULL) {
6894		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6895	}
6896
6897	if (err == NULL)
6898		err = drmach_board_deprobe(id);
6899
6900	return (err);
6901}
6902
6903static sbd_error_t *
6904drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
6905{
6906	_NOTE(ARGUNUSED(id))
6907	_NOTE(ARGUNUSED(opts))
6908
6909	struct memlist	*ml;
6910	uint64_t	src_pa;
6911	uint64_t	dst_pa;
6912	uint64_t	dst;
6913
6914	dst_pa = va_to_pa(&dst);
6915
6916	memlist_read_lock();
6917	for (ml = phys_install; ml; ml = ml->ml_next) {
6918		uint64_t	nbytes;
6919
6920		src_pa = ml->ml_address;
6921		nbytes = ml->ml_size;
6922
6923		while (nbytes != 0ull) {
6924
6925			/* copy 32 bytes at src_pa to dst_pa */
6926			bcopy32_il(src_pa, dst_pa);
6927
6928			/* increment by 32 bytes */
6929			src_pa += (4 * sizeof (uint64_t));
6930
6931			/* decrement by 32 bytes */
6932			nbytes -= (4 * sizeof (uint64_t));
6933		}
6934	}
6935	memlist_read_unlock();
6936
6937	return (NULL);
6938}
6939
6940static sbd_error_t *
6941drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
6942{
6943	_NOTE(ARGUNUSED(opts))
6944
6945	drmach_cpu_t	*cp;
6946
6947	if (!DRMACH_IS_CPU_ID(id))
6948		return (drerr_new(0, ESTC_INAPPROP, NULL));
6949	cp = id;
6950
6951	mutex_enter(&cpu_lock);
6952	(void) drmach_iocage_cpu_return(&(cp->dev),
6953	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
6954	mutex_exit(&cpu_lock);
6955
6956	return (NULL);
6957}
6958
6959/*
6960 * Starcat DR passthrus are for debugging purposes only.
6961 */
/*
 * Dispatch table mapping passthru command names to their handlers;
 * drmach_passthru() matches on name prefix.  NULL-terminated.
 */
static struct {
	const char	*name;
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "showlpa",		drmach_pt_showlpa		},
	{ "ikprobe",		drmach_pt_ikprobe		},
	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
	{ "readmem",		drmach_pt_readmem		},
	{ "recovercpu",		drmach_pt_recovercpu		},

	/* the following line must always be last */
	{ NULL,			NULL				}
};
6975
6976/*ARGSUSED*/
6977sbd_error_t *
6978drmach_passthru(drmachid_t id, drmach_opts_t *opts)
6979{
6980	int		i;
6981	sbd_error_t	*err;
6982
6983	i = 0;
6984	while (drmach_pt_arr[i].name != NULL) {
6985		int len = strlen(drmach_pt_arr[i].name);
6986
6987		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
6988			break;
6989
6990		i += 1;
6991	}
6992
6993	if (drmach_pt_arr[i].name == NULL)
6994		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
6995	else
6996		err = (*drmach_pt_arr[i].handler)(id, opts);
6997
6998	return (err);
6999}
7000
7001sbd_error_t *
7002drmach_release(drmachid_t id)
7003{
7004	drmach_common_t *cp;
7005
7006	if (!DRMACH_IS_DEVICE_ID(id))
7007		return (drerr_new(0, ESTC_INAPPROP, NULL));
7008	cp = id;
7009
7010	return (cp->release(id));
7011}
7012
7013sbd_error_t *
7014drmach_status(drmachid_t id, drmach_status_t *stat)
7015{
7016	drmach_common_t *cp;
7017	sbd_error_t	*err;
7018
7019	rw_enter(&drmach_boards_rwlock, RW_READER);
7020
7021	if (!DRMACH_IS_ID(id)) {
7022		rw_exit(&drmach_boards_rwlock);
7023		return (drerr_new(0, ESTC_NOTID, NULL));
7024	}
7025
7026	cp = id;
7027
7028	err = cp->status(id, stat);
7029	rw_exit(&drmach_boards_rwlock);
7030	return (err);
7031}
7032
7033static sbd_error_t *
7034drmach_i_status(drmachid_t id, drmach_status_t *stat)
7035{
7036	drmach_common_t *cp;
7037
7038	if (!DRMACH_IS_ID(id))
7039		return (drerr_new(0, ESTC_NOTID, NULL));
7040	cp = id;
7041
7042	return (cp->status(id, stat));
7043}
7044
/*
 * Unconfigure the DDI branch for each device in the chain headed by
 * 'id' (memory devices are linked through mp->next; other types are
 * singletons).  On failure, reports the offending device path via
 * ESTC_DRVFAIL and, for IO boards, re-attaches the eri via
 * man_dr_attach before returning.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_unconfigure(drmachid_t id, int flags)
{
	drmach_device_t	*dp;
	dev_info_t 	*rdip;

	char	name[OBP_MAXDRVNAME];
	int rv;

	/*
	 * Since CPU nodes are not configured, it is
	 * necessary to skip the unconfigure step as
	 * well.
	 */
	if (DRMACH_IS_CPU_ID(id)) {
		return (NULL);
	}

	for (; id; ) {
		dev_info_t	*fdip = NULL;

		if (!DRMACH_IS_DEVICE_ID(id))
			return (drerr_new(0, ESTC_INAPPROP, NULL));
		dp = id;

		rdip = dp->node->n_getdip(dp->node);

		/*
		 * drmach_unconfigure() is always called on a configured branch.
		 * So the root of the branch was held earlier and must exist.
		 */
		ASSERT(rdip);

		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");

		rv = dp->node->n_getprop(dp->node,
		    "name", name, OBP_MAXDRVNAME);

		/* The node must have a name */
		/*
		 * NOTE(review): returns 0 (== NULL) here — a nameless
		 * node ends the walk without reporting an error.
		 */
		if (rv)
			return (0);

		/* skip (don't unconfigure) devices of unrecognized type */
		if (drmach_name2type_idx(name) < 0) {
			if (DRMACH_IS_MEM_ID(id)) {
				drmach_mem_t	*mp = id;
				id = mp->next;
			} else {
				id = NULL;
			}
			continue;
		}

		/*
		 * NOTE: FORCE flag is no longer needed under devfs
		 */
		ASSERT(e_ddi_branch_held(rdip));
		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
			sbd_error_t *err = NULL;
			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * If non-NULL, fdip is returned held and must be
			 * released.
			 */
			if (fdip != NULL) {
				(void) ddi_pathname(fdip, path);
				ddi_release_devi(fdip);
			} else {
				(void) ddi_pathname(rdip, path);
			}

			err = drerr_new(1, ESTC_DRVFAIL, path);

			kmem_free(path, MAXPATHLEN);

			/*
			 * If we were unconfiguring an IO board, a call was
			 * made to man_dr_detach.  We now need to call
			 * man_dr_attach to regain man use of the eri.
			 */
			if (DRMACH_IS_IO_ID(id)) {
				int (*func)(dev_info_t *dip);

				func = (int (*)(dev_info_t *))kobj_getsymvalue\
				    ("man_dr_attach", 0);

				if (func) {
					drmach_io_inst_t ios;
					dev_info_t 	*pdip;
					int		circ;

					/*
					 * Walk device tree to find rio dip for
					 * the board
					 * Since we are not interested in iosram
					 * instance here, initialize it to 0, so
					 * that the walk terminates as soon as
					 * eri dip is found.
					 */
					ios.iosram_inst = 0;
					ios.eri_dip = NULL;
					ios.bnum = dp->bp->bnum;

					if (pdip = ddi_get_parent(rdip)) {
						ndi_hold_devi(pdip);
						ndi_devi_enter(pdip, &circ);
					}
					/*
					 * Root node doesn't have to be held in
					 * any way.
					 */
					ASSERT(e_ddi_branch_held(rdip));
					ddi_walk_devs(rdip,
					    drmach_board_find_io_insts,
					    (void *)&ios);

					DRMACH_PR("drmach_unconfigure: bnum=%d"
					    " eri=0x%p\n",
					    ios.bnum, (void *)ios.eri_dip);

					if (pdip) {
						ndi_devi_exit(pdip, circ);
						ndi_rele_devi(pdip);
					}

					if (ios.eri_dip) {
						DRMACH_PR("calling"
						    " man_dr_attach\n");
						(void) (*func)(ios.eri_dip);
						/*
						 * Release hold acquired in
						 * drmach_board_find_io_insts()
						 */
						ndi_rele_devi(ios.eri_dip);
					}
				}
			}
			return (err);
		}

		/* advance to the next memory device in the chain, if any */
		if (DRMACH_IS_MEM_ID(id)) {
			drmach_mem_t	*mp = id;
			id = mp->next;
		} else {
			id = NULL;
		}
	}

	return (NULL);
}
7196
7197/*
7198 * drmach interfaces to legacy Starfire platmod logic
7199 * linkage via runtime symbol look up, called from plat_cpu_power*
7200 */
7201
7202/*
7203 * Start up a cpu.  It is possible that we're attempting to restart
7204 * the cpu after an UNCONFIGURE in which case the cpu will be
7205 * spinning in its cache.  So, all we have to do is wakeup him up.
7206 * Under normal circumstances the cpu will be coming from a previous
7207 * CONNECT and thus will be spinning in OBP.  In both cases, the
7208 * startup sequence is the same.
7209 */
7210int
7211drmach_cpu_poweron(struct cpu *cp)
7212{
7213	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7214
7215	ASSERT(MUTEX_HELD(&cpu_lock));
7216
7217	if (drmach_cpu_start(cp) != 0)
7218		return (EBUSY);
7219	else
7220		return (0);
7221}
7222
/*
 * Stop a cpu for DR: quiesce it, cross-trap it into
 * drmach_cpu_shutdown_self, and wait for it to acknowledge via the
 * drmach_xt_mb[] mailbox.  Caller must hold cpu_lock.  Always
 * returns 0; a shutdown timeout is only reported via DRMACH_PR.
 */
int
drmach_cpu_poweroff(struct cpu *cp)
{
	int		ntries;
	processorid_t	cpuid;
	void		drmach_cpu_shutdown_self(void);

	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * XXX CHEETAH SUPPORT
	 * for cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	cpuid = cp->cpu_id;

	/*
	 * Set affinity to ensure consistent reading and writing of
	 * drmach_xt_mb[cpuid] by one "master" CPU directing
	 * the shutdown of the target CPU.
	 */
	affinity_set(CPU->cpu_id);

	/*
	 * Capture all CPUs (except for detaching proc) to prevent
	 * crosscalls to the detaching proc until it has cleared its
	 * bit in cpu_ready_set.
	 *
	 * The CPUs remain paused and the prom_mutex is known to be free.
	 * This prevents blocking when doing prom IEEE-1275 calls at a
	 * high PIL level.
	 */
	promsafe_pause_cpus();

	/*
	 * Quiesce interrupts on the target CPU. We do this by setting
	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
	 * prevent it from receiving cross calls and cross traps.
	 * This prevents the processor from receiving any new soft interrupts.
	 */
	mp_cpu_quiesce(cp);

	(void) prom_hotremovecpu(cpuid);

	start_cpus();

	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
	drmach_xt_mb[cpuid] = 0x80;

	/* cross-trap the target into the shutdown sequence */
	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
	    (uint64_t)drmach_cpu_shutdown_self, NULL);

	/* poll for the target to clear its mailbox flag */
	ntries = drmach_cpu_ntries;
	while (drmach_xt_mb[cpuid] && ntries) {
		DELAY(drmach_cpu_delay);
		ntries--;
	}

	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */

	membar_sync();			/* make sure copy-back retires */

	affinity_clear();

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	DRMACH_PR("waited %d out of %d tries for "
	    "drmach_cpu_shutdown_self on cpu%d",
	    drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);

	/*
	 * Do this here instead of drmach_cpu_shutdown_self() to
	 * avoid an assertion failure panic in turnstile.c.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);

	return (0);
}
7321
/*
 * Scrub (zero) nbytes of the I/O cage region via drmach_bc_bzero().
 * On failure, retries over the whole cage and panics if that also
 * fails.  Caller must hold cpu_lock.
 */
void
drmach_iocage_mem_scrub(uint64_t nbytes)
{
	extern uint32_t drmach_bc_bzero(void*, size_t);
	uint32_t	rv;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* stay on one cpu for the duration of the scrub and e$ flush */
	affinity_set(CPU->cpu_id);

	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
	if (rv != 0) {
		DRMACH_PR(
		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
		/* retry over the entire cage; give up (panic) if that fails */
		rv = drmach_bc_bzero(drmach_iocage_vaddr, drmach_iocage_size);
		if (rv != 0)
			cmn_err(CE_PANIC,
			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
			    rv);
	}

	cpu_flush_ecache();

	affinity_clear();
}
7347
7348#define	ALIGN(x, a)	((a) == 0 ? (uintptr_t)(x) : \
7349	(((uintptr_t)(x) + (uintptr_t)(a) - 1l) & ~((uintptr_t)(a) - 1l)))
7350
7351static sbd_error_t *
7352drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
7353{
7354	pfn_t		basepfn;
7355	pgcnt_t		npages;
7356	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
7357	uint64_t	drmach_iocage_paddr_mbytes;
7358
7359	ASSERT(drmach_iocage_paddr != -1);
7360
7361	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7362	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7363
7364	(void) memscrub_delete_span(basepfn, npages);
7365
7366	mutex_enter(&cpu_lock);
7367	drmach_iocage_mem_scrub(drmach_iocage_size);
7368	mutex_exit(&cpu_lock);
7369
7370	/*
7371	 * HPOST wants the address of the cage to be 64 megabyte-aligned
7372	 * and in megabyte units.
7373	 * The size of the cage is also in megabyte units.
7374	 */
7375	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));
7376
7377	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;
7378
7379	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
7380	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
7381	tbrq->memlen = drmach_iocage_size / 0x100000;
7382
7383	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
7384	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
7385	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);
7386
7387	return (NULL);
7388}
7389
7390static sbd_error_t *
7391drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
7392{
7393	_NOTE(ARGUNUSED(tbr))
7394
7395	pfn_t		basepfn;
7396	pgcnt_t		npages;
7397	extern int	memscrub_add_span(pfn_t, pgcnt_t);
7398
7399	ASSERT(drmach_iocage_paddr != -1);
7400
7401	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7402	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7403
7404	(void) memscrub_add_span(basepfn, npages);
7405
7406	mutex_enter(&cpu_lock);
7407	mutex_enter(&drmach_iocage_lock);
7408	drmach_iocage_mem_scrub(drmach_iocage_size);
7409	drmach_iocage_is_busy = 0;
7410	cv_signal(&drmach_iocage_cv);
7411	mutex_exit(&drmach_iocage_lock);
7412	mutex_exit(&cpu_lock);
7413
7414	return (NULL);
7415}
7416
7417static int
7418drmach_cpu_intr_disable(cpu_t *cp)
7419{
7420	if (cpu_intr_disable(cp) != 0)
7421		return (-1);
7422	return (0);
7423}
7424
/*
 * Take a cpu out of service for the I/O cage test: offline, power
 * off and unconfigure it, recording its prior flags in *oflags.  On
 * any failure the cpu is restored (best effort) to its prior state
 * and -1 is returned.  Caller must hold cpu_lock.
 */
static int
drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
{
	struct cpu	*cp;
	processorid_t	cpuid;
	static char	*fn = "drmach_iocage_cpu_acquire";
	sbd_error_t 	*err;
	int 		impl;

	ASSERT(DRMACH_IS_CPU_ID(dp));
	ASSERT(MUTEX_HELD(&cpu_lock));

	cpuid = ((drmach_cpu_t *)dp)->cpuid;

	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);

	/* candidate must be idle from DR's view, present and active */
	if (dp->busy)
		return (-1);

	if ((cp = cpu_get(cpuid)) == NULL) {
		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
		return (-1);
	}

	if (!CPU_ACTIVE(cp)) {
		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
		return (-1);
	}

	/*
	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
	 * can fail to receive an XIR. To workaround this issue until a hardware
	 * fix is implemented, we will exclude the selection of these CPUs.
	 *
	 * Once a fix is implemented in hardware, this code should be updated
	 * to allow Jaguar CPUs that have the fix to be used. However, support
	 * must be retained to skip revisions that do not have this fix.
	 */

	err = drmach_cpu_get_impl(dp, &impl);
	if (err) {
		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
		sbd_err_clear(&err);
		return (-1);
	}

	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
	    drmach_iocage_exclude_jaguar_port_zero) {
		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
		    fn, cpuid);
		return (-1);
	}

	/* remember prior flags so drmach_iocage_cpu_return can restore */
	ASSERT(oflags);
	*oflags = cp->cpu_flags;

	if (cpu_offline(cp, 0)) {
		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
		return (-1);
	}

	/* poweroff failed: undo the offline and prior intr state */
	if (cpu_poweroff(cp)) {
		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
		if (cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to online CPU id %d "
			    "during I/O cage test selection", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	/* unconfigure failed: try to bring the cpu all the way back */
	if (cpu_unconfigure(cpuid)) {
		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
		    cpuid);
		(void) cpu_configure(cpuid);
		if ((cp = cpu_get(cpuid)) == NULL) {
			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
			    "during I/O cage test selection", cpuid);
			/* cpu lost: mark busy to block further DR on it */
			dp->busy = 1;
			return (-1);
		}
		if (cpu_poweron(cp) || cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to %s CPU id %d "
			    "during I/O cage test selection",
			    cpu_is_poweredoff(cp) ?
			    "poweron" : "online", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	dp->busy = 1;

	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);

	return (0);
}
7530
7531/*
7532 * Attempt to acquire all the CPU devices passed in. It is
7533 * assumed that all the devices in the list are the cores of
7534 * a single CMP device. Non CMP devices can be handled as a
7535 * single core CMP by passing in a one element list.
7536 *
7537 * Success is only returned if *all* the devices in the list
7538 * can be acquired. In the failure case, none of the devices
7539 * in the list will be held as acquired.
7540 */
7541static int
7542drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7543{
7544	int	curr;
7545	int	i;
7546	int	rv = 0;
7547
7548	ASSERT((dpp != NULL) && (*dpp != NULL));
7549
7550	/*
7551	 * Walk the list of CPU devices (cores of a CMP)
7552	 * and attempt to acquire them. Bail out if an
7553	 * error is encountered.
7554	 */
7555	for (curr = 0; curr < MAX_CORES_PER_CMP; curr++) {
7556
7557		/* check for the end of the list */
7558		if (dpp[curr] == NULL) {
7559			break;
7560		}
7561
7562		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7563		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7564
7565		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7566		if (rv != 0) {
7567			break;
7568		}
7569	}
7570
7571	/*
7572	 * Check for an error.
7573	 */
7574	if (rv != 0) {
7575		/*
7576		 * Make a best effort attempt to return any cores
7577		 * that were already acquired before the error was
7578		 * encountered.
7579		 */
7580		for (i = 0; i < curr; i++) {
7581			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7582		}
7583	}
7584
7585	return (rv);
7586}
7587
7588static int
7589drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
7590{
7591	processorid_t	cpuid;
7592	struct cpu	*cp;
7593	int		rv = 0;
7594	static char	*fn = "drmach_iocage_cpu_return";
7595
7596	ASSERT(DRMACH_IS_CPU_ID(dp));
7597	ASSERT(MUTEX_HELD(&cpu_lock));
7598
7599	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7600
7601	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);
7602
7603	if (cpu_configure(cpuid)) {
7604		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7605		    "after I/O cage test", cpuid);
7606		/*
7607		 * The component was never set to unconfigured during the IO
7608		 * cage test, so we need to leave marked as busy to prevent
7609		 * further DR operations involving this component.
7610		 */
7611		return (-1);
7612	}
7613
7614	if ((cp = cpu_get(cpuid)) == NULL) {
7615		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
7616		    "I/O cage test", cpuid);
7617		dp->busy = 0;
7618		return (-1);
7619	}
7620
7621	if (cpu_poweron(cp) || cpu_online(cp)) {
7622		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
7623		    "cage test", cpu_is_poweredoff(cp) ?
7624		    "poweron" : "online", cpuid);
7625		rv = -1;
7626	}
7627
7628	/*
7629	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
7630	 * P_NOINTR. Need to return to previous user-visible state.
7631	 */
7632	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
7633	    drmach_cpu_intr_disable(cp) != 0) {
7634		cmn_err(CE_WARN, "failed to restore CPU id %d "
7635		    "no-intr after I/O cage test", cpuid);
7636		rv = -1;
7637	}
7638
7639	dp->busy = 0;
7640
7641	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);
7642
7643	return (rv);
7644}
7645
/*
 * Select and acquire a cpu (all cores of one CMP) for the I/O cage
 * test, walking every connected slot-0 board.  On success, the
 * acquired devices are left in dpp[] (NULL-terminated), their prior
 * flags in oflags[], and the cpu's portid is stored in the
 * testboard request.
 */
static sbd_error_t *
drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
    cpu_flag_t *oflags)
{
	drmach_board_t	*bp;
	int		b_rv;
	int		b_idx;
	drmachid_t	b_id;
	int		found;

	mutex_enter(&cpu_lock);

	ASSERT(drmach_boards != NULL);

	found = 0;

	/*
	 * Walk the board list.
	 */
	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);

	while (b_rv == 0) {

		int		d_rv;
		int		d_idx;
		drmachid_t	d_id;

		bp = b_id;

		/* only connected boards with devices are candidates */
		if (bp->connected == 0 || bp->devices == NULL) {
			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
			continue;
		}

		/* An AXQ restriction disqualifies MCPU's as candidates. */
		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
			continue;
		}

		/*
		 * Walk the device list of this board.
		 */
		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);

		while (d_rv == 0) {

			drmach_device_t	*ndp;

			/* only interested in CPU devices */
			if (!DRMACH_IS_CPU_ID(d_id)) {
				d_rv = drmach_array_next(bp->devices, &d_idx,
				    &d_id);
				continue;
			}

			/*
			 * The following code assumes two properties
			 * of a CMP device:
			 *
			 *   1. All cores of a CMP are grouped together
			 *	in the device list.
			 *
			 *   2. There will only be a maximum of two cores
			 *	present in the CMP.
			 *
			 * If either of these two properties change,
			 * this code will have to be revisited.
			 */

			dpp[0] = d_id;
			dpp[1] = NULL;

			/*
			 * Get the next device. It may or may not be used.
			 */
			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
			ndp = d_id;

			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
				/*
				 * The second device is only interesting for
				 * this pass if it has the same portid as the
				 * first device. This implies that both are
				 * cores of the same CMP.
				 */
				if (dpp[0]->portid == ndp->portid) {
					dpp[1] = d_id;
				}
			}

			/*
			 * Attempt to acquire all cores of the CMP.
			 */
			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
				found = 1;
				break;
			}

			/*
			 * Check if the search for the second core was
			 * successful. If not, the next iteration should
			 * use that device.
			 */
			if (dpp[1] == NULL) {
				continue;
			}

			/* both lookahead cores used: advance past them */
			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
		}

		if (found)
			break;

		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
	}

	mutex_exit(&cpu_lock);

	if (!found) {
		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
	}

	tbrq->cpu_portid = (*dpp)->portid;

	return (NULL);
}
7773
7774/*
7775 * Setup an iocage by acquiring a cpu and memory.
7776 */
7777static sbd_error_t *
7778drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7779    cpu_flag_t *oflags)
7780{
7781	sbd_error_t *err;
7782
7783	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7784	if (!err) {
7785		mutex_enter(&drmach_iocage_lock);
7786		while (drmach_iocage_is_busy)
7787			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7788		drmach_iocage_is_busy = 1;
7789		mutex_exit(&drmach_iocage_lock);
7790		err = drmach_iocage_mem_get(tbrq);
7791		if (err) {
7792			mutex_enter(&drmach_iocage_lock);
7793			drmach_iocage_is_busy = 0;
7794			cv_signal(&drmach_iocage_cv);
7795			mutex_exit(&drmach_iocage_lock);
7796		}
7797	}
7798	return (err);
7799}
7800
#define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
#define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
#define	DRMACH_S1P_SAMPLE_MAX		2

/* sample points for saved register state: after suspend, before resume */
typedef enum {
	DRMACH_POST_SUSPEND = 0,
	DRMACH_PRE_RESUME
} drmach_sr_iter_t;

/* saved AXQ state for a paused slot 1 board */
typedef struct {
	dev_info_t	*dip;
	uint32_t	portid;
	uint32_t	pcr_sel_save;
	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX];
	uint64_t	reg_basepa;
} drmach_s1p_axq_t;

/* saved per-leaf PCI state, sampled at each drmach_sr_iter_t point */
typedef struct {
	dev_info_t		*dip;
	uint32_t		portid;
	uint64_t		csr_basepa;
	struct {
		uint64_t 	slot_intr_state_diag;
		uint64_t 	obio_intr_state_diag;
		uint_t		nmap_regs;
		uint64_t	*intr_map_regs;
	} regs[DRMACH_S1P_SAMPLE_MAX];
} drmach_s1p_pci_t;

/* saved schizo state: control/error registers plus its PCI leaves */
typedef struct {
	uint64_t		csr_basepa;
	struct {
		uint64_t	csr;
		uint64_t	errctrl;
		uint64_t	errlog;
	} regs[DRMACH_S1P_SAMPLE_MAX];
	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
} drmach_s1p_schizo_t;

/* all saved state for one paused slot 1 board (AXQ + schizos) */
typedef struct {
	drmach_s1p_axq_t	axq;
	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
} drmach_slot1_pause_t;

/*
 * Table of saved state for paused slot1 devices.
 */
static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
static int drmach_slot1_pause_init = 1;

#ifdef DEBUG
int drmach_slot1_pause_debug = 1;
#else
int drmach_slot1_pause_debug = 0;
#endif /* DEBUG */
7856
7857static int
7858drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
7859{
7860	int		portid, exp, slot, i;
7861	drmach_reg_t	regs[2];
7862	int		reglen = sizeof (regs);
7863
7864	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7865	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7866		return (0);
7867	}
7868
7869	exp = (portid >> 5) & 0x1f;
7870	slot = portid & 0x1;
7871
7872	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
7873	    strlen(DRMACH_AXQ_NAMEPROP))) {
7874		return (0);
7875	}
7876
7877	mutex_enter(&cpu_lock);
7878	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
7879		if (cpu[MAKE_CPUID(exp, slot, i)]) {
7880			/* maxcat cpu present */
7881			mutex_exit(&cpu_lock);
7882			return (0);
7883		}
7884	}
7885	mutex_exit(&cpu_lock);
7886
7887	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7888	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
7889		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
7890		    "axq dip=%p\n", (void *)dip);
7891		return (0);
7892	}
7893
7894	ASSERT(id && reg);
7895	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
7896	*reg |= (uint64_t)regs[0].reg_addr_lo;
7897	*id = portid;
7898
7899	return (1);
7900}
7901
7902/*
7903 * Allocate an entry in the slot1_paused state table.
7904 */
7905static void
7906drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7907    uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7908{
7909	int	axq_exp;
7910	drmach_slot1_pause_t *slot1;
7911
7912	axq_exp = (axq_portid >> 5) & 0x1f;
7913
7914	ASSERT(axq_portid & 0x1);
7915	ASSERT(slot1_paused[axq_exp] == NULL);
7916	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
7917	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
7918
7919	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
7920
7921	/*
7922	 * XXX This dip should really be held (via ndi_hold_devi())
7923	 * before saving it in the axq pause structure. However that
7924	 * would prevent DR as the pause data structures persist until
7925	 * the next suspend. drmach code should be modified to free the
7926	 * the slot 1 pause data structures for a boardset when its
7927	 * slot 1 board is DRed out. The dip can then be released via
7928	 * ndi_rele_devi() when the pause data structure is freed
7929	 * allowing DR to proceed. Until this change is made, drmach
7930	 * code should be careful about dereferencing the saved dip
7931	 * as it may no longer exist.
7932	 */
7933	slot1->axq.dip = axq_dip;
7934	slot1->axq.portid = axq_portid;
7935	slot1->axq.reg_basepa = reg;
7936	slot1_paused[axq_exp] = slot1;
7937}
7938
7939static void
7940drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
7941{
7942	int	i;
7943
7944	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
7945		if (pci->regs[i].intr_map_regs != NULL) {
7946			ASSERT(pci->regs[i].nmap_regs > 0);
7947			kmem_free(pci->regs[i].intr_map_regs,
7948			    pci->regs[i].nmap_regs * sizeof (uint64_t));
7949		}
7950	}
7951}
7952
7953static void
7954drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
7955{
7956	int	i, j, k;
7957	drmach_slot1_pause_t *slot1;
7958
7959	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
7960		if ((slot1 = slot1_paused[i]) == NULL)
7961			continue;
7962
7963		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
7964			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
7965				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
7966
7967		kmem_free(slot1, sizeof (*slot1));
7968		slot1_paused[i] = NULL;
7969	}
7970}
7971
/*
 * Tree walk callback routine. If dip represents a Schizo PCI leaf,
 * fill in the appropriate info in the slot1_paused state table.
 * Returns DDI_WALK_PRUNECHILD on a match (no interesting nodes live
 * below a PCI leaf), DDI_WALK_CONTINUE otherwise.
 */
static int
drmach_find_slot1_io(dev_info_t *dip, void *arg)
{
	int		portid, exp, ioc_unum, leaf_unum;
	char		buf[OBP_MAXDRVNAME];
	int		buflen = sizeof (buf);
	drmach_reg_t	regs[3];
	int		reglen = sizeof (regs);
	uint32_t	leaf_offset;
	uint64_t	schizo_csr_pa, pci_csr_pa;
	drmach_s1p_pci_t *pci;
	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;

	/* only nodes whose "name" matches the pci name property qualify */
	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
		return (DDI_WALK_CONTINUE);
	}

	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
		return (DDI_WALK_CONTINUE);
	}

	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
		    "dip=%p\n", (void *)dip);
		return (DDI_WALK_CONTINUE);
	}

	/* decode expander and IO controller unit number from the portid */
	exp = portid >> 5;
	ioc_unum = portid & 0x1;
	/* regs[0] addresses the leaf CSRs, regs[1] the shared schizo CSRs */
	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;

	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
	ASSERT(slot1_paused[exp] != NULL);
	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);

	/* leaf A sits at CSR offset 0x600000, leaf B at 0x700000 */
	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];

	/*
	 * XXX This dip should really be held (via ndi_hold_devi())
	 * before saving it in the pci pause structure. However that
	 * would prevent DR as the pause data structures persist until
	 * the next suspend. drmach code should be modified to free the
	 * slot 1 pause data structures for a boardset when its
	 * slot 1 board is DRed out. The dip can then be released via
	 * ndi_rele_devi() when the pause data structure is freed,
	 * allowing DR to proceed. Until this change is made, drmach
	 * code should be careful about dereferencing the saved dip as
	 * it may no longer exist.
	 */
	pci->dip = dip;
	pci->portid = portid;
	pci->csr_basepa = pci_csr_pa;

	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
	    buf, portid, (void *)dip);

	return (DDI_WALK_PRUNECHILD);
}
8046
8047static void
8048drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
8049{
8050	/*
8051	 * Root node doesn't have to be held
8052	 */
8053	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
8054	    (void *)slot1_paused);
8055}
8056
/*
 * Save the interrupt mapping registers for each non-idle interrupt
 * represented by the bit pairs in the saved interrupt state
 * diagnostic registers for this PCI leaf.
 *
 * Two passes over the diagnostic state: the first counts the non-idle
 * interrupts and allocates the save array, the second re-reads each
 * corresponding mapping register from hardware into the array.
 */
static void
drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
{
	int	 i, cnt, ino;
	uint64_t reg;
	char	 *dname;
	uchar_t	 Xmits;

	/* Xmits leaves use a different ino layout than plain Schizo */
	dname = ddi_binding_name(pci->dip);
	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;

	/*
	 * 1st pass allocates, 2nd pass populates.
	 */
	for (i = 0; i < 2; i++) {
		cnt = ino = 0;

		/*
		 * PCI slot interrupts
		 */
		reg = pci->regs[iter].slot_intr_state_diag;
		while (reg) {
			/*
			 * Xmits Interrupt Number Offset(ino) Assignments
			 *   00-17 PCI Slot Interrupts
			 *   18-1f Not Used
			 */
			if ((Xmits) && (ino > 0x17))
				break;
			/* each ino has a 2-bit state field; skip idle ones */
			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
			    COMMON_CLEAR_INTR_REG_IDLE) {
				if (i) {
					pci->regs[iter].intr_map_regs[cnt] =
					    lddphysio(pci->csr_basepa +
					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
					    ino * sizeof (reg));
				}
				++cnt;
			}
			++ino;
			reg >>= 2;
		}

		/*
		 * Xmits Interrupt Number Offset(ino) Assignments
		 *   20-2f Not Used
		 *   30-37 Internal interrupts
		 *   38-3e Not Used
		 */
		ino = (Xmits)  ?  0x30 : 0x20;

		/*
		 * OBIO and internal schizo interrupts
		 * Each PCI leaf has a set of mapping registers for all
		 * possible interrupt sources except the NewLink interrupts.
		 */
		reg = pci->regs[iter].obio_intr_state_diag;
		while (reg && ino <= 0x38) {
			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
			    COMMON_CLEAR_INTR_REG_IDLE) {
				if (i) {
					pci->regs[iter].intr_map_regs[cnt] =
					    lddphysio(pci->csr_basepa +
					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
					    ino * sizeof (reg));
				}
				++cnt;
			}
			++ino;
			reg >>= 2;
		}

		/* end of counting pass: size and allocate the save array */
		if (!i) {
			pci->regs[iter].nmap_regs = cnt;
			pci->regs[iter].intr_map_regs =
			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
		}
	}
}
8141
/*
 * Sample the slot 1 AXQ l2_io_q performance counter for the given
 * iteration. Post-suspend, the perf counter mux is first pointed at
 * the L2 IO queue input; pre-resume, the original counter selection
 * is restored after the sample is taken.
 */
static void
drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
{
	uint32_t	reg;

	/* no AXQ was recorded for this expander */
	if (axq->reg_basepa == 0x0UL)
		return;

	if (iter == DRMACH_POST_SUSPEND) {
		/* save current counter selection so it can be restored */
		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
		    AXQ_SLOT1_PERFCNT_SEL);
		/*
		 * Select l2_io_queue counter by writing L2_IO_Q mux
		 * input to bits 0-6 of perf cntr select reg.
		 */
		reg = axq->pcr_sel_save;
		reg &= ~AXQ_PIC_CLEAR_MASK;
		reg |= L2_IO_Q;

		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
	}

	/* snapshot the l2_io_q counter for this sample point */
	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);

	if (iter == DRMACH_PRE_RESUME) {
		/* restore the selection saved at DRMACH_POST_SUSPEND */
		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
		    axq->pcr_sel_save);
	}

	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
}
8174
/*
 * Snapshot the Safari control/error registers of one schizo, and for
 * each discovered PCI leaf the interrupt state diagnostic registers
 * plus the mapping registers of all non-idle interrupts, for the
 * given sample point.
 */
static void
drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
{
	int	i;
	drmach_s1p_pci_t *pci;

	/* this schizo was never discovered by the tree walk */
	if (schizo->csr_basepa == 0x0UL)
		return;

	schizo->regs[iter].csr =
	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
	schizo->regs[iter].errctrl =
	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
	schizo->regs[iter].errlog =
	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);

	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
		pci = &schizo->pci[i];
		/* only leaves recorded by drmach_find_slot1_io */
		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
			pci->regs[iter].slot_intr_state_diag =
			    lddphysio(pci->csr_basepa +
			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);

			pci->regs[iter].obio_intr_state_diag =
			    lddphysio(pci->csr_basepa +
			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);

			drmach_s1p_intr_map_reg_save(pci, iter);
		}
	}
}
8206
8207/*
8208 * Called post-suspend and pre-resume to snapshot the suspend state
8209 * of slot1 AXQs and Schizos.
8210 */
8211static void
8212drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8213    drmach_sr_iter_t iter)
8214{
8215	int	i, j;
8216	drmach_slot1_pause_t *slot1;
8217
8218	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8219		if ((slot1 = slot1_paused[i]) == NULL)
8220			continue;
8221
8222		drmach_s1p_axq_update(&slot1->axq, iter);
8223		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8224			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8225	}
8226}
8227
/*
 * Starcat hPCI Schizo devices.
 *
 * The name field is overloaded. NULL means the slot (interrupt concentrator
 * bus) is not used. intr_mask is a bit mask representing the 4 possible
 * interrupts per slot, on if valid (rio does not use interrupt lines 0, 1).
 *
 * Indexed as [pci slot][schizo unit number].
 */
static struct {
	char	*name;
	uint8_t	intr_mask;
} drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
	/* Schizo 0 */		/* Schizo 1 */
	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
};
8249
/*
 * Map a Schizo internal interrupt number offset (ino 0x30-0x37) to a
 * human-readable description of the interrupt source.
 *
 * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
 * "Interrupt Registers", Table 22-69, page 306.
 */
static char *
drmach_schz_internal_ino2str(int ino)
{
	static char *ino_name[] = {
		"Uncorrectable ECC error",	/* 0x30 */
		"Correctable ECC error",	/* 0x31 */
		"PCI Bus A Error",		/* 0x32 */
		"PCI Bus B Error",		/* 0x33 */
		"Safari Bus Error"		/* 0x34 */
	};
	int	intr;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	intr = ino & 0x7;
	if (intr <= 0x4)
		return (ino_name[intr]);
	return ("Reserved");
}
8271
/* shift an ino to the position of its 2-bit state field in a diag reg */
#define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)
8273
8274static void
8275drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
8276    int ino, drmach_sr_iter_t iter)
8277{
8278	uint8_t		intr_mask;
8279	char		*slot_devname;
8280	char		namebuf[OBP_MAXDRVNAME];
8281	int		slot, intr_line, slot_valid, intr_valid;
8282
8283	ASSERT(ino >= 0 && ino <= 0x1f);
8284	ASSERT((pci->regs[iter].slot_intr_state_diag &
8285	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
8286	    COMMON_CLEAR_INTR_REG_IDLE);
8287
8288	slot = (ino >> 2) & 0x7;
8289	intr_line = ino & 0x3;
8290
8291	slot_devname = drmach_schz_slot_intr[slot][unum].name;
8292	slot_valid = (slot_devname == NULL) ? 0 : 1;
8293	if (!slot_valid) {
8294		(void) snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)",
8295		    slot);
8296		slot_devname = namebuf;
8297	}
8298
8299	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
8300	intr_valid = (1 << intr_line) & intr_mask;
8301
8302	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
8303	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
8304	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
8305}
8306
/*
 * Log interrupt source device info for all valid, pending interrupts
 * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
 * error in the error ctrl reg.
 */
static void
drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
    int unum, drmach_sr_iter_t iter)
{
	uint64_t	reg;
	int		i, n, ino;
	drmach_s1p_pci_t *pci;

	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
	ASSERT(unum < STARCAT_SLOT1_IO_MAX);

	/*
	 * Check the saved interrupt mapping registers. If interrupt is valid,
	 * map the ino to the Schizo source device and check that the pci
	 * slot and interrupt line are valid.
	 */
	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
		pci = &schizo->pci[i];
		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
			reg = pci->regs[iter].intr_map_regs[n];
			if (reg & COMMON_INTR_MAP_REG_VALID) {
				ino = reg & COMMON_INTR_MAP_REG_INO;

				/* classify the ino by its numeric range */
				if (ino <= 0x1f) {
					/*
					 * PCI slot interrupt
					 */
					drmach_s1p_decode_slot_intr(exp, unum,
					    pci, ino, iter);
				} else if (ino <= 0x2f) {
					/*
					 * OBIO interrupt
					 */
					prom_printf("IO%d/P%d OBIO interrupt: "
					    "ino=0x%x\n", exp, unum, ino);
				} else if (ino <= 0x37) {
					/*
					 * Internal interrupt
					 */
					prom_printf("IO%d/P%d Internal "
					    "interrupt: ino=0x%x (%s)\n",
					    exp, unum, ino,
					    drmach_schz_internal_ino2str(ino));
				} else {
					/*
					 * NewLink interrupt
					 */
					prom_printf("IO%d/P%d NewLink "
					    "interrupt: ino=0x%x\n", exp,
					    unum, ino);
				}

				DRMACH_PR("drmach_s1p_schizo_log_intr: "
				    "exp=%d, schizo=%d, pci_leaf=%c, "
				    "ino=0x%x, intr_map_reg=0x%lx\n",
				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
			}
		}
	}
}
8372
/*
 * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
 * "Safari Error Control/Log Registers", Table 22-11, page 248.
 * Bit 4 of the error log register: an unmapped Safari bus error.
 */
#define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8378
/*
 * Check for possible error indicators prior to resuming the
 * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
 * Any finding is reported to the console; nothing is returned.
 */
static void
drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
    drmach_sr_iter_t iter)
{
	int	i, j;
	int 	errflag = 0;	/* set once the warning banner is printed */
	drmach_slot1_pause_t *slot1;

	/*
	 * Check for logged schizo bus error and pending interrupts.
	 */
	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
		if ((slot1 = slot1_paused[i]) == NULL)
			continue;

		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
			if (slot1->schizo[j].csr_basepa == 0x0UL)
				continue;

			if (slot1->schizo[j].regs[iter].errlog &
			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
				/* print the banner only once per pass */
				if (!errflag) {
					prom_printf("DR WARNING: interrupt "
					    "attempt detected during "
					    "copy-rename (%s):\n",
					    (iter == DRMACH_POST_SUSPEND) ?
					    "post suspend" : "pre resume");
					++errflag;
				}
				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
				    i, j, iter);
			}
		}
	}

	/*
	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
	 */
	if (iter == DRMACH_PRE_RESUME) {
		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
			if ((slot1 = slot1_paused[i]) == NULL)
				continue;

			/* counter movement implies IO during the pause */
			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
				prom_printf("DR WARNING: IO transactions "
				    "detected on IO%d during copy-rename: "
				    "AXQ l2_io_q performance counter "
				    "start=%d, end=%d\n", i,
				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
			}
		}
	}
}
8438
/*
 * Node in a circular, doubly-linked ring of device instances whose
 * suspend/resume order drmach manages explicitly.
 */
struct drmach_sr_list {
	dev_info_t		*dip;	/* held via ndi_hold_devi() */
	struct drmach_sr_list	*next;
	struct drmach_sr_list	*prev;
};
8444
/*
 * Drivers whose suspend/resume order drmach controls: entries are
 * suspended in array order and resumed in reverse. Each entry holds a
 * ring of the driver's instances in the order they were presented to
 * drmach_verify_sr().
 */
static struct drmach_sr_ordered {
	char			*name;	/* driver node name prefix */
	struct drmach_sr_list	*ring;	/* instance ring; NULL when empty */
} drmach_sr_ordered[] = {
	{ "iosram",			NULL },
	{ "address-extender-queue",	NULL },
	{ NULL,				NULL }, /* terminator -- required */
};
8453
8454static void
8455drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8456{
8457	struct drmach_sr_list *np;
8458
8459	DRMACH_PR("drmach_sr_insert: adding dip %p\n", (void *)dip);
8460
8461	np = (struct drmach_sr_list *)kmem_alloc(
8462	    sizeof (struct drmach_sr_list), KM_SLEEP);
8463
8464	ndi_hold_devi(dip);
8465	np->dip = dip;
8466
8467	if (*lp == NULL) {
8468		/* establish list */
8469		*lp = np->next = np->prev = np;
8470	} else {
8471		/* place new node behind head node on ring list */
8472		np->prev = (*lp)->prev;
8473		np->next = *lp;
8474		np->prev->next = np;
8475		np->next->prev = np;
8476	}
8477}
8478
/*
 * Remove dip's node from the suspend/resume ring *lp, release the
 * ndi hold taken by drmach_sr_insert() and free the node. During
 * resume every dip passed in should be found on its ring; a miss is
 * only reported via DRMACH_PR.
 */
static void
drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
{
	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", (void *)dip);

	if (*lp) {
		struct drmach_sr_list *xp;

		/* start search with mostly likely node */
		xp = (*lp)->prev;
		do {
			if (xp->dip == dip) {
				/* unlink xp from the ring */
				xp->prev->next = xp->next;
				xp->next->prev = xp->prev;

				/*
				 * If the head is being removed, advance it;
				 * if it still points at xp afterwards, xp
				 * was the only node and the ring is empty.
				 */
				if (xp == *lp)
					*lp = xp->next;
				if (xp == *lp)
					*lp = NULL;
				xp->dip = NULL;
				ndi_rele_devi(dip);
				kmem_free(xp, sizeof (*xp));

				DRMACH_PR("drmach_sr_delete:"
				    " disposed sr node for dip %p",
				    (void *)dip);
				return;
			}

			DRMACH_PR("drmach_sr_delete: still searching\n");

			xp = xp->prev;
		} while (xp != (*lp)->prev);
	}

	/* every dip should be found during resume */
	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", (void *)dip);
}
8517
8518int
8519drmach_verify_sr(dev_info_t *dip, int sflag)
8520{
8521	int	rv;
8522	int	len;
8523	char    name[OBP_MAXDRVNAME];
8524
8525	if (drmach_slot1_pause_debug) {
8526		if (sflag && drmach_slot1_pause_init) {
8527			drmach_slot1_pause_free(drmach_slot1_paused);
8528			drmach_slot1_pause_init = 0;
8529		} else if (!sflag && !drmach_slot1_pause_init) {
8530			/* schedule init for next suspend */
8531			drmach_slot1_pause_init = 1;
8532		}
8533	}
8534
8535	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8536	    "name", &len);
8537	if (rv == DDI_PROP_SUCCESS) {
8538		int		portid;
8539		uint64_t	reg;
8540		struct drmach_sr_ordered *op;
8541
8542		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
8543		    DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);
8544
8545		if (rv != DDI_PROP_SUCCESS)
8546			return (0);
8547
8548		if (drmach_slot1_pause_debug && sflag &&
8549		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
8550			drmach_slot1_pause_add_axq(dip, name, portid, reg,
8551			    drmach_slot1_paused);
8552		}
8553
8554		for (op = drmach_sr_ordered; op->name; op++) {
8555			if (strncmp(op->name, name, strlen(op->name)) == 0) {
8556				if (sflag)
8557					drmach_sr_insert(&op->ring, dip);
8558				else
8559					drmach_sr_delete(&op->ring, dip);
8560				return (1);
8561			}
8562		}
8563	}
8564
8565	return (0);
8566}
8567
8568static void
8569drmach_sr_dip(dev_info_t *dip, int suspend)
8570{
8571	int	 rv;
8572	major_t	 maj;
8573	char	*name, *name_addr, *aka;
8574
8575	if ((name = ddi_get_name(dip)) == NULL)
8576		name = "<null name>";
8577	else if ((maj = ddi_name_to_major(name)) != -1)
8578		aka = ddi_major_to_name(maj);
8579	else
8580		aka = "<unknown>";
8581
8582	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8583		name_addr = "<null>";
8584
8585	prom_printf("\t%s %s@%s (aka %s)\n",
8586	    suspend ? "suspending" : "resuming",
8587	    name, name_addr, aka);
8588
8589	if (suspend) {
8590		rv = devi_detach(dip, DDI_SUSPEND);
8591	} else {
8592		rv = devi_attach(dip, DDI_RESUME);
8593	}
8594
8595	if (rv != DDI_SUCCESS) {
8596		prom_printf("\tFAILED to %s %s@%s\n",
8597		    suspend ? "suspend" : "resume",
8598		    name, name_addr);
8599	}
8600}
8601
8602void
8603drmach_suspend_last()
8604{
8605	struct drmach_sr_ordered *op;
8606
8607	if (drmach_slot1_pause_debug)
8608		drmach_slot1_pause_add_io(drmach_slot1_paused);
8609
8610	/*
8611	 * The ordering array declares the strict sequence in which
8612	 * the named drivers are to suspended. Each element in
8613	 * the array may have a double-linked ring list of driver
8614	 * instances (dip) in the order in which they were presented
8615	 * to drmach_verify_sr. If present, walk the list in the
8616	 * forward direction to suspend each instance.
8617	 */
8618	for (op = drmach_sr_ordered; op->name; op++) {
8619		if (op->ring) {
8620			struct drmach_sr_list *rp;
8621
8622			rp = op->ring;
8623			do {
8624				drmach_sr_dip(rp->dip, 1);
8625				rp = rp->next;
8626			} while (rp != op->ring);
8627		}
8628	}
8629
8630	if (drmach_slot1_pause_debug) {
8631		drmach_slot1_pause_update(drmach_slot1_paused,
8632		    DRMACH_POST_SUSPEND);
8633		drmach_slot1_pause_verify(drmach_slot1_paused,
8634		    DRMACH_POST_SUSPEND);
8635	}
8636}
8637
8638void
8639drmach_resume_first()
8640{
8641	struct drmach_sr_ordered *op = drmach_sr_ordered +
8642	    (sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));
8643
8644	if (drmach_slot1_pause_debug) {
8645		drmach_slot1_pause_update(drmach_slot1_paused,
8646		    DRMACH_PRE_RESUME);
8647		drmach_slot1_pause_verify(drmach_slot1_paused,
8648		    DRMACH_PRE_RESUME);
8649	}
8650
8651	op -= 1;	/* point at terminating element */
8652
8653	/*
8654	 * walk ordering array and rings backwards to resume dips
8655	 * in reverse order in which they were suspended
8656	 */
8657	while (--op >= drmach_sr_ordered) {
8658		if (op->ring) {
8659			struct drmach_sr_list *rp;
8660
8661			rp = op->ring->prev;
8662			do {
8663				drmach_sr_dip(rp->dip, 0);
8664				rp = rp->prev;
8665			} while (rp != op->ring->prev);
8666		}
8667	}
8668}
8669
/*
 * Log a DR sysevent.
 * Return value: 0 success, non-zero failure.
 *
 * board   - board number, translated to an attachment point name
 * hint    - DR hint string attached to the event as DR_HINT
 * flag    - SE_SLEEP or SE_NOSLEEP; selects the kmem allocation policy
 * verbose - when set, log the call and any failure via DRMACH_PR/cmn_err
 */
int
drmach_log_sysevent(int board, char *hint, int flag, int verbose)
{
	sysevent_t			*ev;
	sysevent_id_t			eid;
	int				rv, km_flag;
	sysevent_value_t		evnt_val;
	sysevent_attr_list_t		*evnt_attr_list = NULL;
	char				attach_pnt[MAXNAMELEN];

	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
	attach_pnt[0] = '\0';
	/* ev is still NULL here; the error path must not free it */
	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
		rv = -1;
		goto logexit;
	}
	if (verbose)
		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
		    attach_pnt, hint, flag, verbose);

	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
		rv = -2;
		goto logexit;
	}
	/* attach point id attribute */
	evnt_val.value_type = SE_DATA_TYPE_STRING;
	evnt_val.value.sv_string = attach_pnt;
	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
	    &evnt_val, km_flag)) != 0)
		goto logexit;

	/* DR hint attribute; on failure the partial list must be freed */
	evnt_val.value_type = SE_DATA_TYPE_STRING;
	evnt_val.value.sv_string = hint;
	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
	    &evnt_val, km_flag)) != 0) {
		sysevent_free_attr(evnt_attr_list);
		goto logexit;
	}

	/* after this the attribute list is owned by (and freed with) ev */
	(void) sysevent_attach_attributes(ev, evnt_attr_list);

	/*
	 * Log the event but do not sleep waiting for its
	 * delivery. This provides insulation from syseventd.
	 */
	rv = log_sysevent(ev, SE_NOSLEEP, &eid);

logexit:
	if (ev)
		sysevent_free(ev);
	if ((rv != 0) && verbose)
		cmn_err(CE_WARN,
		    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
		    rv, attach_pnt, hint);

	return (rv);
}
8731
8732/*
8733 * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8734 * Only the valid entries are modified, so the array should be zeroed out
8735 * initially.
8736 */
8737static void
8738drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8739	int	i;
8740	char	c;
8741
8742	ASSERT(mutex_owned(&drmach_slice_table_lock));
8743
8744	for (i = 0; i < AXQ_MAX_EXP; i++) {
8745		c = drmach_slice_table[i];
8746
8747		if (c & 0x20) {
8748			slice_arr[i].valid = 1;
8749			slice_arr[i].slice = c & 0x1f;
8750		}
8751	}
8752}
8753
/*
 * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
 * Only the valid entries are modified, so the array should be zeroed out
 * initially.
 */
static void
drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
	int		rv, exp, mcnum, bank;
	uint64_t	madr;
	drmachid_t	id;
	drmach_board_t	*bp;
	drmach_mem_t	*mp;
	dr_memregs_t	*memregs;

	/* CONSTCOND */
	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));

	/* one entry per expander; 18 presumably equals AXQ_MAX_EXP -- TODO confirm */
	for (exp = 0; exp < 18; exp++) {
		rv = drmach_array_get(drmach_boards,
		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
		ASSERT(rv == 0);	/* should never be out of bounds */
		if (id == NULL) {
			/* no slot 0 board on this expander */
			continue;
		}

		memregs = &regs_arr[exp];
		bp = (drmach_board_t *)id;
		for (mp = bp->mem; mp != NULL; mp = mp->next) {
			/* memory controller number from the low portid bits */
			mcnum = mp->dev.portid & 0x3;
			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
				drmach_mem_read_madr(mp, bank, &madr);
				/* record only banks marked valid in the MADR */
				if (madr & DRMACH_MC_VALID_MASK) {
					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
					    exp, mcnum, bank, madr);
					memregs->madr[mcnum][bank].hi =
					    DRMACH_U64_TO_MCREGHI(madr);
					memregs->madr[mcnum][bank].lo =
					    DRMACH_U64_TO_MCREGLO(madr);
				}
			}
		}
	}
}
8797
/*
 * Do not allow physical address range modification if either board on this
 * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
 *
 * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
 * install the cache line as owned/dirty as a result of the RTSR transaction.
 * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
 * list before the rename after flushing local caches.  When copy-rename
 * requires changing the physical address ranges (i.e. smaller memory target),
 * the bus sync list contains physical addresses that will not exist after the
 * rename.  If these cache lines are owned due to a RTSR, a system error can
 * occur following the rename when these cache lines are evicted and a writeback
 * is attempted.
 *
 * Incoming parameter represents either the copy-rename source or a candidate
 * target memory board.  On Starcat, only slot0 boards may have memory.
 *
 * Returns 1 when range modification is allowed, 0 when it must be denied.
 */
int
drmach_allow_memrange_modify(drmachid_t s0id)
{
	drmach_board_t	*s0bp, *s1bp;
	drmachid_t	s1id;
	int		rv;

	s0bp = s0id;

	ASSERT(DRMACH_IS_BOARD_ID(s0id));
	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);

	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
		/*
		 * This is reason enough to fail the request, no need
		 * to check the device list for cpus.
		 */
		return (0);
	}

	/*
	 * Check for MCPU board on the same expander.
	 *
	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
	 * types, as it is derived at from the POST gdcd board flag
	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
	 * ignored) for boards with no processors.  Since NULL proc LPA
	 * applies only to processors, we walk the devices array to detect
	 * MCPUs.
	 */
	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
	s1bp = s1id;
	if (rv == 0 && s1bp != NULL) {

		ASSERT(DRMACH_IS_BOARD_ID(s1id));
		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
		    DRMACH_BNUM2EXP(s1bp->bnum));

		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
		    s1bp->devices != NULL) {
			int		d_idx;
			drmachid_t	d_id;

			/* scan the slot 1 board's devices for any cpu */
			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
			while (rv == 0) {
				if (DRMACH_IS_CPU_ID(d_id)) {
					/*
					 * Fail MCPU in NULL LPA mode.
					 */
					return (0);
				}

				rv = drmach_array_next(s1bp->devices, &d_idx,
				    &d_id);
			}
		}
	}

	return (1);
}
8876