1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
/*
 * A CPR derivative specifically for starfire/starcat.
 * X86 doesn't make use of the quiesce interfaces; they are kept for
 * simplicity.
 */
30
31#include <sys/types.h>
32#include <sys/systm.h>
33#include <sys/machparam.h>
34#include <sys/machsystm.h>
35#include <sys/ddi.h>
36#define	SUNDDI_IMPL
37#include <sys/sunddi.h>
38#include <sys/sunndi.h>
39#include <sys/devctl.h>
40#include <sys/time.h>
41#include <sys/kmem.h>
42#include <nfs/lm.h>
43#include <sys/ddi_impldefs.h>
44#include <sys/ndi_impldefs.h>
45#include <sys/obpdefs.h>
46#include <sys/cmn_err.h>
47#include <sys/debug.h>
48#include <sys/errno.h>
49#include <sys/callb.h>
50#include <sys/clock.h>
51#include <sys/x_call.h>
52#include <sys/cpuvar.h>
53#include <sys/epm.h>
54#include <sys/vfs.h>
55#include <sys/promif.h>
56#include <sys/conf.h>
57#include <sys/cyclic.h>
58
59#include <sys/dr.h>
60#include <sys/dr_util.h>
61
/* Routines implemented outside this file. */
extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int	is_pseudo_device(dev_info_t *dip);

/* cpu_lock is asserted held by dr_stop_intr() and dr_enable_intr(). */
extern kmutex_t	cpu_lock;
/* Driver-name table searched by dr_is_unsafe_major(). */
extern dr_unsafe_devs_t dr_unsafe_devs;

/* Forward declarations for the static helpers defined in this file. */
static int		dr_is_real_device(dev_info_t *dip);
static int		dr_is_unsafe_major(major_t major);
static int		dr_bypass_device(char *dname);
static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
				char *alias);
static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
				int majors);
static int		dr_add_int(uint64_t *arr, int idx, int len,
				uint64_t val);

/* Non-static entry point: runs dr_suspend() and, on success, dr_resume(). */
int dr_pt_test_suspend(dr_handle_t *hp);
81
/*
 * dr_quiesce.c interface
 * NOTE: states used internally by dr_suspend and dr_resume
 *
 * dr_suspend() records each stage in sr_suspend_state just before it
 * starts that stage.  dr_resume() switches on the recorded value and
 * falls through the lower states, so only stages that were started are
 * undone.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* no stage started yet */
	DR_SRSTATE_USER,	/* set before user threads are stopped */
	DR_SRSTATE_DRIVER,	/* set before device drivers are suspended */
	DR_SRSTATE_FULL		/* set before the CPUs are paused */
} suspend_state_t;
92
/*
 * Suspend/resume context.  Created by dr_get_sr_handle(), passed to
 * dr_suspend() and dr_resume(), and freed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* DR handle this suspend/resume operation belongs to */
	dr_handle_t		*sr_dr_handlep;
	/*
	 * Device whose DDI_SUSPEND failed; held by dr_suspend_devices()
	 * and released (and reset to NULL) by dr_resume_devices().
	 */
	dev_info_t		*sr_failed_dip;
	/* how far dr_suspend() progressed; consumed by dr_resume() */
	suspend_state_t		sr_suspend_state;
	/* SR_FLAG_* bits; NOTE(review): no user is visible in this file */
	uint_t			sr_flags;
	/* driver majors or pids collected for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};

#define	SR_FLAG_WATCHDOG	0x1
103
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list must end with an empty string: dr_bypass_device() stops
 * scanning at the first entry whose first character is '\0'.
 */
static char *dr_bypass_list[] = {
	""
};
112
113
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */

/*
 * dr_skip_user_threads is used to control if user threads should
 * be suspended.  If dr_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, dr_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and dr_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to not allow blocked threads when we don't even check stop
 * result.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

/* NOTE(review): no use of DR_CPU_LOOP_MSEC is visible in this file. */
#define	DR_CPU_LOOP_MSEC	1000
133
/*
 * Final step of dr_suspend(): call kpreempt_disable() and then
 * cyclic_suspend().  The caller must hold cpu_lock.  Undone, in the
 * opposite order, by dr_enable_intr().
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
142
/*
 * Counterpart of dr_stop_intr(): call cyclic_resume() and then
 * kpreempt_enable().  The caller must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
151
/*
 * Allocate a suspend/resume handle bound to the DR handle "hp".
 * Only sr_dr_handlep is set explicitly; the other fields keep whatever
 * GETSTRUCT() leaves in them (presumably zero -- dr_release_sr_handle()
 * asserts sr_failed_dip == NULL; confirm against the GETSTRUCT definition).
 * The caller frees the handle with dr_release_sr_handle().
 */
dr_sr_handle_t *
dr_get_sr_handle(dr_handle_t *hp)
{
	dr_sr_handle_t *srh;

	srh = GETSTRUCT(dr_sr_handle_t, 1);
	srh->sr_dr_handlep = hp;

	return (srh);
}
162
/*
 * Free a handle obtained from dr_get_sr_handle().  Any failed-device
 * hold must already have been dropped (dr_resume_devices() resets
 * sr_failed_dip to NULL when it releases the hold).
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
169
170static int
171dr_is_real_device(dev_info_t *dip)
172{
173	struct regspec *regbuf = NULL;
174	int length = 0;
175	int rc;
176
177	if (ddi_get_driver(dip) == NULL)
178		return (0);
179
180	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
181		return (1);
182	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
183		return (0);
184
185	/*
186	 * now the general case
187	 */
188	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
189	    (caddr_t)&regbuf, &length);
190	ASSERT(rc != DDI_PROP_NO_MEMORY);
191	if (rc != DDI_PROP_SUCCESS) {
192		return (0);
193	} else {
194		if ((length > 0) && (regbuf != NULL))
195			kmem_free(regbuf, length);
196		return (1);
197	}
198}
199
200static int
201dr_is_unsafe_major(major_t major)
202{
203	char    *dname, **cpp;
204	int	i, ndevs;
205
206	if ((dname = ddi_major_to_name(major)) == NULL) {
207		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
208		return (0);
209	}
210
211	ndevs = dr_unsafe_devs.ndevs;
212	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
213		if (strcmp(dname, *cpp++) == 0)
214			return (1);
215	}
216	return (0);
217}
218
219static int
220dr_bypass_device(char *dname)
221{
222	int i;
223	char **lname;
224
225	if (dname == NULL)
226		return (0);
227
228	/* check the bypass list */
229	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
230		if (strcmp(dname, dr_bypass_list[i++]) == 0)
231			return (1);
232	}
233	return (0);
234}
235
236static int
237dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
238{
239	major_t	devmajor;
240	char	*aka, *name;
241
242	*buffer = *alias = 0;
243
244	if (dip == NULL)
245		return (-1);
246
247	if ((name = ddi_get_name(dip)) == NULL)
248		name = "<null name>";
249
250	aka = name;
251
252	if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
253		aka = ddi_major_to_name(devmajor);
254
255	(void) strcpy(buffer, name);
256
257	if (strcmp(name, aka))
258		(void) strcpy(alias, aka);
259	else
260		*alias = 0;
261
262	return (0);
263}
264
/*
 * Argument block handed to dr_check_dip() through the device-tree
 * walkers.  Any of the pointers may be NULL, in which case dr_check_dip()
 * skips the corresponding bookkeeping.
 */
struct dr_ref {
	/* running total of the "ref" counts reported to dr_check_dip() */
	int		*refcount;
	/* same, but only for NETWORK_PHYSDRV() majors that are not GLDv3 */
	int		*refcount_non_gldv3;
	/* array collecting the majors of attached unsafe drivers */
	uint64_t	*arr;
	/* in/out: number of entries currently stored in arr */
	int		*idx;
	/* capacity of arr, passed to dr_add_int() */
	int		len;
};
272
273/* ARGSUSED */
274static int
275dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
276{
277	major_t		major;
278	char		*dname;
279	struct dr_ref	*rp = (struct dr_ref *)arg;
280
281	if (dip == NULL)
282		return (DDI_WALK_CONTINUE);
283
284	if (!dr_is_real_device(dip))
285		return (DDI_WALK_CONTINUE);
286
287	dname = ddi_binding_name(dip);
288
289	if (dr_bypass_device(dname))
290		return (DDI_WALK_CONTINUE);
291
292	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
293		if (ref && rp->refcount) {
294			*rp->refcount += ref;
295			PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
296			    major, ref);
297		}
298		if (ref && rp->refcount_non_gldv3) {
299			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
300				*rp->refcount_non_gldv3 += ref;
301		}
302		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
303			PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
304			    major);
305			if (rp->arr != NULL && rp->idx != NULL)
306				*rp->idx = dr_add_int(rp->arr, *rp->idx,
307				    rp->len, (uint64_t)major);
308		}
309	}
310	return (DDI_WALK_CONTINUE);
311}
312
/*
 * ddi_walk_devs() callback used by dr_suspend(): runs dr_check_dip() with
 * a reference count of 0, so only the unsafe-driver check takes effect.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
	return (dr_check_dip(dip, arg, 0));
}
318
319
320/*ARGSUSED*/
321void
322dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
323    uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
324{
325	struct dr_ref bref = {0};
326
327	if (dip == NULL)
328		return;
329
330	bref.refcount = refcount;
331	bref.refcount_non_gldv3 = refcount_non_gldv3;
332	bref.arr = arr;
333	bref.idx = idx;
334	bref.len = len;
335
336	ASSERT(e_ddi_branch_held(dip));
337	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
338}
339
/*
 * Suspend "dip", its following siblings, and all of their descendants.
 * Each node's children are suspended (recursively) before the node itself.
 *
 * The "dip" argument's parent (if it exists) must be held busy.
 *
 * Nodes that are not real devices, are on the bypass list, or for which
 * drmach_verify_sr() returns non-zero are skipped.
 *
 * On the first DDI_SUSPEND failure the device's major is recorded in
 * srh->sr_err_ints, the device is held and remembered in
 * srh->sr_failed_dip, an ESBD_SUSPEND error is posted through dr_op_err(),
 * and DDI_FAILURE is returned.  A failure reported by a recursive call is
 * propagated as ENXIO.  Returns DDI_SUCCESS when every device suspended.
 */
static int
dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	major_t		major;
	char		*dname;
	int		circ;

	/*
	 * If dip is the root node, it has no siblings and it is
	 * always held. If dip is not the root node, dr_suspend_devices()
	 * will be invoked with the parent held busy.
	 */
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		char	d_name[40], d_alias[40], *d_info;

		/* hold this node busy while its children are suspended */
		ndi_devi_enter(dip, &circ);
		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
			ndi_devi_exit(dip, circ);
			return (ENXIO);
		}
		ndi_devi_exit(dip, circ);

		if (!dr_is_real_device(dip))
			continue;

		/* major stays (major_t)-1 when the node has no binding name */
		major = (major_t)-1;
		if ((dname = ddi_binding_name(dip)) != NULL)
			major = ddi_name_to_major(dname);

		if (dr_bypass_device(dname)) {
			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
			    major);
			continue;
		}

		if (drmach_verify_sr(dip, 1)) {
			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
			    major);
			continue;
		}

		if ((d_info = ddi_get_name_addr(dip)) == NULL)
			d_info = "<null>";

		/* announce the device on the console before suspending it */
		d_name[0] = 0;
		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
			if (d_alias[0] != 0) {
				prom_printf("\tsuspending %s@%s (aka %s)\n",
				    d_name, d_info, d_alias);
			} else {
				prom_printf("\tsuspending %s@%s\n", d_name,
				    d_info);
			}
		} else {
			prom_printf("\tsuspending %s@%s\n", dname, d_info);
		}

		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			prom_printf("\tFAILED to suspend %s@%s\n",
			    d_name[0] ? d_name : dname, d_info);

			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);

			/*
			 * Remember where the suspend stopped; the hold is
			 * released by dr_resume_devices().
			 */
			ndi_hold_devi(dip);
			srh->sr_failed_dip = dip;

			handle = srh->sr_dr_handlep;
			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
			    d_name[0] ? d_name : dname, d_info);

			return (DDI_FAILURE);
		}
	}

	return (DDI_SUCCESS);
}
421
422static void
423dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
424{
425	dr_handle_t	*handle;
426	dev_info_t	*dip, *next, *last = NULL;
427	major_t		major;
428	char		*bn;
429	int		circ;
430
431	major = (major_t)-1;
432
433	/* attach in reverse device tree order */
434	while (last != start) {
435		dip = start;
436		next = ddi_get_next_sibling(dip);
437		while (next != last && dip != srh->sr_failed_dip) {
438			dip = next;
439			next = ddi_get_next_sibling(dip);
440		}
441		if (dip == srh->sr_failed_dip) {
442			/* release hold acquired in dr_suspend_devices() */
443			srh->sr_failed_dip = NULL;
444			ndi_rele_devi(dip);
445		} else if (dr_is_real_device(dip) &&
446		    srh->sr_failed_dip == NULL) {
447
448			if ((bn = ddi_binding_name(dip)) != NULL) {
449				major = ddi_name_to_major(bn);
450			} else {
451				bn = "<null>";
452			}
453			if (!dr_bypass_device(bn) &&
454			    !drmach_verify_sr(dip, 0)) {
455				char	d_name[40], d_alias[40], *d_info;
456
457				d_name[0] = 0;
458				d_info = ddi_get_name_addr(dip);
459				if (d_info == NULL)
460					d_info = "<null>";
461
462				if (!dr_resolve_devname(dip, d_name, d_alias)) {
463					if (d_alias[0] != 0) {
464						prom_printf("\tresuming "
465						    "%s@%s (aka %s)\n", d_name,
466						    d_info, d_alias);
467					} else {
468						prom_printf("\tresuming "
469						    "%s@%s\n", d_name, d_info);
470					}
471				} else {
472					prom_printf("\tresuming %s@%s\n", bn,
473					    d_info);
474				}
475
476				if (devi_attach(dip, DDI_RESUME) !=
477				    DDI_SUCCESS) {
478					/*
479					 * Print a console warning,
480					 * set an e_code of ESBD_RESUME,
481					 * and save the driver major
482					 * number in the e_rsc.
483					 */
484					prom_printf("\tFAILED to resume %s@%s",
485					    d_name[0] ? d_name : bn, d_info);
486
487					srh->sr_err_idx =
488					    dr_add_int(srh->sr_err_ints,
489					    srh->sr_err_idx, DR_MAX_ERR_INT,
490					    (uint64_t)major);
491
492					handle = srh->sr_dr_handlep;
493
494					dr_op_err(CE_IGNORE, handle,
495					    ESBD_RESUME, "%s@%s",
496					    d_name[0] ? d_name : bn, d_info);
497				}
498			}
499		}
500
501		/* Hold parent busy while walking its children */
502		ndi_devi_enter(dip, &circ);
503		dr_resume_devices(ddi_get_child(dip), srh);
504		ndi_devi_exit(dip, circ);
505		last = dip;
506	}
507}
508
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * All four conditions must hold: the thread is in TS_SLEEP, it has a
 * non-NULL wait channel, its AST flag is set, and TP_CHKPT is set in
 * t_proc_flag (dr_stop_user_threads() sets TP_CHKPT and calls aston()).
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
520
/*
 * Ask every user thread to stop, retrying up to DR_UTSTOP_RETRY times.
 *
 * Each attempt makes two passes over the thread list under pidlock:
 * the first flags every user thread with TP_CHKPT (or, for threads that
 * are already TS_STOPPED, clears TS_RESUME), the second checks whether
 * each thread is stopped.  A thread that is not stopped is tolerated only
 * when dr_allow_blocked_threads is set and DR_VSTOPPED() is true for it.
 *
 * Returns DDI_SUCCESS if dr_skip_user_threads is set or an attempt found
 * no offending thread.  Otherwise the pids seen on the final attempt are
 * stored in srh->sr_err_ints, the DR handle's h_err is set to an
 * ESBD_UTHREAD error listing them, and ESRCH is returned.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t 	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4
#define	DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Both locks are dropped around the call to
				 * add_one_utstop() and retaken afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/*
		 * let everything catch up; the wait grows with the square
		 * of the attempt number (zero on the first attempt)
		 */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* only the last attempt reports offenders */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		/* last argument 0: the array holds pids, not driver majors */
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
		    srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
654
655static void
656dr_start_user_threads(void)
657{
658	kthread_id_t tp;
659
660	mutex_enter(&pidlock);
661
662	/* walk all threads and release them */
663	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
664		proc_t *p = ttoproc(tp);
665
666		/* skip kernel threads */
667		if (ttoproc(tp)->p_as == &kas)
668			continue;
669
670		mutex_enter(&p->p_lock);
671		tp->t_proc_flag &= ~TP_CHKPT;
672		mutex_exit(&p->p_lock);
673
674		thread_lock(tp);
675		if (CPR_ISTOPPED(tp)) {
676			/* back on the runq */
677			tp->t_schedflag |= TS_RESUME;
678			setrun_locked(tp);
679		}
680		thread_unlock(tp);
681	}
682
683	mutex_exit(&pidlock);
684}
685
686static void
687dr_signal_user(int sig)
688{
689	struct proc *p;
690
691	mutex_enter(&pidlock);
692
693	for (p = practive; p != NULL; p = p->p_next) {
694		/* only user threads */
695		if (p->p_exec == NULL || p->p_stat == SZOMB ||
696		    p == proc_init || p == ttoproc(curthread))
697			continue;
698
699		mutex_enter(&p->p_lock);
700		sigtoproc(p, NULL, sig);
701		mutex_exit(&p->p_lock);
702	}
703
704	mutex_exit(&pidlock);
705
706	/* add a bit of delay */
707	delay(hz);
708}
709
/*
 * Undo dr_suspend(), starting from the stage recorded in
 * srh->sr_suspend_state and falling through every earlier stage:
 *
 *   DR_SRSTATE_FULL	set TOD_DR_RESUME_DONE, re-enable interrupts, restart
 *			the CPUs, drop cpu_lock, call drmach_resume_first()
 *   DR_SRSTATE_DRIVER	resume device drivers and the lock manager
 *   DR_SRSTATE_USER	restart user threads (unless dr_skip_user_threads)
 *   DR_SRSTATE_BEGIN	send SIGTHAW to user processes
 *
 * For DR_SRSTATE_FULL the caller must hold cpu_lock (asserted); it is
 * released here.  If any driver fails to resume, the DR handle's h_err
 * is set to an ESBD_RESUME error listing the drivers involved.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* report the majors of any drivers that failed to resume */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	prom_printf("DR: resume COMPLETED\n");
}
784
/*
 * Quiesce the system in stages: stop user threads, check for attached
 * unsafe drivers (skipped when SBD_FLAG_FORCE is set), suspend the lock
 * manager, suspend device drivers, call drmach_suspend_last(), then pause
 * the CPUs and stop interrupts.  srh->sr_suspend_state is updated before
 * each stage so that dr_resume() knows how much to undo.
 *
 * On success returns DDI_SUCCESS with cpu_lock held and the CPUs paused;
 * the caller is expected to call dr_resume(), which releases cpu_lock.
 *
 * On failure dr_resume() is called from here before returning, and the
 * return value is the failing stage's error (DDI_FAILURE for the unsafe
 * driver check).  The DR handle's h_err describes the offending pids or
 * drivers when any were recorded.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	/* a failure here is ignored unless dr_check_user_stop_result is set */
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* only the unsafe-major array is wanted; no refcounts */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
			    dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swaps out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/* cpu_lock stays held on return; dr_resume() drops it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	dr_stop_intr();

	return (rc);
}
880
881int
882dr_pt_test_suspend(dr_handle_t *hp)
883{
884	dr_sr_handle_t *srh;
885	int		err;
886	uint_t		psmerr;
887	static fn_t	f = "dr_pt_test_suspend";
888
889	PR_QR("%s...\n", f);
890
891	srh = dr_get_sr_handle(hp);
892	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
893		dr_resume(srh);
894		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
895			PR_QR("%s: error on dr_resume()", f);
896			switch (psmerr) {
897			case ESBD_RESUME:
898				PR_QR("Couldn't resume devices: %s\n",
899				    DR_GET_E_RSC(hp->h_err));
900				break;
901
902			case ESBD_KTHREAD:
903				PR_ALL("psmerr is ESBD_KTHREAD\n");
904				break;
905			default:
906				PR_ALL("Resume error unknown = %d\n", psmerr);
907				break;
908			}
909		}
910	} else {
911		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
912		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
913		switch (psmerr) {
914		case ESBD_UNSAFE:
915			PR_ALL("Unsafe devices (major #): %s\n",
916			    DR_GET_E_RSC(hp->h_err));
917			break;
918
919		case ESBD_RTTHREAD:
920			PR_ALL("RT threads (PIDs): %s\n",
921			    DR_GET_E_RSC(hp->h_err));
922			break;
923
924		case ESBD_UTHREAD:
925			PR_ALL("User threads (PIDs): %s\n",
926			    DR_GET_E_RSC(hp->h_err));
927			break;
928
929		case ESBD_SUSPEND:
930			PR_ALL("Non-suspendable devices (major #): %s\n",
931			    DR_GET_E_RSC(hp->h_err));
932			break;
933
934		case ESBD_RESUME:
935			PR_ALL("Could not resume devices (major #): %s\n",
936			    DR_GET_E_RSC(hp->h_err));
937			break;
938
939		case ESBD_KTHREAD:
940			PR_ALL("psmerr is ESBD_KTHREAD\n");
941			break;
942
943		case ESBD_NOERROR:
944			PR_ALL("sbd_error_t error code not set\n");
945			break;
946
947		default:
948			PR_ALL("Unknown error psmerr = %d\n", psmerr);
949			break;
950		}
951	}
952	dr_release_sr_handle(srh);
953
954	return (0);
955}
956
/*
 * Append "val" to the first "idx" entries of "arr" unless it is already
 * present or the array (capacity "len") is full.
 *
 * Returns the resulting number of entries: idx + 1 when the value was
 * stored, idx when it was a duplicate or there was no room, and 0 when
 * "arr" is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int slot;

	if (arr == NULL)
		return (0);

	/* no room left: leave the array as it is */
	if (idx >= len)
		return (idx);

	/* look for an existing copy of the value */
	for (slot = 0; slot < idx && arr[slot] != val; slot++)
		;
	if (slot < idx)
		return (idx);

	arr[idx] = val;
	return (idx + 1);
}
982
983/*
984 * Construct an sbd_error_t featuring a string representation of an array of
985 * integers as its e_rsc.
986 */
987static sbd_error_t *
988drerr_int(int e_code, uint64_t *arr, int idx, int majors)
989{
990	int		i, n, buf_len, buf_idx, buf_avail;
991	char		*dname;
992	char		*buf;
993	sbd_error_t	*new_sbd_err;
994	static char	s_ellipsis[] = "...";
995
996	if (arr == NULL || idx <= 0)
997		return (NULL);
998
999	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1000	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1001
1002	/*
1003	 * This is the total working area of the buffer.  It must be computed
1004	 * as the size of 'buf', minus reserved space for the null terminator
1005	 * and the ellipsis string.
1006	 */
1007	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1008
1009	/* Construct a string representation of the array values */
1010	for (buf_idx = 0, i = 0; i < idx; i++) {
1011		buf_avail = buf_len - buf_idx;
1012		if (majors) {
1013			dname = ddi_major_to_name(arr[i]);
1014			if (dname) {
1015				n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1016				    dname);
1017			} else {
1018				n = snprintf(&buf[buf_idx], buf_avail,
1019				    "major %" PRIu64 ", ", arr[i]);
1020			}
1021		} else {
1022			n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
1023			    arr[i]);
1024		}
1025
1026		/* An ellipsis gets appended when no more values fit */
1027		if (n >= buf_avail) {
1028			(void) strcpy(&buf[buf_idx], s_ellipsis);
1029			break;
1030		}
1031
1032		buf_idx += n;
1033	}
1034
1035	/* If all the contents fit, remove the trailing comma */
1036	if (n < buf_avail) {
1037		buf[--buf_idx] = '\0';
1038		buf[--buf_idx] = '\0';
1039	}
1040
1041	/* Return an sbd_error_t with the buffer and e_code */
1042	new_sbd_err = drerr_new(1, e_code, buf);
1043	kmem_free(buf, MAXPATHLEN);
1044	return (new_sbd_err);
1045}
1046