/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * The main CPU-control loops, used to control masters and slaves.
 */

#include <sys/types.h>

#include <kmdb/kaif.h>
#include <kmdb/kaif_start.h>
#include <kmdb/kmdb_asmutil.h>
#include <kmdb/kmdb_dpi_impl.h>
#include <kmdb/kmdb_kdi.h>

#define	KAIF_SLAVE_CMD_SPIN	0
#define	KAIF_SLAVE_CMD_SWITCH	1
#define	KAIF_SLAVE_CMD_RESUME	2
#define	KAIF_SLAVE_CMD_FLUSH	3
#define	KAIF_SLAVE_CMD_REBOOT	4
#if defined(__sparc)
#define	KAIF_SLAVE_CMD_ACK	5
#endif

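/*
 * The command protocol: the master posts one of the KAIF_SLAVE_CMD_*
 * values in kaif_slave_cmd below (for CMD_SWITCH, the target CPU id goes
 * in kaif_slave_tgt), and the slaves poll for it in kaif_slave_loop().
 */
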
/*
 * Used to synchronize attempts to set kaif_master_cpuid.  kaif_master_cpuid
 * may be read without kaif_master_lock, and may be written by the current
 * master CPU.
 */
int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
static uintptr_t kaif_master_lock = 0;

/*
 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock
 * must be held to write kaif_looping, but need not be held to read it.
 */
static volatile uint_t kaif_looping;
static uintptr_t kaif_loop_lock;

static volatile int kaif_slave_cmd;
static volatile int kaif_slave_tgt;	/* target cpuid for CMD_SWITCH */

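/*
 * A minimal cas()-based spin lock.  kaif_lock_enter() spins until it
 * atomically changes the lock word from 0 to 1; kaif_lock_exit() simply
 * clears the word.  membar_producer() provides store ordering around the
 * lock word.  Illustrative usage, as with the kaif_looping accounting in
 * kaif_main_loop():
 *
 *	kaif_lock_enter(&kaif_loop_lock);
 *	kaif_looping++;
 *	kaif_lock_exit(&kaif_loop_lock);
 */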
static void
kaif_lock_enter(uintptr_t *lock)
{
	while (cas(lock, 0, 1) != 0)
		continue;
	membar_producer();
}

static void
kaif_lock_exit(uintptr_t *lock)
{
	*lock = 0;
	membar_producer();
}

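/*
 * Post a command to the slave CPUs, then nudge them out of
 * kmdb_kdi_slave_wait() via kmdb_kdi_start_slaves().
 */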
static void
kaif_start_slaves(int cmd)
{
	kaif_slave_cmd = cmd;
	kmdb_kdi_start_slaves();
}

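/*
 * The master loop: runs the debugger on behalf of the stopped world and
 * carries out each command returned by kmdb_dpi_reenter() -- switching
 * masters, resuming, flushing slave caches, or rebooting.
 */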
static int
kaif_master_loop(kaif_cpusave_t *cpusave)
{
	int notflushed, i;

#if defined(__sparc)
	kaif_prom_rearm();
#endif
	kaif_trap_set_debugger();

	/*
	 * If we re-entered due to a ::switch, we need to tell the slave CPUs
	 * to sleep again.
	 */
	kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 0);

master_loop:
	switch (kmdb_dpi_reenter()) {
	case KMDB_DPI_CMD_SWITCH_CPU:
		/*
		 * We assume that the target CPU is a valid slave.  There's no
		 * easy way to complain here, so we'll assume that the caller
		 * has done the proper checking.
		 */
		if (kmdb_dpi_switch_target == cpusave->krs_cpu_id)
			break;

		kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();

		/*
		 * Switch back to the saved trap table before we switch CPUs --
		 * we need to make sure that only one CPU is on the debugger's
		 * table at a time.
		 */
		kaif_trap_set_saved(cpusave);

		kaif_start_slaves(KAIF_SLAVE_CMD_SWITCH);

		/* The new master is now awake */
		return (KAIF_CPU_CMD_SWITCH);

	case KMDB_DPI_CMD_RESUME_ALL:
	case KMDB_DPI_CMD_RESUME_UNLOAD:
		/*
		 * Resume everyone, clean up for next entry.
		 */
		kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
		membar_producer();
		kaif_start_slaves(KAIF_SLAVE_CMD_RESUME);

		if (kmdb_dpi_work_required())
			kmdb_dpi_wrintr_fire();

		kaif_trap_set_saved(cpusave);

		return (KAIF_CPU_CMD_RESUME);

	case KMDB_DPI_CMD_RESUME_MASTER:
		/*
		 * Single-CPU resume, which is performed on the debugger's
		 * trap table (so no need to switch back).
		 */
		return (KAIF_CPU_CMD_RESUME_MASTER);

	case KMDB_DPI_CMD_FLUSH_CACHES:
		kaif_start_slaves(KAIF_SLAVE_CMD_FLUSH);

		/*
		 * Wait for the other cpus to finish flushing their caches.
		 */
		do {
			notflushed = 0;
			for (i = 0; i < kaif_ncpusave; i++) {
				kaif_cpusave_t *save = &kaif_cpusave[i];

				if (save->krs_cpu_state ==
				    KAIF_CPU_STATE_SLAVE &&
				    !save->krs_cpu_flushed) {
					notflushed++;
					break;
				}
			}
		} while (notflushed > 0);

		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
		break;

#if defined(__i386) || defined(__amd64)
	case KMDB_DPI_CMD_REBOOT:
		/*
		 * Reboot must be initiated by CPU 0.  I could ask why, but I'm
		 * afraid that I don't want to know the answer.
		 */
		if (cpusave->krs_cpu_id == 0)
			kmdb_kdi_reboot();

		kaif_start_slaves(KAIF_SLAVE_CMD_REBOOT);

		/*
		 * Spin forever, waiting for CPU 0 (apparently a slave) to
		 * reboot the system.
		 */
		for (;;)
			continue;

		/*NOTREACHED*/
		break;
#endif
	}

	goto master_loop;
}

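/*
 * The slave loop: spin (with the help of kmdb_kdi_slave_wait()) until the
 * master posts a command telling this CPU to resume, flush its caches,
 * reboot, or take over as the new master.
 */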
static int
kaif_slave_loop(kaif_cpusave_t *cpusave)
{
	int slavecmd, rv;

#if defined(__sparc)
	/*
	 * If the user elects to drop to OBP from the debugger, some OBP
	 * implementations will cross-call the slaves.  We have to turn
	 * IE back on so we can receive the cross-calls.  If we don't,
	 * some OBP implementations will wait forever.
	 */
	interrupts_on();
#endif

	/* Wait for duty to call */
	for (;;) {
		slavecmd = kaif_slave_cmd;

		if (slavecmd == KAIF_SLAVE_CMD_SWITCH &&
		    kaif_slave_tgt == cpusave->krs_cpu_id) {
			kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
			cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
			rv = KAIF_CPU_CMD_SWITCH;
			break;

		} else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) {
			kmdb_kdi_flush_caches();
			cpusave->krs_cpu_flushed = 1;
			continue;

#if defined(__i386) || defined(__amd64)
		} else if (slavecmd == KAIF_SLAVE_CMD_REBOOT &&
		    cpusave->krs_cpu_id == 0) {
			rv = 0;
			kmdb_kdi_reboot();
			break;
#endif

		} else if (slavecmd == KAIF_SLAVE_CMD_RESUME) {
			rv = KAIF_CPU_CMD_RESUME;
			break;
#if defined(__sparc)
		} else if (slavecmd == KAIF_SLAVE_CMD_ACK) {
			/* Tell the master we're back in the slave loop. */
			cpusave->krs_cpu_acked = 1;
		} else if (cpusave->krs_cpu_acked &&
		    slavecmd == KAIF_SLAVE_CMD_SPIN) {
			/* The barrier is over; reset for the next one. */
			cpusave->krs_cpu_acked = 0;
#endif
		}

		kmdb_kdi_slave_wait();
	}

#if defined(__sparc)
	interrupts_off();
#endif

	return (rv);
}

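/*
 * The first CPU to find kaif_master_cpuid unset while holding
 * kaif_master_lock becomes the master; all later arrivals become slaves.
 */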
static void
kaif_select_master(kaif_cpusave_t *cpusave)
{
	kaif_lock_enter(&kaif_master_lock);

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		/* This is the master. */
		kaif_master_cpuid = cpusave->krs_cpu_id;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;

		membar_producer();

		kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 1);
	} else {
		/* The master was already chosen - go be a slave */
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();
	}

	kaif_lock_exit(&kaif_master_lock);
}

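/*
 * The common entry point for all stopped CPUs.  Each CPU determines its
 * role, enters the appropriate loop, and, on resume, waits at a barrier
 * until every CPU has left its loop.
 */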
int
kaif_main_loop(kaif_cpusave_t *cpusave)
{
	int cmd;

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		if (!kmdb_dpi_resume_requested &&
		    kmdb_kdi_get_unload_request()) {
			/*
			 * Special case: Unload requested before first debugger
			 * entry.  Don't stop the world, as there's nothing to
			 * clean up that can't be handled by the running kernel.
			 */
			cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
			return (KAIF_CPU_CMD_RESUME);
		}

		kaif_select_master(cpusave);

#ifdef __sparc
		if (kaif_master_cpuid == cpusave->krs_cpu_id) {
			/*
			 * Everyone has arrived, so we can disarm the post-PROM
			 * entry point.
			 */
			*kaif_promexitarmp = 0;
			membar_producer();
		}
#endif
	} else if (kaif_master_cpuid == cpusave->krs_cpu_id) {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
	} else {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
	}

	cpusave->krs_cpu_flushed = 0;

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping++;
	kaif_lock_exit(&kaif_loop_lock);

	/*
	 * We know who the master and slaves are, so now they can go off
	 * to their respective loops.
	 */
	do {
		if (kaif_master_cpuid == cpusave->krs_cpu_id)
			cmd = kaif_master_loop(cpusave);
		else
			cmd = kaif_slave_loop(cpusave);
	} while (cmd == KAIF_CPU_CMD_SWITCH);

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping--;
	kaif_lock_exit(&kaif_loop_lock);

	cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;

	if (cmd == KAIF_CPU_CMD_RESUME) {
		/*
		 * By this point, the master has directed the slaves to resume,
		 * and everyone is making their way to this point.  We're going
		 * to block here until all CPUs leave the master and slave
		 * loops.  When all have arrived, we'll turn them all loose.
		 * This barrier is required for two reasons:
		 *
		 * 1. There exists a race condition whereby a CPU could reenter
		 *    the debugger while another CPU is still in the slave loop
		 *    from this debugger entry.  This usually happens when the
		 *    current master releases the slaves, and makes it back to
		 *    the world before the slaves notice the release.  The
		 *    former master then triggers a debugger entry, and attempts
		 *    to stop the slaves for this entry before they've even
		 *    resumed from the last one.  When the slaves arrive here,
		 *    they'll have re-disabled interrupts, and will thus ignore
		 *    cross-calls until they finish resuming.
		 *
		 * 2. At the time of this writing, there exists a SPARC bug that
		 *    causes an apparently unsolicited interrupt vector trap
		 *    from OBP to one of the slaves.  This wouldn't normally be
		 *    a problem but for the fact that the cross-called CPU
		 *    encounters some sort of failure while in OBP.  OBP
		 *    recovers by executing the debugger-hook word, which sends
		 *    the slave back into the debugger, triggering a debugger
		 *    fault.  This problem seems to happen only during resume,
		 *    the result being that all CPUs save for the cross-called
		 *    one make it back into the world, while the cross-called
		 *    one is stuck at the debugger fault prompt.  Leave the
		 *    world in that state too long, and you'll get a mondo
		 *    timeout panic.  If we hold everyone here, we can give
		 *    the user a chance to trigger a panic for further analysis.
		 *    To trigger the bug, "pool_unlock:b :c" and "while : ; do
		 *    psrset -p ; done".
		 *
		 * When the second item is fixed, the barrier can move into
		 * kaif_select_master(), immediately prior to the setting of
		 * kaif_master_cpuid.
		 */
		while (kaif_looping != 0)
			continue;
	}

	return (cmd);
}


#if defined(__sparc)

static int slave_loop_barrier_failures = 0;	/* for debug */

/*
 * There exists a race condition, observed on some platforms, where the
 * kmdb master CPU exits to OBP via prom_enter_mon (e.g. the "$q" command)
 * and then later re-enters kmdb (by typing "go") while the slaves are
 * still proceeding from the OBP idle loop back to the kmdb slave loop.
 * The problem arises when the master CPU, now back in kmdb, proceeds to
 * re-enter OBP (e.g. by doing a prom_read() from the kmdb main loop)
 * while the slaves are still trying to get out of (the previous trip
 * into) OBP and into the safety of the kmdb slave loop.  This routine
 * forces the slaves to explicitly acknowledge that they are back in the
 * slave loop.  The master CPU can call it to ensure that all slave CPUs
 * are back in the slave loop before proceeding.
 */
void
kaif_slave_loop_barrier(void)
{
	extern void kdi_usecwait(clock_t);
	int i;
	int not_acked;
	int timeout_count = 0;

	kaif_start_slaves(KAIF_SLAVE_CMD_ACK);

	/*
	 * Wait for the slave CPUs to explicitly acknowledge that they are
	 * spinning in the slave loop.
	 */
	do {
		not_acked = 0;
		for (i = 0; i < kaif_ncpusave; i++) {
			kaif_cpusave_t *save = &kaif_cpusave[i];

			if (save->krs_cpu_state ==
			    KAIF_CPU_STATE_SLAVE &&
			    !save->krs_cpu_acked) {
				not_acked++;
				break;
			}
		}

		if (not_acked == 0)
			break;

		/*
		 * Play it safe and delay between checks.  We will do at
		 * most kaif_ncpusave delays before bailing out of this
		 * barrier.
		 */
		kdi_usecwait(200);

	} while (++timeout_count < kaif_ncpusave);

	if (not_acked > 0)
		/*
		 * We could not establish a barrier with all of the slave
		 * CPUs coming back from OBP.  Record this fact for future
		 * debugging.
		 */
		slave_loop_barrier_failures++;

	kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
}
#endif