x_call.c revision 3446:5903aece022d
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 */
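
/*
 * Overview, as implemented in this file: there are X_CALL_LEVELS priority
 * levels (X_CALL_LOPRI, X_CALL_MEDPRI and X_CALL_HIPRI), each with its own
 * mailbox in xc_mboxes[] and its own lock in xc_mbox_lock[].  An initiator
 * fills in the mailbox for a level (function plus three arguments), sets
 * the per-CPU xc_pend flag of each target and sends a directed interrupt
 * at the corresponding PIL.  The target's xc_serv() handler runs the
 * function, records the result in xc_retval and raises xc_ack; for
 * synchronous calls it then holds until the initiator marks the operation
 * XC_DONE.  The medium priority level is used here only for the CPU
 * capture/release protocol (xc_capture_cpus()/xc_release_cpus()).
 */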

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/traptrace.h>


static struct	xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t 	xc_xlat_xcptoipl[X_CALL_LEVELS] = {
	XC_LO_PIL,
	XC_MED_PIL,
	XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int	xc_initialized = 0;

void
xc_init()
{
	/*
	 * By making these mutexes type MUTEX_DRIVER, the ones below
	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
	 * ones above LOCK_LEVEL will be spin mutexes.
	 */
	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_LO_PIL));
	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_MED_PIL));
	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_HI_PIL));

	xc_initialized = 1;
}
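
/*
 * xc_init() is presumably called once early in startup, before any cross
 * calls are issued (that ordering is not visible in this file).
 * kdi_xc_others() below checks xc_initialized so that the debugger does
 * not touch the mailbox locks before initialization has happened.
 */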

#if defined(TRAPTRACE)

/*
 * When xc_traptrace is on, put x-call records into the trap trace buffer.
 */
int xc_traptrace;

void
xc_make_trap_trace_entry(uint8_t marker, int pri, ulong_t arg)
{
	trap_trace_rec_t *ttr;
	struct _xc_entry *xce;

	if (xc_traptrace == 0)
		return;

	ttr = trap_trace_get_traceptr(TT_XCALL,
	    (ulong_t)caller(), (ulong_t)getfp());
	xce = &(ttr->ttr_info.xc_entry);

	xce->xce_marker = marker;
	xce->xce_pri = pri;
	xce->xce_arg = arg;

	if ((uint_t)pri < X_CALL_LEVELS) {
		struct machcpu *mcpu = &CPU->cpu_m;

		xce->xce_pend = mcpu->xc_pend[pri];
		xce->xce_ack = mcpu->xc_ack[pri];
		xce->xce_state = mcpu->xc_state[pri];
		xce->xce_retval = mcpu->xc_retval[pri];
		xce->xce_func = (uintptr_t)xc_mboxes[pri].func;
	}
}
#endif
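
/*
 * XC_TRACE(), used throughout this file, is assumed to expand to
 * xc_make_trap_trace_entry() when TRAPTRACE is defined and to nothing
 * otherwise; its definition lives in a header rather than here.
 */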

#define	CAPTURE_CPU_ARG	~0UL

/*
 * X-call interrupt service routine.
 *
 * arg1 == X_CALL_MEDPRI - capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
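/*
 * Two cases are handled below: a medium-priority capture request, where
 * this CPU acknowledges and then holds until the initiator marks the
 * session XC_DONE, and the ordinary case, where the function posted in
 * the mailbox for this priority level is invoked and acknowledged.
 */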
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	int op;
	int pri = (int)(uintptr_t)arg1;
	struct cpu *cpup = CPU;
	xc_arg_t arg2val;

	XC_TRACE(TT_XC_SVC_BEGIN, pri, (ulong_t)arg2);

	if (pri == X_CALL_MEDPRI) {

		arg2val = xc_mboxes[X_CALL_MEDPRI].arg2;

		if (arg2val != CAPTURE_CPU_ARG &&
		    !CPU_IN_SET((cpuset_t)arg2val, cpup->cpu_id))
			goto unclaimed;

		ASSERT(arg2val == CAPTURE_CPU_ARG);

		if (cpup->cpu_m.xc_pend[pri] == 0)
			goto unclaimed;

		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

		for (;;) {
			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
			    (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
				break;
			SMT_PAUSE();
		}
		XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
		return (DDI_INTR_CLAIMED);
	}

	if (cpup->cpu_m.xc_pend[pri] == 0)
		goto unclaimed;

	cpup->cpu_m.xc_pend[pri] = 0;
	op = cpup->cpu_m.xc_state[pri];

	/*
	 * Don't invoke a null function.
	 */
	if (xc_mboxes[pri].func != NULL)
		cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
		    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
		    xc_mboxes[pri].arg3);
	else
		cpup->cpu_m.xc_retval[pri] = 0;

	/*
	 * Acknowledge that we have completed the x-call operation.
	 */
	cpup->cpu_m.xc_ack[pri] = 1;

	if (op != XC_CALL_OP) {
		/*
		 * op == XC_SYNC_OP: wait for the initiator of the x-call
		 * to indicate that all CPUs involved can proceed.
		 */
		while (cpup->cpu_m.xc_wait[pri])
			SMT_PAUSE();

		while (cpup->cpu_m.xc_state[pri] != XC_DONE)
			SMT_PAUSE();

		/*
		 * Acknowledge that we have received the directive to continue.
		 */
		ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
		cpup->cpu_m.xc_ack[pri] = 1;
	}

	XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
	return (DDI_INTR_CLAIMED);

unclaimed:
	XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_UNCLAIMED);
	return (DDI_INTR_UNCLAIMED);
}


/*
 * xc_do_call: common worker for xc_call() and xc_sync(); sets up the
 * priority, grabs the high-priority mailbox lock and hands off to
 * xc_common().
 */
static void
xc_do_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func,
	int sync)
{
	/*
	 * If pri indicates a low-priority (below LOCK_LEVEL) cross call,
	 * we must disable preemption to avoid migrating to another CPU
	 * during the call.
	 */
	if (pri == X_CALL_LOPRI) {
		kpreempt_disable();
	} else {
		pri = X_CALL_HIPRI;
	}

	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	xc_common(func, arg1, arg2, arg3, pri, set, sync);
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	if (pri == X_CALL_LOPRI)
		kpreempt_enable();
}
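
/*
 * Note that xc_do_call() collapses any priority other than X_CALL_LOPRI
 * to X_CALL_HIPRI, and that both paths serialize on the high-priority
 * mailbox lock, so only one cross-call session is set up at a time.
 */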


/*
 * xc_call: call the specified function on the given set of processors.
 * The remotes may continue after performing the service; we wait here
 * until everybody has completed.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}

/*
 * xc_sync: call the specified function on the given set of processors.
 * After doing the work, each remote waits until we let it continue; we
 * send the continue once everyone has informed us that they are done.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}
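
/*
 * For example, a caller might cross call every CPU like this (my_handler
 * is a hypothetical xc_func_t; the three arguments are whatever the
 * handler expects):
 *
 *	cpuset_t set;
 *
 *	CPUSET_ALL(set);
 *	xc_call(arg1, arg2, arg3, X_CALL_HIPRI, set, my_handler);
 *
 * xc_sync() takes the same arguments and is used when the caller must
 * know that every CPU in the set has finished before it proceeds.
 */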


/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */
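
/*
 * A typical (hypothetical) use of this pair:
 *
 *	xc_capture_cpus(cpu_ready_set);
 *	... code that must run while every other CPU is held ...
 *	xc_release_cpus();
 *
 * xc_capture_cpus() returns with xc_mbox_lock[X_CALL_HIPRI] still held;
 * xc_release_cpus() is what drops it.
 */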

/*
 * Capture the specified CPUs in order to start an x-call session
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
	int cix;
	int lcx;
	struct cpu *cpup;
	int	i;
	cpuset_t *cpus;
	cpuset_t c;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

	/*
	 * Prevent deadlocks where we take an interrupt and are waiting
	 * for a mutex owned by one of the CPUs that is captured for
	 * the x-call, while that CPU is waiting for some x-call signal
	 * to be set by us.
	 *
	 * This mutex also prevents preemption, since it raises SPL above
	 * LOCK_LEVEL (it is a spin-type driver mutex).
	 */
	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	lcx = CPU->cpu_id;	/* now we're safe */

	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Wait for all cpus
	 */
	cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
	if (CPU_IN_SET(*cpus, CPU->cpu_id))
		CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
	for (;;) {
		c = *(volatile cpuset_t *)cpus;
		CPUSET_AND(c, cpu_ready_set);
		if (CPUSET_ISNULL(c))
			break;
		SMT_PAUSE();
	}

	/*
	 * Store the set of CPUs involved in the x-call session, so that
	 * xc_release_cpus will know what CPUs to act upon.
	 */
	xc_mboxes[X_CALL_MEDPRI].set = set;
	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

	/*
	 * Now capture each CPU in the set and cause it to go into a
	 * holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case CPU wasn't ready, but becomes ready later,
			 * take the CPU out of the set now.
			 */
			CPUSET_DEL(set, cix);
			continue;
		}
		if (cix != lcx && CPU_IN_SET(set, cix)) {
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
			XC_TRACE(TT_XC_CAPTURE, X_CALL_MEDPRI, cix);
			send_dirint(cix, XC_MED_PIL);
		}
		i++;
		if (i >= ncpus)
			break;
	}

	/*
	 * Wait here until all remote CPUs acknowledge.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
				SMT_PAUSE();
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
		}
		i++;
		if (i >= ncpus)
			break;
	}

}

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
	struct cpu *cpup;
	int	i;

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

	/*
	 * Allow each CPU to exit its holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL)
			continue;
		if ((cpup->cpu_flags & CPU_READY) &&
		    (cix != lcx) && CPU_IN_SET(set, cix)) {
			/*
			 * Mark the session done so the captured CPU can
			 * drop out of its holding loop in xc_serv().
			 */
			XC_TRACE(TT_XC_RELEASE, X_CALL_MEDPRI, cix);
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
		}
		i++;
		if (i >= ncpus)
			break;
	}

	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}

/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done:
 *	-1 - no waiting at all; request the service and return without
 *	     waiting for or releasing the remotes.
 *	 0 - wait until every remote CPU has acknowledged completion.
 *	 1 - fully synchronous: wait for completion, then release the
 *	     remotes (XC_DONE) and wait for their final acknowledgement.
 */
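
/*
 * The per-CPU handshake used below: the initiator clears the target's
 * xc_ack, sets xc_wait and xc_state, raises xc_pend and sends a directed
 * interrupt; the target clears xc_pend, runs the function and raises
 * xc_ack, which the initiator clears again as it collects it.  For
 * sync == 1 the initiator then clears xc_wait, sets XC_DONE and waits
 * for a second xc_ack from each target.
 */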
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	int sync)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	struct cpu *cpup;

	ASSERT(panicstr == NULL);

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Set up the service definition mailbox.
	 */
	xc_mboxes[pri].func = func;
	xc_mboxes[pri].arg1 = arg1;
	xc_mboxes[pri].arg2 = arg2;
	xc_mboxes[pri].arg3 = arg3;

	/*
	 * Request service on all remote processors.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case the non-local CPU is not ready but becomes
			 * ready later, take it out of the set now. The local
			 * CPU needs to remain in the set to complete the
			 * requested function.
			 */
			if (cix != lcx)
				CPUSET_DEL(set, cix);
		} else if (cix != lcx && CPU_IN_SET(set, cix)) {
			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
			cpup->cpu_m.xc_ack[pri] = 0;
			cpup->cpu_m.xc_wait[pri] = sync;
			if (sync > 0)
				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
			else
				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
			cpup->cpu_m.xc_pend[pri] = 1;
			XC_TRACE(TT_XC_START, pri, cix);
			send_dirint(cix, xc_xlat_xcptoipl[pri]);
		}
	}

	/*
	 * Run service locally.
	 */
	if (CPU_IN_SET(set, lcx) && func != NULL) {
		XC_TRACE(TT_XC_START, pri, CPU->cpu_id);
		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
	}

	if (sync == -1)
		return;

	/*
	 * Wait here until all remote calls acknowledge.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[pri] == 0)
				SMT_PAUSE();
			XC_TRACE(TT_XC_WAIT, pri, cix);
			cpup->cpu_m.xc_ack[pri] = 0;
		}
	}

	if (sync == 0)
		return;

	/*
	 * Release any waiting CPUs
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				cpup->cpu_m.xc_wait[pri] = 0;
				cpup->cpu_m.xc_state[pri] = XC_DONE;
			}
		}
	}
	/*
	 * Wait for all CPUs to acknowledge completion before we continue.
	 * Without this check it is possible (on a VM, on hyper-threaded
	 * CPUs, or in the presence of System Management Interrupts, all of
	 * which can cause delays) for a remote processor to still be
	 * waiting by the time xc_common() is next invoked with the sync
	 * flag set, resulting in a deadlock.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				while (cpup->cpu_m.xc_ack[pri] == 0)
					SMT_PAUSE();
				XC_TRACE(TT_XC_ACK, pri, cix);
				cpup->cpu_m.xc_ack[pri] = 0;
			}
		}
	}
}

/*
 * xc_trycall: attempt to call the specified function on the given set of
 * processors.  The remotes may take a long time to get around to servicing
 * the call; we continue immediately without waiting for them.
 */
void
xc_trycall(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	cpuset_t set,
	xc_func_t func)
{
	int		save_kernel_preemption;
	extern int	IGNORE_KERNEL_PREEMPTION;

	/*
	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
	 * someone else is already doing a cross-call -- we won't.
	 */

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}
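
/*
 * Note that xc_trycall() never blocks on the mailbox lock and never waits
 * for the remotes (it passes sync == -1 to xc_common()); if another cross
 * call is already in progress, the request is silently dropped.
 */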

/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger.  We can't hold locks, so we spin on the cross-call
 * lock until we get it.  When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int	IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	mutex_impl_t *lp;
	cpuset_t set;
	int x;

	if (!xc_initialized)
		return;

	CPUSET_ALL_BUT(set, this_cpu);

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;

	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
	for (x = 0; x < 0x400000; x++) {
		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
			    set, -1);
			lp->m_spin.m_spinlock = 0; /* XXX */
			break;
		}
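		/*
		 * While spinning for the cross-call lock, service any
		 * pending medium-priority capture request aimed at this
		 * CPU, so the CPU that holds the lock is not left waiting
		 * for our acknowledgement.
		 */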
		(void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}