x_call.c revision 6336:4eaf084434c9
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/traptrace.h>


static struct	xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t 	xc_xlat_xcptoipl[X_CALL_LEVELS] = {
	XC_LO_PIL,
	XC_MED_PIL,
	XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int	xc_initialized = 0;

void
xc_init()
{
	/*
	 * By making these mutexes type MUTEX_DRIVER, the ones below
	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
	 * ones above LOCK_LEVEL will be spin mutexes.
	 */
	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_LO_PIL));
	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_MED_PIL));
	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_HI_PIL));

	xc_initialized = 1;
}

#if defined(TRAPTRACE)

/*
 * When xc_traptrace is on, put x-call records into the trap trace buffer.
 */
int xc_traptrace;

void
xc_make_trap_trace_entry(uint8_t marker, int pri, ulong_t arg)
{
	trap_trace_rec_t *ttr;
	struct _xc_entry *xce;

	if (xc_traptrace == 0)
		return;

	ttr = trap_trace_get_traceptr(TT_XCALL,
	    (ulong_t)caller(), (ulong_t)getfp());
	xce = &(ttr->ttr_info.xc_entry);

	xce->xce_marker = marker;
	xce->xce_pri = pri;
	xce->xce_arg = arg;

	if ((uint_t)pri < X_CALL_LEVELS) {
		struct machcpu *mcpu = &CPU->cpu_m;

		xce->xce_pend = mcpu->xc_pend[pri];
		xce->xce_ack = mcpu->xc_ack[pri];
		xce->xce_state = mcpu->xc_state[pri];
		xce->xce_retval = mcpu->xc_retval[pri];
		xce->xce_func = (uintptr_t)xc_mboxes[pri].func;
	}
}
#endif

#define	CAPTURE_CPU_ARG	~0UL

/*
 * X-call interrupt service routine.
 *
 * arg == X_CALL_MEDPRI	-  capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	int op;
	int pri = (int)(uintptr_t)arg1;
	struct cpu *cpup = CPU;
	xc_arg_t arg2val;

	XC_TRACE(TT_XC_SVC_BEGIN, pri, (ulong_t)arg2);

	if (pri == X_CALL_MEDPRI) {

		arg2val = xc_mboxes[X_CALL_MEDPRI].arg2;

		if (arg2val != CAPTURE_CPU_ARG ||
		    !CPU_IN_SET(xc_mboxes[X_CALL_MEDPRI].set, cpup->cpu_id))
			goto unclaimed;

		ASSERT(arg2val == CAPTURE_CPU_ARG);

		if (cpup->cpu_m.xc_pend[pri] == 0)
			goto unclaimed;

		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

		for (;;) {
			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
			    (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
				break;
			SMT_PAUSE();
		}
		CPUSET_DEL(xc_mboxes[X_CALL_MEDPRI].set, cpup->cpu_id);
		XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
		return (DDI_INTR_CLAIMED);
	}

	if (cpup->cpu_m.xc_pend[pri] == 0)
		goto unclaimed;

	cpup->cpu_m.xc_pend[pri] = 0;
	op = cpup->cpu_m.xc_state[pri];

	/*
	 * Don't invoke a null function.
	 */
	if (xc_mboxes[pri].func != NULL) {
		cpup->cpu_m.xc_retval[pri] =
		    (*xc_mboxes[pri].func)(xc_mboxes[pri].arg1,
		    xc_mboxes[pri].arg2, xc_mboxes[pri].arg3);
	} else
		cpup->cpu_m.xc_retval[pri] = 0;

	/*
	 * Acknowledge that we have completed the x-call operation.
	 */
	cpup->cpu_m.xc_ack[pri] = 1;

	if (op != XC_CALL_OP) {
		/*
		 * For op == XC_SYNC_OP:
		 * Wait for the initiator of the x-call to indicate
		 * that all CPUs involved can proceed.
		 */
		while (cpup->cpu_m.xc_wait[pri])
			SMT_PAUSE();

		while (cpup->cpu_m.xc_state[pri] != XC_DONE)
			SMT_PAUSE();

		/*
		 * Acknowledge that we have received the directive to continue.
		 */
		ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
		cpup->cpu_m.xc_ack[pri] = 1;
	}

	XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
	return (DDI_INTR_CLAIMED);

unclaimed:
	XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_UNCLAIMED);
	return (DDI_INTR_UNCLAIMED);
}


/*
 * xc_do_call: common back end for xc_call() and xc_sync().
 */
static void
xc_do_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func,
	int sync)
{
	/*
	 * If the pri indicates a low priority lock (below LOCK_LEVEL),
	 * we must disable preemption to avoid migrating to another CPU
	 * during the call.
	 */
	if (pri == X_CALL_LOPRI) {
		kpreempt_disable();
	} else {
		pri = X_CALL_HIPRI;
	}

	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	xc_common(func, arg1, arg2, arg3, pri, set, sync);
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	if (pri == X_CALL_LOPRI)
		kpreempt_enable();
}


/*
 * xc_call: call the specified function on all processors in the set;
 * remotes may continue after servicing the call, and we wait here
 * until everybody has completed.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}

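/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller might broadcast a handler to every CPU with xc_call().  The names
 * xc_example_handler and xc_example_broadcast are invented for this sketch,
 * and the handler prototype is assumed to match xc_func_t as declared in
 * <sys/x_call.h>.
 */
#ifdef	XC_EXAMPLE_SKETCH
/*ARGSUSED*/
static int
xc_example_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
{
	/* Runs on each CPU in the set, from the x-call interrupt handler. */
	return (0);
}

static void
xc_example_broadcast(void)
{
	cpuset_t set;

	CPUSET_ALL(set);	/* target every CPU; non-ready CPUs are skipped */
	xc_call((xc_arg_t)0, (xc_arg_t)0, (xc_arg_t)0,
	    X_CALL_HIPRI, set, xc_example_handler);
	/* xc_call() returns once every targeted CPU has acknowledged. */
}
#endif	/* XC_EXAMPLE_SKETCH */
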
/*
 * xc_sync: call the specified function on all processors in the set;
 * after doing its work, each remote waits until we let it continue.
 * We send the continue after everyone has informed us that they are done.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}

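/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller might use xc_sync() to run a handler on one specific CPU and keep
 * that CPU from proceeding until the initiator has seen everyone finish.
 * The name xc_example_sync_one and its parameters are invented for this
 * sketch; xc_example_handler is the sketch handler defined above.
 */
#ifdef	XC_EXAMPLE_SKETCH
static void
xc_example_sync_one(processorid_t target_cpu, xc_arg_t arg)
{
	cpuset_t set;

	CPUSET_ONLY(set, target_cpu);
	/*
	 * The remote CPU runs xc_example_handler(), acknowledges, then
	 * spins until the initiator releases it from xc_common().
	 */
	xc_sync(arg, (xc_arg_t)0, (xc_arg_t)0,
	    X_CALL_HIPRI, set, xc_example_handler);
}
#endif	/* XC_EXAMPLE_SKETCH */
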
/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */

/*
 * Capture the CPUs specified in order to start an x-call session,
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
	int cix;
	int lcx;
	struct cpu *cpup;
	int	i;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

	/*
	 * Prevent deadlocks where we take an interrupt and are waiting
	 * for a mutex owned by one of the CPUs that is captured for
	 * the x-call, while that CPU is waiting for some x-call signal
	 * to be set by us.
	 *
	 * This mutex also prevents preemption, since it raises SPL above
	 * LOCK_LEVEL (it is a spin-type driver mutex).
	 */
	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	lcx = CPU->cpu_id;	/* now we're safe */

	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Wait for all CPUs.
	 */

	/*
	 * First remove ourselves.
	 */
	if (CPU_IN_SET(xc_mboxes[X_CALL_MEDPRI].set, CPU->cpu_id))
		CPUSET_ATOMIC_DEL(xc_mboxes[X_CALL_MEDPRI].set, CPU->cpu_id);
	/*
	 * We must wait for all cpus to clear their bit from
	 * xc_mboxes[X_CALL_MEDPRI].set before we write to this set.
	 */
	for (;;) {
		CPUSET_AND(xc_mboxes[X_CALL_MEDPRI].set, cpu_ready_set);
		if (CPUSET_ISNULL(xc_mboxes[X_CALL_MEDPRI].set))
			break;
		SMT_PAUSE();
	}

	/*
	 * Store the set of CPUs involved in the x-call session, so that
	 * xc_release_cpus will know what CPUs to act upon.
	 */
	xc_mboxes[X_CALL_MEDPRI].set = set;
	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

	/*
	 * Now capture each CPU in the set and cause it to go into a
	 * holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case the CPU wasn't ready but becomes ready
			 * later, take it out of the set now.
			 */
			CPUSET_DEL(set, cix);
			continue;
		}
		if (cix != lcx && CPU_IN_SET(set, cix)) {
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
			XC_TRACE(TT_XC_CAPTURE, X_CALL_MEDPRI, cix);
			send_dirint(cix, XC_MED_PIL);
		}
		i++;
		if (i >= ncpus)
			break;
	}

	/*
	 * Wait here until all remote calls acknowledge.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
				SMT_PAUSE();
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
		}
		i++;
		if (i >= ncpus)
			break;
	}

}

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
	struct cpu *cpup;
	int	i;

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

	/*
	 * Allow each CPU to exit its holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL)
			continue;
		if ((cpup->cpu_flags & CPU_READY) &&
		    (cix != lcx) && CPU_IN_SET(set, cix)) {
			/*
			 * Clear xc_ack since we will be waiting for it
			 * to be set again after we set XC_DONE.
			 */
			XC_TRACE(TT_XC_RELEASE, X_CALL_MEDPRI, cix);
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
		}
		i++;
		if (i >= ncpus)
			break;
	}

	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}

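/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * critical section that holds every other ready CPU while this CPU does
 * its work.  The name xc_example_critical_section and its
 * do_critical_work() callback are invented for this sketch.
 */
#ifdef	XC_EXAMPLE_SKETCH
static void
xc_example_critical_section(void (*do_critical_work)(void))
{
	cpuset_t set;

	kpreempt_disable();			/* don't migrate off this CPU */
	CPUSET_ALL_BUT(set, CPU->cpu_id);	/* every CPU except ourselves */

	xc_capture_cpus(set);	/* remotes spin in XC_HOLD at XC_MED_PIL */
	do_critical_work();	/* captured CPUs are held until released */
	xc_release_cpus();	/* let the captured CPUs continue */
	kpreempt_enable();
}
#endif	/* XC_EXAMPLE_SKETCH */
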
/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done:
 *	-1 - don't wait for remotes to acknowledge, don't hold remotes
 *	 0 - wait for remotes to acknowledge completion, don't hold remotes
 *	 1 - wait for remotes to acknowledge, then hold them until the
 *	     initiator releases them (see xc_sync)
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	int sync)
{
	int cix;
	int do_local = 0;
	struct cpu *cpup;
	cpuset_t tset;
	int last_cpu = 0;

	ASSERT(panicstr == NULL);

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Set up the service definition mailbox.
	 */
	xc_mboxes[pri].func = func;
	xc_mboxes[pri].arg1 = arg1;
	xc_mboxes[pri].arg2 = arg2;
	xc_mboxes[pri].arg3 = arg3;

	if (CPU_IN_SET(set, CPU->cpu_id)) {
		do_local = 1;
		CPUSET_DEL(set, CPU->cpu_id);
	}

	/*
	 * Request service on all remote processors.
	 */
	tset = set;
	for (cix = 0; cix < max_ncpus; cix++) {
		if (!CPU_IN_SET(tset, cix))
			continue;

		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case the CPU is not ready but becomes
			 * ready later, take it out of the set now.
			 */
			CPUSET_DEL(set, cix);
		} else {
			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
			cpup->cpu_m.xc_ack[pri] = 0;
			cpup->cpu_m.xc_wait[pri] = sync;
			if (sync > 0)
				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
			else
				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
			cpup->cpu_m.xc_pend[pri] = 1;
			XC_TRACE(TT_XC_START, pri, cix);
			send_dirint(cix, xc_xlat_xcptoipl[pri]);
			last_cpu = cix;
		}

		CPUSET_DEL(tset, cix);
		if (CPUSET_ISNULL(tset))
			break;
	}

	/*
	 * Run service locally
	 */
	if (do_local && func != NULL) {
		XC_TRACE(TT_XC_START, pri, CPU->cpu_id);
		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
	}

	if (sync == -1)
		return;

	/*
	 * Wait here until all remote calls acknowledge.
	 */
	for (cix = 0; cix <= last_cpu; cix++) {
		if (CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[pri] == 0)
				SMT_PAUSE();
			XC_TRACE(TT_XC_WAIT, pri, cix);
			cpup->cpu_m.xc_ack[pri] = 0;
		}
	}

	if (sync == 0)
		return;

	/*
	 * Release any waiting CPUs
	 */
	for (cix = 0; cix <= last_cpu; cix++) {
		if (CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				cpup->cpu_m.xc_wait[pri] = 0;
				cpup->cpu_m.xc_state[pri] = XC_DONE;
			}
		}
	}

	/*
	 * Wait for all CPUs to acknowledge completion before we continue.
	 * Without this check it's possible (on a VM, on hyper-threaded CPUs,
	 * or in the presence of System Management Interrupts, which can all
	 * cause delays) for the remote processor to still be waiting by
	 * the time xc_common() is next invoked with the sync flag set,
	 * resulting in a deadlock.
	 */
	for (cix = 0; cix <= last_cpu; cix++) {
		if (CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				while (cpup->cpu_m.xc_ack[pri] == 0)
					SMT_PAUSE();
				XC_TRACE(TT_XC_ACK, pri, cix);
				cpup->cpu_m.xc_ack[pri] = 0;
			}
		}
	}
}

/*
 * xc_trycall: attempt to call the specified function on all processors;
 * remotes may wait for a long time, and we continue immediately.
 */
void
xc_trycall(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	cpuset_t set,
	xc_func_t func)
{
	int		save_kernel_preemption;
	extern int	IGNORE_KERNEL_PREEMPTION;

	/*
	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
	 * someone else is already doing a cross-call -- we won't.
	 */

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

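/*
 * Illustrative sketch (not part of the original source): xc_trycall() is
 * the non-blocking variant; if another cross-call is already in progress
 * (the mailbox mutex cannot be acquired), the request is silently dropped.
 * The name xc_example_try_broadcast is invented for this sketch;
 * xc_example_handler is the sketch handler defined earlier.
 */
#ifdef	XC_EXAMPLE_SKETCH
static void
xc_example_try_broadcast(xc_arg_t arg)
{
	cpuset_t set;

	CPUSET_ALL(set);
	/*
	 * Fire and forget: xc_common() runs with sync == -1, so we do not
	 * wait for the remote CPUs to acknowledge.
	 */
	xc_trycall(arg, (xc_arg_t)0, (xc_arg_t)0, set, xc_example_handler);
}
#endif	/* XC_EXAMPLE_SKETCH */
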
/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger.  We can't hold locks, so we spin on the cross-call
 * lock until we get it.  When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int	IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	mutex_impl_t *lp;
	cpuset_t set;
	int x;

	if (!xc_initialized)
		return;

	CPUSET_ALL_BUT(set, this_cpu);

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;

	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
	for (x = 0; x < 0x400000; x++) {
		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
			    set, -1);
			lp->m_spin.m_spinlock = 0; /* XXX */
			break;
		}
		SMT_PAUSE();
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}
631