/*
 * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Functionality for symmetric multiprocessing (SMP). */


#include <smp.h>

#include <stdlib.h>
#include <string.h>

#include <arch/cpu.h>
#include <arch/debug.h>
#include <arch/int.h>
#include <arch/smp.h>
#include <boot/kernel_args.h>
#include <cpu.h>
#include <generic_syscall.h>
#include <int.h>
#include <spinlock_contention.h>
#include <thread.h>
#if DEBUG_SPINLOCK_LATENCIES
#	include <safemode.h>
#endif

#include "kernel_debug_config.h"


//#define TRACE_SMP
#ifdef TRACE_SMP
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


#undef try_acquire_spinlock
#undef acquire_spinlock
#undef release_spinlock


#define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)

// These macros define the number of unsuccessful iterations in
// acquire_spinlock() and acquire_spinlock_nocheck() after which the functions
// panic(), assuming a deadlock.
#define SPINLOCK_DEADLOCK_COUNT				100000000
#define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000


struct smp_msg {
	struct smp_msg	*next;
	int32			message;
	addr_t			data;
	addr_t			data2;
	addr_t			data3;
	void			*data_ptr;
	uint32			flags;
	int32			ref_count;
	volatile bool	done;
	uint32			proc_bitmap;
};

enum mailbox_source {
	MAILBOX_LOCAL,
	MAILBOX_BCAST,
};

static vint32 sBootCPUSpin = 0;

static vint32 sEarlyCPUCall = 0;
static void (*sEarlyCPUCallFunction)(void*, int);
static void* sEarlyCPUCallCookie;

static struct smp_msg* sFreeMessages = NULL;
static volatile int sFreeMessageCount = 0;
static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;

static struct smp_msg* sCPUMessages[SMP_MAX_CPUS] = { NULL, };
static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];

static struct smp_msg* sBroadcastMessages = NULL;
static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;

static bool sICIEnabled = false;
static int32 sNumCPUs = 1;

static int32 process_pending_ici(int32 currentCPU);


#if DEBUG_SPINLOCKS
#define NUM_LAST_CALLERS	32

static struct {
	void		*caller;
	spinlock	*lock;
} sLastCaller[NUM_LAST_CALLERS];

static vint32 sLastIndex = 0;
	// Is incremented atomically. Must be % NUM_LAST_CALLERS before being used
	// as index into sLastCaller. Note that it has to be cast to uint32 before
	// applying the modulo operation, since otherwise, after overflowing, it
	// would yield negative indices.


static void
push_lock_caller(void* caller, spinlock* lock)
{
	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;

	sLastCaller[index].caller = caller;
	sLastCaller[index].lock = lock;
}


static void*
find_lock_caller(spinlock* lock)
{
	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;

	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
		if (sLastCaller[index].lock == lock)
			return sLastCaller[index].caller;
	}

	return NULL;
}


int
dump_spinlock(int argc, char** argv)
{
	if (argc != 2) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}

	uint64 address;
	if (!evaluate_debug_expression(argv[1], &address, false))
		return 0;

	spinlock* lock = (spinlock*)(addr_t)address;
	kprintf("spinlock %p:\n", lock);
	bool locked = B_SPINLOCK_IS_LOCKED(lock);
	if (locked) {
		kprintf("  locked from %p\n", find_lock_caller(lock));
	} else
		kprintf("  not locked\n");

	return 0;
}


#endif	// DEBUG_SPINLOCKS


#if DEBUG_SPINLOCK_LATENCIES


#define NUM_LATENCY_LOCKS	4
#define DEBUG_LATENCY		200


static struct {
	spinlock	*lock;
	bigtime_t	timestamp;
} sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];

static int32 sLatencyIndex[B_MAX_CPU_COUNT];
static bool sEnableLatencyCheck;


static void
push_latency(spinlock* lock)
{
	if (!sEnableLatencyCheck)
		return;

	int32 cpu = smp_get_current_cpu();
	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;

	sLatency[cpu][index].lock = lock;
	sLatency[cpu][index].timestamp = system_time();
}


static void
test_latency(spinlock* lock)
{
	if (!sEnableLatencyCheck)
		return;

	int32 cpu = smp_get_current_cpu();

	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
		if (sLatency[cpu][i].lock == lock) {
			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
			if (diff > DEBUG_LATENCY && diff < 500000) {
				panic("spinlock %p was held for %lld usecs (%d allowed)\n",
					lock, diff, DEBUG_LATENCY);
			}

			sLatency[cpu][i].lock = NULL;
		}
	}
}


#endif	// DEBUG_SPINLOCK_LATENCIES


int
dump_ici_messages(int argc, char** argv)
{
	// count broadcast messages
	int32 count = 0;
	int32 doneCount = 0;
	int32 unreferencedCount = 0;
	smp_msg* message = sBroadcastMessages;
	while (message != NULL) {
		count++;
		if (message->done)
			doneCount++;
		if (message->ref_count <= 0)
			unreferencedCount++;
		message = message->next;
	}

	kprintf("ICI broadcast messages: %" B_PRId32 ", first: %p\n", count,
		sBroadcastMessages);
	kprintf("  done:         %" B_PRId32 "\n", doneCount);
	kprintf("  unreferenced: %" B_PRId32 "\n", unreferencedCount);

	// count per-CPU messages
	for (int32 i = 0; i < sNumCPUs; i++) {
		count = 0;
		message = sCPUMessages[i];
		while (message != NULL) {
			count++;
			message = message->next;
		}

		kprintf("CPU %" B_PRId32 " messages: %" B_PRId32 ", first: %p\n", i,
			count, sCPUMessages[i]);
	}

	return 0;
}


int
dump_ici_message(int argc, char** argv)
{
	if (argc != 2) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}

	uint64 address;
	if (!evaluate_debug_expression(argv[1], &address, false))
		return 0;

	smp_msg* message = (smp_msg*)(addr_t)address;
	kprintf("ICI message %p:\n", message);
	kprintf("  next:        %p\n", message->next);
	kprintf("  message:     %" B_PRId32 "\n", message->message);
	kprintf("  data:        0x%lx\n", message->data);
	kprintf("  data2:       0x%lx\n", message->data2);
	kprintf("  data3:       0x%lx\n", message->data3);
	kprintf("  data_ptr:    %p\n", message->data_ptr);
	kprintf("  flags:       %" B_PRIx32 "\n", message->flags);
	kprintf("  ref_count:   %" B_PRId32 "\n", message->ref_count);
	kprintf("  done:        %s\n", message->done ? "true" : "false");
	kprintf("  proc_bitmap: %" B_PRIx32 "\n", message->proc_bitmap);

	return 0;
}


static inline void
process_all_pending_ici(int32 currentCPU)
{
	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
		;
}


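/*!	Tries to acquire the given spinlock without spinning.
	Must be called with interrupts disabled.
	\return \c true if the lock was acquired, \c false if it is already held.
*/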
bool
try_acquire_spinlock(spinlock* lock)
{
#if DEBUG_SPINLOCKS
	if (are_interrupts_enabled()) {
		panic("try_acquire_spinlock: attempt to acquire lock %p with "
			"interrupts enabled", lock);
	}
#endif

#if B_DEBUG_SPINLOCK_CONTENTION
	if (atomic_add(&lock->lock, 1) != 0)
		return false;
#else
	if (atomic_or((int32*)lock, 1) != 0)
		return false;

#	if DEBUG_SPINLOCKS
	push_lock_caller(arch_debug_get_caller(), lock);
#	endif
#endif

	return true;
}


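/*!	Acquires the given spinlock, spinning until it becomes available.
	While spinning, pending ICI messages are processed, so the spinning CPU
	cannot deadlock against a lock holder that is waiting for a synchronous
	ICI to be acknowledged. Must be called with interrupts disabled.
*/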
void
acquire_spinlock(spinlock* lock)
{
#if DEBUG_SPINLOCKS
	if (are_interrupts_enabled()) {
		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
			"enabled", lock);
	}
#endif

	if (sNumCPUs > 1) {
		int currentCPU = smp_get_current_cpu();
#if B_DEBUG_SPINLOCK_CONTENTION
		while (atomic_add(&lock->lock, 1) != 0)
			process_all_pending_ici(currentCPU);
#else
		while (1) {
			uint32 count = 0;
			while (*lock != 0) {
				if (++count == SPINLOCK_DEADLOCK_COUNT) {
					panic("acquire_spinlock(): Failed to acquire spinlock %p "
						"for a long time!", lock);
					count = 0;
				}

				process_all_pending_ici(currentCPU);
				PAUSE();
			}
			if (atomic_or((int32*)lock, 1) == 0)
				break;
		}

#	if DEBUG_SPINLOCKS
		push_lock_caller(arch_debug_get_caller(), lock);
#	endif
#endif
	} else {
#if DEBUG_SPINLOCKS
		int32 oldValue;
		oldValue = atomic_or((int32*)lock, 1);
		if (oldValue != 0) {
			panic("acquire_spinlock: attempt to acquire lock %p twice on "
				"non-SMP system (last caller: %p, value %" B_PRId32 ")", lock,
				find_lock_caller(lock), oldValue);
		}

		push_lock_caller(arch_debug_get_caller(), lock);
#endif
	}
#if DEBUG_SPINLOCK_LATENCIES
	push_latency(lock);
#endif
}


static void
acquire_spinlock_nocheck(spinlock *lock)
{
#if DEBUG_SPINLOCKS
	if (are_interrupts_enabled()) {
		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
			"interrupts enabled", lock);
	}
#endif

	if (sNumCPUs > 1) {
#if B_DEBUG_SPINLOCK_CONTENTION
		while (atomic_add(&lock->lock, 1) != 0) {
		}
#else
		while (1) {
			uint32 count = 0;
			while (*lock != 0) {
				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
					panic("acquire_spinlock_nocheck(): Failed to acquire "
						"spinlock %p for a long time!", lock);
					count = 0;
				}

				PAUSE();
			}

			if (atomic_or((int32*)lock, 1) == 0)
				break;
		}
#endif
	} else {
#if DEBUG_SPINLOCKS
		if (atomic_or((int32*)lock, 1) != 0) {
			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
				"on non-SMP system\n", lock);
		}
#endif
	}
}


/*!	Equivalent to acquire_spinlock(), save for the additional currentCPU
	parameter. */
static void
acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
{
#if DEBUG_SPINLOCKS
	if (are_interrupts_enabled()) {
		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
			"interrupts enabled", lock);
	}
#endif

	if (sNumCPUs > 1) {
#if B_DEBUG_SPINLOCK_CONTENTION
		while (atomic_add(&lock->lock, 1) != 0)
			process_all_pending_ici(currentCPU);
#else
		while (1) {
			uint32 count = 0;
			while (*lock != 0) {
				if (++count == SPINLOCK_DEADLOCK_COUNT) {
					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
						"%p for a long time!", lock);
					count = 0;
				}

				process_all_pending_ici(currentCPU);
				PAUSE();
			}
			if (atomic_or((int32*)lock, 1) == 0)
				break;
		}

#	if DEBUG_SPINLOCKS
		push_lock_caller(arch_debug_get_caller(), lock);
#	endif
#endif
	} else {
#if DEBUG_SPINLOCKS
		int32 oldValue;
		oldValue = atomic_or((int32*)lock, 1);
		if (oldValue != 0) {
			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
				"non-SMP system (last caller: %p, value %" B_PRId32 ")", lock,
				find_lock_caller(lock), oldValue);
		}

		push_lock_caller(arch_debug_get_caller(), lock);
#endif
	}
}


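/*!	Releases the given spinlock. On SMP systems (and with DEBUG_SPINLOCKS on
	single-CPU systems) it panics if the lock was not actually held or if
	interrupts are enabled.
*/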
void
release_spinlock(spinlock *lock)
{
#if DEBUG_SPINLOCK_LATENCIES
	test_latency(lock);
#endif

	if (sNumCPUs > 1) {
		if (are_interrupts_enabled())
			panic("release_spinlock: attempt to release lock %p with "
				"interrupts enabled\n", lock);
#if B_DEBUG_SPINLOCK_CONTENTION
		{
			int32 count = atomic_and(&lock->lock, 0) - 1;
			if (count < 0) {
				panic("release_spinlock: lock %p was already released\n", lock);
			} else {
				// add to the total count -- deal with carry manually
				if ((uint32)atomic_add(&lock->count_low, count) + count
						< (uint32)count) {
					atomic_add(&lock->count_high, 1);
				}
			}
		}
#else
		if (atomic_and((int32*)lock, 0) != 1)
			panic("release_spinlock: lock %p was already released\n", lock);
#endif
	} else {
#if DEBUG_SPINLOCKS
		if (are_interrupts_enabled()) {
			panic("release_spinlock: attempt to release lock %p with "
				"interrupts enabled\n", lock);
		}
		if (atomic_and((int32*)lock, 0) != 1)
			panic("release_spinlock: lock %p was already released\n", lock);
#endif
#if DEBUG_SPINLOCK_LATENCIES
		test_latency(lock);
#endif
	}
}


/*!	Finds a free message and removes it from the free list.
	NOTE: This has the side effect of disabling interrupts.
	The return value is the previous interrupt state.
*/
static cpu_status
find_free_message(struct smp_msg** msg)
{
	cpu_status state;

	TRACE(("find_free_message: entry\n"));

retry:
	while (sFreeMessageCount <= 0) {
		state = disable_interrupts();
		process_all_pending_ici(smp_get_current_cpu());
		restore_interrupts(state);
		PAUSE();
	}
	state = disable_interrupts();
	acquire_spinlock(&sFreeMessageSpinlock);

	if (sFreeMessageCount <= 0) {
		// someone grabbed one while we were getting the lock,
		// go back to waiting for it
		release_spinlock(&sFreeMessageSpinlock);
		restore_interrupts(state);
		goto retry;
	}

	*msg = sFreeMessages;
	sFreeMessages = (*msg)->next;
	sFreeMessageCount--;

	release_spinlock(&sFreeMessageSpinlock);

	TRACE(("find_free_message: returning msg %p\n", *msg));

	return state;
}


/*!	Similar to find_free_message(), but expects interrupts to be disabled
	already.
*/
static void
find_free_message_interrupts_disabled(int32 currentCPU,
	struct smp_msg** _message)
{
	TRACE(("find_free_message_interrupts_disabled: entry\n"));

	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
	while (sFreeMessageCount <= 0) {
		release_spinlock(&sFreeMessageSpinlock);
		process_all_pending_ici(currentCPU);
		PAUSE();
		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
	}

	*_message = sFreeMessages;
	sFreeMessages = (*_message)->next;
	sFreeMessageCount--;

	release_spinlock(&sFreeMessageSpinlock);

	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
		*_message));
}


static void
return_free_message(struct smp_msg* msg)
{
	TRACE(("return_free_message: returning msg %p\n", msg));

	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
	msg->next = sFreeMessages;
	sFreeMessages = msg;
	sFreeMessageCount++;
	release_spinlock(&sFreeMessageSpinlock);
}


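/*!	Retrieves the next ICI message for \a currentCPU, if any.
	The CPU-local mailbox is checked first; if it is empty, the broadcast
	mailbox is searched for a message this CPU hasn't processed yet.
	\a sourceMailbox is set to indicate where the message was found.
*/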
static struct smp_msg*
check_for_message(int currentCPU, mailbox_source& sourceMailbox)
{
	if (!sICIEnabled)
		return NULL;

	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);

	struct smp_msg* msg = sCPUMessages[currentCPU];
	if (msg != NULL) {
		sCPUMessages[currentCPU] = msg->next;
		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
		sourceMailbox = MAILBOX_LOCAL;
	} else {
		// try getting one from the broadcast mailbox

		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);

		msg = sBroadcastMessages;
		while (msg != NULL) {
			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
				// we have handled this one already
				msg = msg->next;
				continue;
			}

			// mark it so we won't try to process this one again
			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
			sourceMailbox = MAILBOX_BCAST;
			break;
		}
		release_spinlock(&sBroadcastMessageSpinlock);

		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU,
			msg));
	}
	return msg;
}


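/*!	Decrements the message's reference count. The CPU that drops the count to
	zero unlinks the message from its mailbox, frees its data pointer if
	requested, and either marks it done (for SMP_MSG_FLAG_SYNC messages, which
	the sender frees) or returns it to the free list.
*/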
static void
finish_message_processing(int currentCPU, struct smp_msg* msg,
	mailbox_source sourceMailbox)
{
	if (atomic_add(&msg->ref_count, -1) != 1)
		return;

	// we were the last one to decrement the ref_count
	// it's our job to remove it from the list & possibly clean it up
	struct smp_msg** mbox;
	spinlock* spinlock;

	// clean up the message from one of the mailboxes
	if (sourceMailbox == MAILBOX_BCAST) {
		mbox = &sBroadcastMessages;
		spinlock = &sBroadcastMessageSpinlock;
	} else {
		mbox = &sCPUMessages[currentCPU];
		spinlock = &sCPUMessageSpinlock[currentCPU];
	}

	acquire_spinlock_nocheck(spinlock);

	TRACE(("cleaning up message %p\n", msg));

	if (sourceMailbox != MAILBOX_BCAST) {
		// local mailbox -- the message has already been removed in
		// check_for_message()
	} else if (msg == *mbox) {
		*mbox = msg->next;
	} else {
		// We need to walk the list to find the message. We can't reuse any
		// data from a previous walk, since the list may have changed in the
		// meantime. But we are guaranteed that msg is still in it.
		struct smp_msg* last = NULL;
		struct smp_msg* msg1;

		msg1 = *mbox;
		while (msg1 != NULL && msg1 != msg) {
			last = msg1;
			msg1 = msg1->next;
		}

		// by definition, last must be something
		if (msg1 == msg && last != NULL)
			last->next = msg->next;
		else
			panic("last == NULL or msg != msg1");
	}

	release_spinlock(spinlock);

	if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
		free(msg->data_ptr);

	if ((msg->flags & SMP_MSG_FLAG_SYNC) != 0) {
		msg->done = true;
		// the caller cpu should now free the message
	} else {
		// in the !SYNC case, we get to free the message
		return_free_message(msg);
	}
}


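/*!	Processes one pending ICI message for \a currentCPU.
	\return \c B_ENTRY_NOT_FOUND if no message was pending, \c B_OK otherwise.
*/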
static status_t
process_pending_ici(int32 currentCPU)
{
	mailbox_source sourceMailbox;
	struct smp_msg* msg = check_for_message(currentCPU, sourceMailbox);
	if (msg == NULL)
		return B_ENTRY_NOT_FOUND;

	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));

	bool haltCPU = false;

	switch (msg->message) {
		case SMP_MSG_INVALIDATE_PAGE_RANGE:
			arch_cpu_invalidate_TLB_range(msg->data, msg->data2);
			break;
		case SMP_MSG_INVALIDATE_PAGE_LIST:
			arch_cpu_invalidate_TLB_list((addr_t*)msg->data, (int)msg->data2);
			break;
		case SMP_MSG_USER_INVALIDATE_PAGES:
			arch_cpu_user_TLB_invalidate();
			break;
		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
			arch_cpu_global_TLB_invalidate();
			break;
		case SMP_MSG_CPU_HALT:
			haltCPU = true;
			break;
		case SMP_MSG_CALL_FUNCTION:
		{
			smp_call_func func = (smp_call_func)msg->data_ptr;
			func(msg->data, currentCPU, msg->data2, msg->data3);
			break;
		}
		case SMP_MSG_RESCHEDULE:
		{
			cpu_ent* cpu = thread_get_current_thread()->cpu;
			cpu->invoke_scheduler = true;
			cpu->invoke_scheduler_if_idle = false;
			break;
		}
		case SMP_MSG_RESCHEDULE_IF_IDLE:
		{
			cpu_ent* cpu = thread_get_current_thread()->cpu;
			if (!cpu->invoke_scheduler) {
				cpu->invoke_scheduler = true;
				cpu->invoke_scheduler_if_idle = true;
			}
			break;
		}

		default:
			dprintf("smp_intercpu_int_handler: got unknown message %" B_PRId32 "\n",
				msg->message);
			break;
	}

	// finish dealing with this message, possibly removing it from the list
	finish_message_processing(currentCPU, msg, sourceMailbox);

	// special case for the halt message
	if (haltCPU)
		debug_trap_cpu_in_kdl(currentCPU, false);

	return B_OK;
}


#if B_DEBUG_SPINLOCK_CONTENTION


static uint64
get_spinlock_counter(spinlock* lock)
{
	uint32 high;
	uint32 low;
	do {
		high = (uint32)atomic_get(&lock->count_high);
		low = (uint32)atomic_get(&lock->count_low);
	} while (high != atomic_get(&lock->count_high));

	return ((uint64)high << 32) | low;
}


static status_t
spinlock_contention_syscall(const char* subsystem, uint32 function,
	void* buffer, size_t bufferSize)
{
	spinlock_contention_info info;

	if (function != GET_SPINLOCK_CONTENTION_INFO)
		return B_BAD_VALUE;

	if (bufferSize < sizeof(spinlock_contention_info))
		return B_BAD_VALUE;

	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);

	if (!IS_USER_ADDRESS(buffer)
		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
		return B_BAD_ADDRESS;
	}

	return B_OK;
}


#endif	// B_DEBUG_SPINLOCK_CONTENTION


static void
process_early_cpu_call(int32 cpu)
{
	sEarlyCPUCallFunction(sEarlyCPUCallCookie, cpu);
	atomic_and(&sEarlyCPUCall, ~(uint32)(1 << cpu));
}


static void
call_all_cpus_early(void (*function)(void*, int), void* cookie)
{
	if (sNumCPUs > 1) {
		sEarlyCPUCallFunction = function;
		sEarlyCPUCallCookie = cookie;

		uint32 cpuMask = (1 << sNumCPUs) - 2;
			// all CPUs but the boot cpu

		sEarlyCPUCall = cpuMask;

		// wait for all CPUs to finish
		while ((sEarlyCPUCall & cpuMask) != 0)
			PAUSE();
	}

	function(cookie, 0);
}


//	#pragma mark -


int
smp_intercpu_int_handler(int32 cpu)
{
	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));

	process_all_pending_ici(cpu);

	TRACE(("smp_intercpu_int_handler: done\n"));

	return B_HANDLED_INTERRUPT;
}


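/*!	Sends an ICI message to a single target CPU and triggers an inter-CPU
	interrupt. If SMP_MSG_FLAG_SYNC is set, waits until the target CPU has
	processed the message. Sending a message to the calling CPU itself is
	a no-op.
*/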
void
smp_send_ici(int32 targetCPU, int32 message, addr_t data, addr_t data2,
	addr_t data3, void* dataPointer, uint32 flags)
{
	struct smp_msg *msg;

	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, "
		"data3 0x%lx, ptr %p, flags 0x%lx\n", targetCPU, message, data, data2,
		data3, dataPointer, flags));

	if (sICIEnabled) {
		int state;
		int currentCPU;

		// find_free_message leaves interrupts disabled
		state = find_free_message(&msg);

		currentCPU = smp_get_current_cpu();
		if (targetCPU == currentCPU) {
			return_free_message(msg);
			restore_interrupts(state);
			return; // we can't send an ICI to ourselves
		}

		// set up the message
		msg->message = message;
		msg->data = data;
		msg->data2 = data2;
		msg->data3 = data3;
		msg->data_ptr = dataPointer;
		msg->ref_count = 1;
		msg->flags = flags;
		msg->done = false;

		// stick it in the appropriate cpu's mailbox
		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
		msg->next = sCPUMessages[targetCPU];
		sCPUMessages[targetCPU] = msg;
		release_spinlock(&sCPUMessageSpinlock[targetCPU]);

		arch_smp_send_ici(targetCPU);

		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
			// wait for the target CPU to finish processing it; once it has
			// removed the message from its mailbox, it marks a SYNC message
			// as done
			while (msg->done == false) {
				process_all_pending_ici(currentCPU);
				PAUSE();
			}
			// for SYNC messages, it's our responsibility to put it
			// back into the free list
			return_free_message(msg);
		}

		restore_interrupts(state);
	}
}


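/*!	Sends an ICI message to all CPUs in \a cpuMask (the calling CPU is
	implicitly excluded). The message is queued in the broadcast mailbox with
	its proc_bitmap pre-marked for all non-target CPUs, so only the target
	CPUs will process it.
*/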
void
smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, addr_t data,
	addr_t data2, addr_t data3, void *dataPointer, uint32 flags)
{
	if (!sICIEnabled)
		return;

	int currentCPU = smp_get_current_cpu();
	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
		& (((cpu_mask_t)1 << sNumCPUs) - 1);
	if (cpuMask == 0) {
		panic("smp_send_multicast_ici(): 0 CPU mask");
		return;
	}

	// count target CPUs
	int32 targetCPUs = 0;
	for (int32 i = 0; i < sNumCPUs; i++) {
		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
			targetCPUs++;
	}

	// find_free_message leaves interrupts disabled
	struct smp_msg *msg;
	int state = find_free_message(&msg);

	msg->message = message;
	msg->data = data;
	msg->data2 = data2;
	msg->data3 = data3;
	msg->data_ptr = dataPointer;
	msg->ref_count = targetCPUs;
	msg->flags = flags;
	msg->proc_bitmap = ~cpuMask;
	msg->done = false;

	// stick it in the broadcast mailbox
	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
	msg->next = sBroadcastMessages;
	sBroadcastMessages = msg;
	release_spinlock(&sBroadcastMessageSpinlock);

	arch_smp_send_broadcast_ici();
		// TODO: Introduce a call that only bothers the target CPUs!

	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
		// wait for the target CPUs to finish processing it; the last CPU to
		// remove the message from the mailbox marks a SYNC message as done
		while (msg->done == false) {
			process_all_pending_ici(currentCPU);
			PAUSE();
		}

		// for SYNC messages, it's our responsibility to put it
		// back into the free list
		return_free_message(msg);
	}

	restore_interrupts(state);
}


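/*!	Sends an ICI message to all CPUs except the calling one and triggers a
	broadcast inter-CPU interrupt. If SMP_MSG_FLAG_SYNC is set, waits until
	all other CPUs have processed the message.
*/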
void
smp_send_broadcast_ici(int32 message, addr_t data, addr_t data2, addr_t data3,
	void *dataPointer, uint32 flags)
{
	struct smp_msg *msg;

	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 "
		"0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n", smp_get_current_cpu(),
		message, data, data2, data3, dataPointer, flags));

	if (sICIEnabled) {
		int state;
		int currentCPU;

		// find_free_message leaves interrupts disabled
		state = find_free_message(&msg);

		currentCPU = smp_get_current_cpu();

		msg->message = message;
		msg->data = data;
		msg->data2 = data2;
		msg->data3 = data3;
		msg->data_ptr = dataPointer;
		msg->ref_count = sNumCPUs - 1;
		msg->flags = flags;
		msg->proc_bitmap = SET_BIT(0, currentCPU);
		msg->done = false;

		TRACE(("smp_send_broadcast_ici%d: inserting msg %p into broadcast "
			"mbox\n", currentCPU, msg));

		// stick it in the broadcast mailbox
		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
		msg->next = sBroadcastMessages;
		sBroadcastMessages = msg;
		release_spinlock(&sBroadcastMessageSpinlock);

		arch_smp_send_broadcast_ici();

		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));

		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
			// wait for the other CPUs to finish processing it; the last CPU
			// to remove the message from the mailbox marks a SYNC message
			// as done
			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));

			while (msg->done == false) {
				process_all_pending_ici(currentCPU);
				PAUSE();
			}

			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));

			// for SYNC messages, it's our responsibility to put it
			// back into the free list
			return_free_message(msg);
		}

		restore_interrupts(state);
	}

	TRACE(("smp_send_broadcast_ici: done\n"));
}


void
smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
	addr_t data, addr_t data2, addr_t data3, void *dataPointer, uint32 flags)
{
	if (!sICIEnabled)
		return;

	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
		currentCPU, message, data, data2, data3, dataPointer, flags));

	struct smp_msg *msg;
	find_free_message_interrupts_disabled(currentCPU, &msg);

	msg->message = message;
	msg->data = data;
	msg->data2 = data2;
	msg->data3 = data3;
	msg->data_ptr = dataPointer;
	msg->ref_count = sNumCPUs - 1;
	msg->flags = flags;
	msg->proc_bitmap = SET_BIT(0, currentCPU);
	msg->done = false;

	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
		"into broadcast mbox\n", currentCPU, msg));

	// stick it in the broadcast mailbox
	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
	msg->next = sBroadcastMessages;
	sBroadcastMessages = msg;
	release_spinlock(&sBroadcastMessageSpinlock);

	arch_smp_send_broadcast_ici();

	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
		currentCPU));

	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
		// wait for the other CPUs to finish processing it; the last CPU to
		// remove the message from the mailbox marks a SYNC message as done
		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
			"ack\n", currentCPU));

		while (msg->done == false) {
			process_all_pending_ici(currentCPU);
			PAUSE();
		}

		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
			"message to free list\n", currentCPU));

		// for SYNC messages, it's our responsibility to put it
		// back into the free list
		return_free_message(msg);
	}

	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
}


/*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.

	\param cpu The index of the calling CPU.
	\param rendezVous A rendez-vous variable to make sure that the boot CPU
		does not return before all other CPUs have started waiting.
	\return \c true on the boot CPU, \c false otherwise.
*/
bool
smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
{
	if (cpu == 0) {
		smp_cpu_rendezvous(rendezVous, cpu);
		return true;
	}

	smp_cpu_rendezvous(rendezVous, cpu);

	while (sBootCPUSpin == 0) {
		if ((sEarlyCPUCall & (1 << cpu)) != 0)
			process_early_cpu_call(cpu);

		PAUSE();
	}

	return false;
}


void
smp_wake_up_non_boot_cpus()
{
	// ICIs were previously being ignored
	if (sNumCPUs > 1)
		sICIEnabled = true;

	// resume non boot CPUs
	sBootCPUSpin = 1;
}


/*!	Spin until all CPUs have reached the rendez-vous point.

	The rendez-vous variable \c *var must have been initialized to 0 before the
	function is called. The variable will be non-zero when the function returns.

	Note that when the function returns on one CPU, it only means that all CPUs
	have already entered the function. It does not mean that the variable can
	already be reset. Only when all CPUs have returned (which would have to be
	ensured via another rendez-vous) can the variable be reset.
*/
void
smp_cpu_rendezvous(volatile uint32* var, int current_cpu)
{
	atomic_or((vint32*)var, 1 << current_cpu);

	while (*var != (((uint32)1 << sNumCPUs) - 1))
		PAUSE();
}


status_t
smp_init(kernel_args* args)
{
	TRACE(("smp_init: entry\n"));

#if DEBUG_SPINLOCK_LATENCIES
	sEnableLatencyCheck
		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
#endif

#if DEBUG_SPINLOCKS
	add_debugger_command_etc("spinlock", &dump_spinlock,
		"Dump info on a spinlock",
		"\n"
		"Dumps info on a spinlock.\n", 0);
#endif
	add_debugger_command_etc("ici", &dump_ici_messages,
		"Dump info on pending ICI messages",
		"\n"
		"Dumps info on pending ICI messages.\n", 0);
	add_debugger_command_etc("ici_message", &dump_ici_message,
		"Dump info on an ICI message",
		"\n"
		"Dumps info on an ICI message.\n", 0);

	if (args->num_cpus > 1) {
		sFreeMessages = NULL;
		sFreeMessageCount = 0;
		for (int i = 0; i < MSG_POOL_SIZE; i++) {
			struct smp_msg* msg
				= (struct smp_msg*)malloc(sizeof(struct smp_msg));
			if (msg == NULL) {
				panic("error creating smp mailboxes\n");
				return B_ERROR;
			}
			memset(msg, 0, sizeof(struct smp_msg));
			msg->next = sFreeMessages;
			sFreeMessages = msg;
			sFreeMessageCount++;
		}
		sNumCPUs = args->num_cpus;
	}
	TRACE(("smp_init: calling arch_smp_init\n"));

	return arch_smp_init(args);
}


status_t
smp_per_cpu_init(kernel_args* args, int32 cpu)
{
	return arch_smp_per_cpu_init(args, cpu);
}


status_t
smp_init_post_generic_syscalls(void)
{
#if B_DEBUG_SPINLOCK_CONTENTION
	return register_generic_syscall(SPINLOCK_CONTENTION,
		&spinlock_contention_syscall, 0, 0);
#else
	return B_OK;
#endif
}


void
smp_set_num_cpus(int32 numCPUs)
{
	sNumCPUs = numCPUs;
}


int32
smp_get_num_cpus()
{
	return sNumCPUs;
}


int32
smp_get_current_cpu(void)
{
	return thread_get_current_thread()->cpu->cpu_num;
}


// #pragma mark - public exported functions


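/*!	Calls \a func on all CPUs: asynchronously on the other CPUs and directly
	on the calling CPU. Falls back to the early call mechanism while inter-CPU
	interrupts are not yet enabled.
*/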
void
call_all_cpus(void (*func)(void*, int), void* cookie)
{
	cpu_status state = disable_interrupts();

	// if inter-CPU communication is not yet enabled, use the early mechanism
	if (!sICIEnabled) {
		call_all_cpus_early(func, cookie);
		restore_interrupts(state);
		return;
	}

	if (smp_get_num_cpus() > 1) {
		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (addr_t)cookie,
			0, 0, (void*)func, SMP_MSG_FLAG_ASYNC);
	}

	// we need to call this function ourselves as well
	func(cookie, smp_get_current_cpu());

	restore_interrupts(state);
}


void
call_all_cpus_sync(void (*func)(void*, int), void* cookie)
{
	cpu_status state = disable_interrupts();

	// if inter-CPU communication is not yet enabled, use the early mechanism
	if (!sICIEnabled) {
		call_all_cpus_early(func, cookie);
		restore_interrupts(state);
		return;
	}

	if (smp_get_num_cpus() > 1) {
		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (addr_t)cookie,
			0, 0, (void*)func, SMP_MSG_FLAG_SYNC);
	}

	// we need to call this function ourselves as well
	func(cookie, smp_get_current_cpu());

	restore_interrupts(state);
}


void
memory_read_barrier(void)
{
	arch_cpu_memory_read_barrier();
}


void
memory_write_barrier(void)
{
	arch_cpu_memory_write_barrier();
}