/*
 * arch/ubicom32/kernel/ldsr.c
 *   Ubicom32 architecture Linux Device Services Driver Interface
 *
 * (C) Copyright 2009, Ubicom, Inc.
 *
 * This file is part of the Ubicom32 Linux Kernel Port.
 *
 * The Ubicom32 Linux Kernel Port is free software: you can redistribute
 * it and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 2 of the
 * License, or (at your option) any later version.
 *
 * The Ubicom32 Linux Kernel Port is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with the Ubicom32 Linux Kernel Port.  If not,
 * see <http://www.gnu.org/licenses/>.
 *
 * Ubicom32 implementation derived from (with many thanks):
 *   arch/m68knommu
 *   arch/blackfin
 *   arch/parisc
 *
 * NOTES:
 *
 * The LDSR is a programmable interrupt controller that is written in software.
 * It emulates the behavior of a PIC by fielding the interrupts, choosing a
 * victim thread to take the interrupt, and forcing that thread to take a
 * context switch to the appropriate interrupt handler.
 *
 * Because traps are treated as just a special class of interrupts, the LDSR
 * also handles the processing of traps.
 *
 * Because we compile Linux both UP and SMP, we need the LDSR to use
 * architectural locking that is not "compiled out" when compiling UP.  For now,
 * we use a single atomic bit lock.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/profile.h>
#include <linux/clocksource.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/bug.h>
#include <linux/delay.h>
#include <asm/ip5000.h>
#include <asm/atomic.h>
#include <asm/machdep.h>
#include <asm/asm-offsets.h>
#include <asm/traps.h>
#include <asm/thread.h>
#include <asm/range-protect.h>

/*
 * One cannot print from the LDSR, so the best we can do is
 * check a condition and stall all of the threads.
 */

/* #define DEBUG_LDSR 1 */
#if defined(DEBUG_LDSR)
#define DEBUG_ASSERT(cond) \
	if (!(cond)) { \
		THREAD_STALL; \
	}
#else
#define DEBUG_ASSERT(cond)
#endif

/*
 * Make global so that we can use it in the RFI code in assembly.
 */
unsigned int ldsr_soft_irq_mask;
EXPORT_SYMBOL(ldsr_soft_irq_mask);

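/*
 * ldsr_suspend_mask is the set of INT_STAT0 bits (the LDSR soft IRQ and,
 * optionally, the trap soft IRQ) that are allowed to wake the LDSR out of
 * suspend.  ldsr_soft_irq is the soft IRQ used to kick the LDSR itself,
 * and ldsr_stack_space is the stack the LDSR thread runs on (see
 * ldsr_init()).
 */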
static unsigned int ldsr_suspend_mask;
static unsigned int ldsr_soft_irq;
static unsigned int ldsr_stack_space[1024];

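/*
 * Per-vector state shared between the LDSR thread and the mask/unmask
 * operations below: enabled0/enabled1 are the vectors the kernel has
 * enabled, mask0/mask1 are the vectors currently unmasked (an enabled
 * vector is masked while it is being serviced), and total/retry/backout
 * count delivered interrupts, delivery retries and backed-out deliveries.
 */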
static struct ldsr_register_bank {
	volatile unsigned int enabled0;
	volatile unsigned int enabled1;
	volatile unsigned int mask0;
	volatile unsigned int mask1;
	unsigned int total;
	unsigned int retry;
	unsigned int backout;
} ldsr_interrupt;

/*
 * Which thread/cpu are we?
 */
static int ldsr_tid = -1;

#if defined(CONFIG_IRQSTACKS)
/*
 * per-CPU IRQ stacks (thread information and stack)
 *
 * NOTE: Do not use DEFINE_PER_CPU() as it makes it harder
 * to find the location of ctx from assembly language.
 */
union irq_ctx {
	struct thread_info      tinfo;
	u32                     stack[THREAD_SIZE/sizeof(u32)];
};
static union irq_ctx *percpu_irq_ctxs[NR_CPUS];

/*
 *  Storage for the interrupt stack.
 */
#if !defined(CONFIG_IRQSTACKS_USEOCM)
static char percpu_irq_stacks[(NR_CPUS * THREAD_SIZE) + (THREAD_SIZE - 1)];
#else
/*
 *  For OCM, the linker will ensure that space is allocated for the stack
 *  see (vmlinux.lds.S)
 */
static char percpu_irq_stacks[];
#endif

#endif

/*
 * Save trap IRQ because we need to un-suspend if it gets set.
 */
static unsigned int ldsr_trap_irq_mask;
static unsigned int ldsr_trap_irq;

/*
 * ret_from_interrupt_to_kernel
 *	Just restore the context and do nothing else.
 */
asmlinkage void ret_from_interrupt_to_kernel(void) __attribute__((naked));

/*
 * ret_from_interrupt_to_user
 *	Call scheduler if needed. Just restore the context.
 */
asmlinkage void ret_from_interrupt_to_user(void) __attribute__((naked));

#ifdef DEBUG_LDSR
u32_t old_sp, old_pc, old_a0, old_a5, old_a3;
struct pt_regs copy_regs, *copy_save_area;
#endif

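/*
 * __user_mode()
 *	Return true if the given stack pointer is neither the per-CPU
 *	interrupt stack nor the current task's kernel stack, i.e. the
 *	thread was executing in user mode when it was interrupted.
 */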
int __user_mode(unsigned long sp)
{
	u32_t saved_stack_base = sp & ~(ASM_THREAD_SIZE - 1);
#if defined(CONFIG_IRQSTACKS_USEOCM)
	if ((union irq_ctx *)saved_stack_base == percpu_irq_ctxs[smp_processor_id()]) {
		/*
		 *  On the interrupt stack.
		 */
		return 0;
	}
#endif

	if (!(u32_t)current) {
		return 0;
	}
	return saved_stack_base != ((u32_t)current->stack);
}

/*
 * ldsr_lock_release()
 *	Release the LDSR lock.
 */
static void ldsr_lock_release(void)
{
	UBICOM32_UNLOCK(LDSR_LOCK_BIT);
}

/*
 * ldsr_lock_acquire()
 *	Acquire the LDSR lock, spin if not available.
 */
static void ldsr_lock_acquire(void)
{
	UBICOM32_LOCK(LDSR_LOCK_BIT);
}

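/*
 * The low bits of scratchpad1 hold a per-thread interrupt-disable flag
 * (bit tid set means interrupts are treated as disabled for that thread
 * by the LDSR); the upper 16 bits hold the global locks that are tested
 * in ldsr_deliver_interrupt().
 */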
/*
 * ldsr_thread_irq_disable()
 *	Disable interrupts for the specified thread.
 */
static void ldsr_thread_irq_disable(unsigned int tid)
{
	unsigned int mask = (1 << tid);

	asm volatile (
	"	or.4	scratchpad1, scratchpad1, %0	\n\t"
		:
		: "d"(mask)
		: "cc"
	);
}

/*
 * ldsr_thread_get_interrupts()
 *	Get the interrupt state for all threads.
 */
static unsigned long ldsr_thread_get_interrupts(void)
{
	unsigned long ret = 0;
	asm volatile (
	"	move.4	%0, scratchpad1	\n\t"
		: "=r" (ret)
		:
	);
	return ret;
}

/*
 * ldsr_emulate_and_run()
 *	Emulate the instruction and then set the thread to run.
 */
static void ldsr_emulate_and_run(unsigned int tid)
{
	unsigned int thread_mask = (1 << tid);
	u32_t write_csr = (tid << 15) | (1 << 14);
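
	/*
	 * write_csr selects thread tid as the destination context for the
	 * setcsr below (same encoding as in ldsr_ctxsw_thread()).
	 */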

	/*
	 * Emulate the unaligned access.
	 */
	unaligned_emulate(tid);

	/*
	 * Get the thread back in a running state.
	 */
	asm volatile (
	"	setcsr	%0			\n\t"
	"	setcsr_flush 0			\n\t"
	"	move.4	trap_cause, #0		\n\t" /* Clear the trap cause
						       * register */
	"	setcsr	#0			\n\t"
	"	setcsr_flush 0			\n\t"
	"	move.4	mt_dbg_active_set, %1	\n\t" /* Activate thread even if
						       * in dbg/fault state */
	"	move.4	mt_active_set, %1	\n\t" /* Restart target
						       * thread. */
		:
		: "r" (write_csr), "d" (thread_mask)
		: "cc"
	);
	thread_enable_mask(thread_mask);
}

/*
 * ldsr_preemptive_context_save()
 *	Save the thread context of another hardware thread.  The other thread
 *	must be stalled.
 */
static inline void ldsr_preemptive_context_save(u32_t thread,
						struct pt_regs *regs)
{
	/*
	 * Save the current state of the specified thread
	 */
	asm volatile (
	"       move.4  a3, %0					\n\t"

		/* set src1 from the target thread */
	"       move.4  csr, %1					\n\t"
	"	setcsr_flush 0					\n\t"
	"	setcsr_flush 0					\n\t"

		/* copy state from the other thread */
	"       move.4  "D(PT_D0)"(a3), d0			\n\t"
	"       move.4  "D(PT_D1)"(a3), d1			\n\t"
	"       move.4  "D(PT_D2)"(a3), d2			\n\t"
	"       move.4  "D(PT_D3)"(a3), d3			\n\t"
	"       move.4  "D(PT_D4)"(a3), d4			\n\t"
	"       move.4  "D(PT_D5)"(a3), d5			\n\t"
	"       move.4  "D(PT_D6)"(a3), d6			\n\t"
	"       move.4  "D(PT_D7)"(a3), d7			\n\t"
	"       move.4  "D(PT_D8)"(a3), d8			\n\t"
	"       move.4  "D(PT_D9)"(a3), d9			\n\t"
	"       move.4  "D(PT_D10)"(a3), d10			\n\t"
	"       move.4  "D(PT_D11)"(a3), d11			\n\t"
	"       move.4  "D(PT_D12)"(a3), d12			\n\t"
	"       move.4  "D(PT_D13)"(a3), d13			\n\t"
	"       move.4  "D(PT_D14)"(a3), d14			\n\t"
	"       move.4  "D(PT_D15)"(a3), d15			\n\t"
	"       move.4  "D(PT_A0)"(a3), a0			\n\t"
	"       move.4  "D(PT_A1)"(a3), a1			\n\t"
	"       move.4  "D(PT_A2)"(a3), a2			\n\t"
	"       move.4  "D(PT_A3)"(a3), a3			\n\t"
	"       move.4  "D(PT_A4)"(a3), a4			\n\t"
	"       move.4  "D(PT_A5)"(a3), a5			\n\t"
	"       move.4  "D(PT_A6)"(a3), a6			\n\t"
	"       move.4  "D(PT_SP)"(a3), a7			\n\t"
	"       move.4  "D(PT_ACC0HI)"(a3), acc0_hi		\n\t"
	"       move.4  "D(PT_ACC0LO)"(a3), acc0_lo		\n\t"
	"       move.4  "D(PT_MAC_RC16)"(a3), mac_rc16		\n\t"
	"       move.4  "D(PT_ACC1HI)"(a3), acc1_hi		\n\t"
	"       move.4  "D(PT_ACC1LO)"(a3), acc1_lo		\n\t"
	"       move.4  "D(PT_SOURCE3)"(a3), source3		\n\t"
	"       move.4  "D(PT_INST_CNT)"(a3), inst_cnt		\n\t"
	"       move.4  "D(PT_CSR)"(a3), csr			\n\t"
	"       move.4  "D(PT_DUMMY_UNUSED)"(a3), #0		\n\t"
	"       move.4  "D(PT_INT_MASK0)"(a3), int_mask0	\n\t"
	"       move.4  "D(PT_INT_MASK1)"(a3), int_mask1	\n\t"
	"       move.4  "D(PT_TRAP_CAUSE)"(a3), trap_cause	\n\t"
	"       move.4  "D(PT_PC)"(a3), pc			\n\t"
	"	move.4	"D(PT_PREVIOUS_PC)"(a3), previous_pc	\n\t"
		/* disable csr thread select */
	"       movei   csr, #0					\n\t"
	"       setcsr_flush 0					\n\t"
	:
	: "r" (regs->dn), "d" ((thread << 9) | (1 << 8))
	: "a3"
	);
}

/*
 * ldsr_rotate_threads()
 *	Simple round robin algorithm for choosing the next cpu
 */
static int ldsr_rotate_threads(unsigned long cpus)
{
	static unsigned char ldsr_bits[8] = {
		3, 0, 1, 0, 2, 0, 1, 0
	};

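	/*
	 * ldsr_bits[v] is the index of the lowest set bit among the low
	 * three bits of v; ldsr_bits[0] is 3 because the shifts below
	 * guarantee that at least one bit in the low nibble is set.  For
	 * example, 0x6 (binary 110) maps to 1 and 0x4 maps to 2.
	 */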
	static int nextbit;
	int thisbit;

	/*
	 * Rotate the cpus value down so that we consider candidates starting
	 * from where we left off last time, then move the bits we would lose
	 * into the top half of the value.
	 */
	cpus = (cpus >> nextbit) | (cpus << ((sizeof(cpus) * 8) - nextbit));

	/*
	 * Roughly half the time none of these shifts are needed at all, and
	 * when they are, we usually only need the first one.
	 */
	if (!(cpus & 0xffff)) {
		nextbit += 16;
		cpus >>= 16;
	}

	if (!(cpus & 0xff)) {
		nextbit += 8;
		cpus >>= 8;
	}

	if (!(cpus & 0xf)) {
		nextbit += 4;
		cpus >>= 4;
	}

	nextbit += ldsr_bits[cpus & 0x7];
	thisbit = (nextbit & ((sizeof(cpus) * 8) - 1));
	nextbit = (thisbit + 1) & ((sizeof(cpus) * 8) - 1);
	DEBUG_ASSERT(thisbit < THREAD_ARCHITECTURAL_MAX);
	return thisbit;
}

/*
 * ldsr_rotate_interrupts()
 *	Get rotating next set bit value.
 */
static int ldsr_rotate_interrupts(unsigned long long interrupts)
{
	static unsigned char ldsr_bits[8] = {
		3, 0, 1, 0, 2, 0, 1, 0
	};

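	/*
	 * Same lookup table as in ldsr_rotate_threads(): the index of the
	 * lowest set bit among the low three bits, with 3 covering the
	 * "only bit 3 set" case left over by the shifts below.
	 */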
	static int nextbit;
	int thisbit;

	/*
	 * Move the interrupts down so that we consider interrupts from where
	 * we left off, then take the interrupts we would lose and move them
	 * to the top half of the interrupts value.
	 */
	interrupts = (interrupts >> nextbit) |
		(interrupts << ((sizeof(interrupts) * 8) - nextbit));

	/*
	 * Roughly half the time none of these shifts are needed at all, and
	 * when they are, we usually only need the first one.
	 */
	if (!(interrupts & 0xffffffff)) {
		nextbit += 32;
		interrupts >>= 32;
	}

	if (!(interrupts & 0xffff)) {
		nextbit += 16;
		interrupts >>= 16;
	}

	if (!(interrupts & 0xff)) {
		nextbit += 8;
		interrupts >>= 8;
	}

	if (!(interrupts & 0xf)) {
		nextbit += 4;
		interrupts >>= 4;
	}

	nextbit += ldsr_bits[interrupts & 0x7];
	thisbit = (nextbit & ((sizeof(interrupts) * 8) - 1));
	nextbit = (thisbit + 1) & ((sizeof(interrupts) * 8) - 1);

	DEBUG_ASSERT(thisbit < (sizeof(interrupts) * 8));
	return thisbit;
}

/*
 * ldsr_backout_of_irq()
 *
 * One way or another this interrupt is not going to be processed, so make
 * sure that it is reset.  We are not going to call irq_end_vector(), so
 * unmask the interrupt here.
 */
static void ldsr_backout_of_irq(int vector, unsigned long tid_mask)
{
#if defined(CONFIG_SMP)
	if (unlikely(vector == smp_ipi_irq)) {
		smp_reset_ipi(tid_mask);
	}
#endif
	ldsr_unmask_vector(vector);
	ldsr_interrupt.backout++;
}

#if defined(CONFIG_IRQSTACKS)
/*
 * ldsr_choose_savearea_and_returnvec()
 *	Test our current state (user, kernel, interrupt) and set things up.
 *
 * This version of the function uses 3 stacks and nests interrupts
 * on the interrupt stack.
 */
static struct pt_regs *ldsr_choose_savearea_and_returnvec(thread_t tid, u32_t linux_sp, u32_t *pvec)
{
	struct pt_regs *save_area;
	u32_t masked_linux_sp = linux_sp & ~(THREAD_SIZE - 1);
	struct thread_info *ti = (struct thread_info *)sw_ksp[tid];

#if defined(CONFIG_SMP)
	union irq_ctx *icp = percpu_irq_ctxs[tid];
#else
	union irq_ctx *icp = percpu_irq_ctxs[0];
#endif

	if (masked_linux_sp == (u32_t)icp) {
		/*
		 * Fault/Interrupt occurred while on the interrupt stack.
		 */
		save_area = (struct pt_regs *)((char *)linux_sp - sizeof(struct pt_regs) - 8);
		*pvec = (u32_t)(&ret_from_interrupt_to_kernel);
	} else {
		/*
		 *  Fault/Interrupt occurred while on the user/kernel stack.  This is
		 *  a new first use of the interrupt stack.
		 */
		save_area = (struct pt_regs *)((char *)icp + sizeof(icp->stack) - sizeof(struct pt_regs) - 8);
		if (masked_linux_sp == (u32_t)ti) {
			*pvec = (u32_t)(&ret_from_interrupt_to_kernel);
		} else {
			*pvec = (u32_t)(&ret_from_interrupt_to_user);
		}

		/*
		 * Because the softirq code will execute on the "interrupt" stack, we
		 * need to keep track of which "task" was executing on the cpu.  This
		 * is done by copying the thread_info->task from the cpu we are about
		 * to context switch into the interrupt context's thread_info
		 * structure.
		 */
		icp->tinfo.task = ti->task;
		icp->tinfo.preempt_count =
				(icp->tinfo.preempt_count & ~SOFTIRQ_MASK) |
				(ti->preempt_count & SOFTIRQ_MASK);
		icp->tinfo.interrupt_nesting = 0;
	}
	save_area->nesting_level = icp->tinfo.interrupt_nesting;
	return save_area;
}

#else
/*
 * ldsr_choose_savearea_and_returnvec()
 *	Test our current state (user, kernel, interrupt) and set things up.
 *
 * This version of the function uses just the user & kernel stacks and
 * nests interrupts on the existing kernel stack.
 */
static struct pt_regs *ldsr_choose_savearea_and_returnvec(thread_t tid, u32_t linux_sp, u32_t *pvec)
{
	struct pt_regs *save_area;
	u32_t masked_linux_sp = linux_sp & ~(THREAD_SIZE - 1);
	struct thread_info *ti = (struct thread_info *)sw_ksp[tid];

	if (masked_linux_sp == (u32_t)ti) {
		/*
		 * Fault/Interrupt occurred while on the kernel stack.
		 */
		save_area = (struct pt_regs *)((char *)linux_sp - sizeof(struct pt_regs) - 8);
		*pvec = (u32_t)(&ret_from_interrupt_to_kernel);
	} else {
		/*
		 *  Fault/Interrupt occurred while on the user stack.
		 */
		ti->interrupt_nesting = 0;
		save_area = (struct pt_regs *)((u32_t)ti + THREAD_SIZE - sizeof(struct pt_regs) - 8);
		*pvec = (u32_t)(&ret_from_interrupt_to_user);
	}
	save_area->nesting_level = ti->interrupt_nesting;
	return save_area;
}
#endif

/*
 * ldsr_ctxsw_thread()
 *	Context switch a mainline thread to execute do_IRQ() for the specified
 *	vector.
 */
static void ldsr_ctxsw_thread(int vector, thread_t tid)
{
	u32_t linux_sp;
	u32_t return_vector;
	struct pt_regs *save_area, *regs;
	u32_t thread_mask = (1 << tid);
	u32_t read_csr = ((tid << 9) | (1 << 8));
	u32_t write_csr = (tid << 15) | (1 << 14);
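	/*
	 * read_csr selects tid as the source-1 thread for reads of its
	 * registers; write_csr selects tid as the destination thread for
	 * the setcsr-protected writes further down.
	 */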
	u32_t interrupt_vector = (u32_t)(&do_IRQ);

	unsigned int frame_type = UBICOM32_FRAME_TYPE_INTERRUPT;

	DEBUG_ASSERT(!thread_is_enabled(tid));

	/*
	 * Check whether the thread has trapped.  If it has, we deliver a
	 * trap frame (trap_handler) rather than an interrupt frame.
	 */
	if (unlikely(thread_is_trapped(tid))) {
		/*
		 * Read the trap cause, the sp and clear the MT_TRAP bits.
		 */
		unsigned int cause;
		asm volatile (
		"	setcsr	%3		\n\t"
		"	setcsr_flush 0		\n\t"
		"	setcsr_flush 0		\n\t"
		"	move.4	%0, TRAP_CAUSE	\n\t"
		"	move.4	%1, SP		\n\t"
		"	setcsr	#0		\n\t"
		"	setcsr_flush 0		\n\t"
		"	move.4	MT_BREAK_CLR, %2\n\t"
		"	move.4	MT_TRAP_CLR, %2	\n\t"
			: "=&r" (cause), "=&r" (linux_sp)
			: "r" (thread_mask), "m" (read_csr)
		);

		ldsr_backout_of_irq(vector, (1 << tid));

#if !defined(CONFIG_UNALIGNED_ACCESS_DISABLED)
		/*
		 * See if the unaligned trap handler can deal with this.
		 * If so, emulate the instruction and then just restart
		 * the thread.
		 */
		if (unaligned_only(cause)) {
#if defined(CONFIG_UNALIGNED_ACCESS_USERSPACE_ONLY)
			/*
			 * Check if this is a kernel stack; if so, we will
			 * not handle the trap.
			 */
			u32_t masked_linux_sp = linux_sp & ~(THREAD_SIZE - 1);
			if ((masked_linux_sp != (u32_t)sw_ksp[tid]) &&
			    unaligned_only(cause)) {
				ldsr_emulate_and_run(tid);
				return;
			}
#else
			ldsr_emulate_and_run(tid);
			return;
#endif
		}
#endif

		interrupt_vector = (u32_t)(&trap_handler);
		frame_type = UBICOM32_FRAME_TYPE_TRAP;
	} else {
		/*
		 * Read the target thread's SP
		 */
		asm volatile (
		"	setcsr	%1		\n\t"
		"	setcsr_flush 0		\n\t"
		"	setcsr_flush 0		\n\t"
		"	move.4	%0, SP		\n\t"
		"	setcsr	#0		\n\t"
		"	setcsr_flush 0		\n\t"
			: "=m" (linux_sp)
			: "m" (read_csr)
		);
	}

	/*
	 * We are delivering an interrupt, count it.
	 */
	ldsr_interrupt.total++;

	/*
	 * At this point, we will definitely force this thread to
	 * a new context, so show its interrupts as disabled.
	 */
	ldsr_thread_irq_disable(tid);

	/*
	 * Test our current state (user, kernel, interrupt).  Save the
	 * appropriate data and set up for the return.
	 */
	save_area = ldsr_choose_savearea_and_returnvec(tid, linux_sp, &return_vector);

	/*
	 *  The pt_regs (save_area) contains the type of thread that we are dealing
	 *  with (KERNEL/NORMAL) and is copied into each pt_regs area.  We get this
	 *  from the current task's kernel pt_regs area that always exists at the
	 *  top of the kernel stack.
	 */
	regs = (struct pt_regs *)((u32_t)sw_ksp[tid] + THREAD_SIZE - sizeof(struct pt_regs) - 8);
	save_area->thread_type = regs->thread_type;

	/*
	 * Preserve the context of the Linux thread.
	 */
	ldsr_preemptive_context_save(tid, save_area);

	/*
	 * Load the frame_type into the save_area.
	 */
	save_area->frame_type = frame_type;

#ifdef CONFIG_STOP_ON_TRAP
	/*
	 * Before we get backtrace and showing stacks working well, it sometimes
	 * helps to enter the debugger when a trap occurs before we change the
	 * thread to handle the fault.  This optional code causes all threads to
	 * stop on every trap frame.  One assumes that GDB connected via the
	 * mailbox interface will be used to recover from this state.
	 */
	if (frame_type == UBICOM32_FRAME_TYPE_TRAP) {
		THREAD_STALL;
	}
#endif

#ifdef DEBUG_LDSR
	copy_regs = *save_area;
	copy_save_area = save_area;

	old_a0 = save_area->an[0];
	old_a3 = save_area->an[3];
	old_sp = save_area->an[7];
	old_a5 = save_area->an[5];
	old_pc = save_area->pc;
#endif

	/*
	 * Now we have to switch the kernel thread to run the do_IRQ function.
	 *	Set pc to do_IRQ
	 *	Set d0 to vector
	 *	Set d1 to save_area.
	 *	Set a5 to the proper return vector.
	 */
	asm volatile (
	"	setcsr	%0			\n\t"
	"	setcsr_flush 0			\n\t"
	"	move.4	d0, %5			\n\t" /* d0 = vector # */
	"	move.4	d1, %1			\n\t" /* d1 = save_area */
	"	move.4	sp, %1			\n\t" /* sp = save_area */
	"	move.4	a5, %2			\n\t" /* a5 = return_vector */
	"	move.4	pc, %3			\n\t" /* pc = do_IRQ routine. */
	"	move.4	trap_cause, #0		\n\t" /* Clear the trap cause
						       * register */
	"	setcsr	#0			\n\t"
	"	setcsr_flush 0			\n\t"
	"	enable_kernel_ranges %4		\n\t"
	"	move.4	mt_dbg_active_set, %4	\n\t" /* Activate thread even if
						       * in dbg/fault state */
	"	move.4	mt_active_set, %4	\n\t" /* Restart target
						       * thread. */
		:
		: "r" (write_csr), "r" (save_area),
		  "r" (return_vector), "r" (interrupt_vector),
		  "d" (thread_mask), "r" (vector)
		: "cc"
	);
	thread_enable_mask(thread_mask);
}

/*
 * ldsr_deliver_interrupt()
 *	Deliver the interrupt to one of the threads or to all of the threads.
 */
static void ldsr_deliver_interrupt(int vector,
				   unsigned long deliver_to,
				   int all)
{
	unsigned long disabled_threads;
	unsigned long possible_threads;
	unsigned long trapped_threads;
	unsigned long global_locks;

	/*
	 * Disable all of the threads that we might want to send
	 * this interrupt to.
	 */
retry:
	DEBUG_ASSERT(deliver_to);
	thread_disable_mask(deliver_to);

	/*
	 * If any threads are in the trap state, we have to service the
	 * trap for those threads first.
	 */
	asm volatile (
		"move.4	%0, MT_TRAP		\n\t"
		: "=r" (trapped_threads)
		:
	);

	trapped_threads &= deliver_to;
	if (unlikely(trapped_threads)) {
		/*
		 * All traps will be handled, so clear the trap bit before
		 * restarting any threads.
		 */
		ubicom32_clear_interrupt(ldsr_trap_irq);

		/*
		 * Let the remaining untrapped threads continue.
		 */
		deliver_to &= ~trapped_threads;
		if (deliver_to) {
			thread_enable_mask(deliver_to);
		}

		/*
		 * Force the trapped threads to handle
		 * a trap.
		 */
		while (trapped_threads) {
			unsigned long which = ffz(~trapped_threads);
			trapped_threads &= ~(1 << which);
			ldsr_ctxsw_thread(vector, which);
		}
		return;
	}

	/*
	 * Can we deliver an interrupt to any of the threads?
	 */
	disabled_threads = ldsr_thread_get_interrupts();
	possible_threads = deliver_to & ~disabled_threads;
	if (unlikely(!possible_threads)) {
#if defined(CONFIG_SMP)
		/*
		 * In the SMP case, we cannot wait because one cpu might be
		 * sending an IPI to another cpu which is currently blocked.
		 * The only way to ensure IPI delivery is to back out and
		 * keep trying.  For SMP, we don't sleep until the interrupts
		 * are delivered.
		 */
		thread_enable_mask(deliver_to);
		ldsr_backout_of_irq(vector, deliver_to);
		return;
#else
		/*
		 * In the UP case, we have nothing to do so we should wait.
		 *
		 * Since the INT_MASK0 and INT_MASK1 are "re-loaded" before we
		 * suspend in the outer loop, we do not need to save them here.
		 *
		 * We test that we were awakened for our specific interrupts
		 * because the ldsr mask/unmask operations will force the ldsr
		 * awake even if the interrupt on the mainline thread is not
		 * completed.
		 */
		unsigned int scratch = 0;
		thread_enable_mask(deliver_to);
		asm volatile (
		"	move.4	INT_MASK0, %1		\n\t"
		"	move.4	INT_MASK1, #0		\n\t"

		"1:	suspend				\n\t"
		"	move.4	%0, INT_STAT0		\n\t"
		"	and.4	%0, %0, %1		\n\t"
		"	jmpeq.f	1b			\n\t"

		"	move.4	INT_CLR0, %2		\n\t"
			: "+r" (scratch)
			: "d" (ldsr_suspend_mask), "r" (ldsr_soft_irq_mask)
			: "cc"
		);

		/*
		 * This delay is sized to coincide with the time it takes a
		 * thread to complete the exit (see return_from_interrupt).
		 */
		ldsr_interrupt.retry++;
		__delay(10);
		goto retry;
#endif
	}

	/*
	 * If any of the global locks are held, we cannot deliver any
	 * interrupts; we spin for __delay(10) and then try again.  If our
	 * spinning becomes a bottleneck, we will need to suspend, but for
	 * now let's just spin.
	 */
	asm volatile (
		"move.4	%0, scratchpad1		\n\t"
		: "=r" (global_locks)
		:
	);
	if (unlikely(global_locks & 0xffff0000)) {
		thread_enable_mask(deliver_to);

		/*
		 * This delay is sized to coincide with the average time it
		 * takes a thread to release a global lock.
		 */
		ldsr_interrupt.retry++;
		__delay(10);
		goto retry;
	}

	/*
	 * Deliver to one cpu.
	 */
	if (!all) {
		/*
		 * Find our victim and then enable everyone else.
		 */
		unsigned long victim = ldsr_rotate_threads(possible_threads);
		DEBUG_ASSERT((deliver_to & (1 << victim)));
		DEBUG_ASSERT((possible_threads & (1 << victim)));

		deliver_to &= ~(1 << victim);
		if (deliver_to) {
			thread_enable_mask(deliver_to);
		}
		ldsr_ctxsw_thread(vector, victim);
		return;
	}

	/*
	 * If we can't deliver to some threads, wake them
	 * back up and reset things to deliver to them.
	 */
	deliver_to &= ~possible_threads;
	if (unlikely(deliver_to)) {
		thread_enable_mask(deliver_to);
		ldsr_backout_of_irq(vector, deliver_to);
	}

	/*
	 * Deliver to all possible thread(s).
	 */
	while (possible_threads) {
		unsigned long victim = ffz(~possible_threads);
		possible_threads &= ~(1 << victim);
		ldsr_ctxsw_thread(vector, victim);
	}
}

/*
 * ldsr_thread()
 *	This thread acts as the interrupt controller for Linux.
 */
static void ldsr_thread(void *arg)
{
	int stat0;
	int stat1;
	int interrupt0;
	int interrupt1;
	long long interrupts;
	unsigned long cpus;

#if !defined(CONFIG_SMP)
	/*
	 * In a non-SMP configuration, we cannot use the cpu(s) arrays because
	 * there is not a 1-1 correspondence between cpu(s) and our threads.
	 * Thus we must get a local idea of the mainline threads and use the
	 * one and only set bit as the victim.  We do this once before the
	 * ldsr loop.
	 *
	 * In the SMP case, we will use the cpu(s) map to determine which cpu(s)
	 * are valid to send interrupts to.
	 */
	int victim = 0;
	unsigned int mainline = thread_get_mainline();
	if (mainline == 0) {
		panic("no mainline Linux threads to interrupt");
		return;
	}
	victim = ffz(~mainline);
	cpus = (1 << victim);
#endif

	while (1) {
		/*
		 * If one changes this code not to reload the INT_MASK(s), you
		 * need to know that code in the lock waiting above does not
		 * reset the MASK registers back; so that code will need to be
		 * changed.
		 */
		ldsr_lock_acquire();
		asm volatile (
		"	move.4 INT_MASK0, %0	\n\t"
		"	move.4 INT_MASK1, %1	\n\t"
			:
			: "U4" (ldsr_interrupt.mask0), "U4" (ldsr_interrupt.mask1)
		);
		ldsr_lock_release();
		thread_suspend();

		/*
		 * Read the interrupt status registers
		 */
		asm volatile (
			"move.4 %0, INT_STAT0	\n\t"
			"move.4 %1, INT_STAT1	\n\t"
			: "=r" (stat0), "=r" (stat1)
			:
		);

		/*
		 * We only care about interrupts that we have been told to care
		 * about.  The interrupt must be enabled, unmasked, and have
		 * occurred in the hardware.
		 */
		ldsr_lock_acquire();
		interrupt0 = ldsr_interrupt.enabled0 &
			ldsr_interrupt.mask0 & stat0;
		interrupt1 = ldsr_interrupt.enabled1 &
			ldsr_interrupt.mask1 & stat1;
		ldsr_lock_release();

		/*
		 * For each interrupt in the "snapshot" we mask the interrupt
		 * and then handle it (typically by calling do_IRQ()).
		 *
		 * The interrupt is unmasked by the desc->chip->end() function
		 * in the per chip generic interrupt handling code
		 * (arch/ubicom32/kernel/irq.c).
		 */
		interrupts = ((unsigned long long)interrupt1 << 32) |
			interrupt0;
		while (interrupts) {
			int all = 0;
			int vector = ldsr_rotate_interrupts(interrupts);
			interrupts &= ~((unsigned long long)1 << vector);

			/*
			 * Now mask off this vector so that the LDSR ignores
			 * it until it is acknowledged.
			 */
			ldsr_mask_vector(vector);
#if !defined(CONFIG_SMP)
			ldsr_deliver_interrupt(vector, cpus, all);
#else
			cpus = smp_get_affinity(vector, &all);
			if (!cpus) {
				/*
				 * No CPU to deliver to so just leave
				 * the interrupt unmasked and increase
				 * the backout count.  We will eventually
				 * return and deliver it again.
				 */
				ldsr_unmask_vector(vector);
				ldsr_interrupt.backout++;
				continue;
			}
			ldsr_deliver_interrupt(vector, cpus, all);
#endif
		}
	}

	/* NOTREACHED */
}

/*
 * ldsr_mask_vector()
 *	Temporarily mask the interrupt vector by turning off the bit in the
 *	mask register.
 */
void ldsr_mask_vector(unsigned int vector)
{
	unsigned int mask;
	if (vector < 32) {
		mask = ~(1 << vector);
		ldsr_lock_acquire();
		ldsr_interrupt.mask0 &= mask;
		ldsr_lock_release();
		thread_resume(ldsr_tid);
		return;
	}

	mask = ~(1 << (vector - 32));
	ldsr_lock_acquire();
	ldsr_interrupt.mask1 &= mask;
	ldsr_lock_release();
	thread_resume(ldsr_tid);
}

/*
 * ldsr_unmask_vector()
 *	Unmask the interrupt vector so that it can be used by turning on the
 *	bit in the mask register.
 *
 * Because it is legal for the interrupt path to disable an interrupt,
 * the unmasking code must ensure that disabled interrupts are not
 * unmasked.
 */
void ldsr_unmask_vector(unsigned int vector)
{
	unsigned int mask;
	if (vector < 32) {
		mask = (1 << vector);
		ldsr_lock_acquire();
		ldsr_interrupt.mask0 |= (mask & ldsr_interrupt.enabled0);
		ldsr_lock_release();
		thread_resume(ldsr_tid);
		return;
	}

	mask = (1 << (vector - 32));
	ldsr_lock_acquire();
	ldsr_interrupt.mask1 |= (mask & ldsr_interrupt.enabled1);
	ldsr_lock_release();
	thread_resume(ldsr_tid);
}

/*
 * ldsr_enable_vector()
 *	The LDSR implements an interrupt controller and has a local (to the
 *	LDSR) copy of its interrupt mask.
 */
void ldsr_enable_vector(unsigned int vector)
{
	unsigned int mask;
	if (vector < 32) {
		mask = (1 << vector);
		ldsr_lock_acquire();
		ldsr_interrupt.enabled0 |= mask;
		ldsr_interrupt.mask0 |= mask;
		ldsr_lock_release();
		thread_resume(ldsr_tid);
		return;
	}

	mask = (1 << (vector - 32));
	ldsr_lock_acquire();
	ldsr_interrupt.enabled1 |= mask;
	ldsr_interrupt.mask1 |= mask;
	ldsr_lock_release();
	thread_resume(ldsr_tid);
}

/*
 * ldsr_disable_vector()
 *	The LDSR implements an interrupt controller and has a local (to the
 *	LDSR) copy of its interrupt mask.
 */
void ldsr_disable_vector(unsigned int vector)
{
	unsigned int mask;

	if (vector < 32) {
		mask = ~(1 << vector);
		ldsr_lock_acquire();
		ldsr_interrupt.enabled0 &= mask;
		ldsr_interrupt.mask0 &= mask;
		ldsr_lock_release();
		thread_resume(ldsr_tid);
		return;
	}

	mask = ~(1 << (vector - 32));
	ldsr_lock_acquire();
	ldsr_interrupt.enabled1 &= mask;
	ldsr_interrupt.mask1 &= mask;
	ldsr_lock_release();
	thread_resume(ldsr_tid);
}

/*
 * ldsr_get_threadid()
 *	Return the threadid of the LDSR thread.
 */
thread_t ldsr_get_threadid(void)
{
	return ldsr_tid;
}

/*
 * ldsr_set_trap_irq()
 *	Save away the trap soft IRQ.
 *
 * See the per thread lock suspend code above for an explanation.
 */
void ldsr_set_trap_irq(unsigned int irq)
{
	ldsr_trap_irq = irq;
	ldsr_trap_irq_mask = (1 << irq);
	ldsr_suspend_mask |= ldsr_trap_irq_mask;
}

/*
 * ldsr_init()
 *	Initialize the LDSR (Interrupt Controller)
 */
void ldsr_init(void)
{
#if defined(CONFIG_IRQSTACKS)
	int i;
	union irq_ctx *icp;
#endif

	void *stack_high = (void *)ldsr_stack_space;
	stack_high += sizeof(ldsr_stack_space);
	stack_high -= 8;

	/*
	 * Obtain a soft IRQ to use
	 */
	if (irq_soft_alloc(&ldsr_soft_irq) < 0) {
		panic("no software IRQ is available\n");
		return;
	}
	ldsr_soft_irq_mask |= (1 << ldsr_soft_irq);
	ldsr_suspend_mask |= ldsr_soft_irq_mask;

	/*
	 * Now allocate and start the LDSR thread.
	 */
	ldsr_tid = thread_alloc();
	if (ldsr_tid < 0) {
		panic("no thread available to run LDSR");
		return;
	}

#if defined(CONFIG_IRQSTACKS)
	/*
	 * Initialize the per-cpu irq thread_info structure that
	 * is at the top of each per-cpu irq stack.
	 */
	icp = (union irq_ctx *)
		(((unsigned long)percpu_irq_stacks + (THREAD_SIZE - 1)) & ~(THREAD_SIZE - 1));
	for (i = 0; i < NR_CPUS; i++) {
		struct thread_info *ti = &(icp->tinfo);
		ti->task = NULL;
		ti->exec_domain = NULL;
		ti->cpu = i;
		ti->preempt_count = 0;
		ti->interrupt_nesting = 0;
		percpu_irq_ctxs[i] = icp++;
	}
#endif
	thread_start(ldsr_tid, ldsr_thread, NULL,
		     stack_high, THREAD_TYPE_NORMAL);
}