/*
 *	Intel IO-APIC support for multi-Pentium hosts.
 *
 *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
 *
 *	Many thanks to Stig Venaas for trying out countless experimental
 *	patches and reporting/debugging problems patiently!
 *
 *	(c) 1999, Multiple IO-APIC support, developed by
 *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
 *	Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
 *	further tested and cleaned up by Zach Brown <zab@redhat.com>
 *	and Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively
 *	Paul Diefenbaugh	:	Added full ACPI support
 */

#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/pci.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi.h>
#include <linux/sysdev.h>
#include <linux/msi.h>
#include <linux/htirq.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif

#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/mach_apic.h>
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>

struct irq_cfg {
	cpumask_t domain;
	cpumask_t old_domain;
	unsigned move_cleanup_count;
	u8 vector;
	u8 move_in_progress : 1;
};

/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
};

static int assign_irq_vector(int irq, cpumask_t mask);

#define __apicdebuginit  __init

int sis_apic_bug; /* not actually supported, dummy for compile */

static int no_timer_check;

static int disable_timer_pin_1 __initdata;

int timer_over_8254 __initdata = 1;

/* Where, if anywhere, the i8259 is connected in ExtINT mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };

static DEFINE_SPINLOCK(ioapic_lock);
DEFINE_SPINLOCK(vector_lock);

/*
 * # of IRQ routing registers
 */
int nr_ioapic_registers[MAX_IO_APICS];

/*
 * A rough estimate of how many shared IRQs there are; it can
 * be changed at any time.
 */
#define MAX_PLUS_SHARED_IRQS NR_IRQS
#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)

/*
 * This is performance-critical; we want to do it O(1).
 *
 * The indexing order of this array favors 1:1 mappings
 * between pins and IRQs.
 */

static struct irq_pin_list {
	short apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
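
/*
 * Illustration (not part of the original source): with a 1:1 mapping,
 * irq_2_pin[irq] alone describes the route.  When an IRQ is shared by
 * pins on two IO-APICs, the entries chain through ->next into the
 * overflow area above NR_IRQS, e.g. (slot numbers are hypothetical):
 *
 *	irq_2_pin[5]       = { .apic = 0, .pin = 5,  .next = NR_IRQS };
 *	irq_2_pin[NR_IRQS] = { .apic = 1, .pin = 17, .next = 0       };
 *
 * A ->next of 0 ends the chain; add_pin_to_irq() below builds them.
 */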

struct io_apic {
	unsigned int index;
	unsigned int unused[3];
	unsigned int data;
};

static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
		+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
}

static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(reg, &io_apic->index);
	return readl(&io_apic->data);
}

static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(reg, &io_apic->index);
	writel(value, &io_apic->data);
}

/*
 * Re-write a value: to be used for read-modify-write
 * cycles where the read already set up the index register.
 */
static inline void io_apic_modify(unsigned int apic, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(value, &io_apic->data);
}
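
/*
 * Minimal read-modify-write sketch (illustration only, not part of the
 * original file): io_apic_read() leaves the index register pointing at
 * the selected register, so io_apic_modify() can rewrite the data
 * register without reprogramming the index.  Callers must hold
 * ioapic_lock across the whole sequence.
 */
#if 0
static void example_set_mask_bit(unsigned int apic, unsigned int pin)
{
	unsigned int reg = io_apic_read(apic, 0x10 + pin * 2);

	io_apic_modify(apic, reg | 0x00010000);	/* bit 16 is the mask bit */
}
#endif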

/*
 * Synchronize the IO-APIC and the CPU by doing
 * a dummy read from the IO-APIC
 */
static inline void io_apic_sync(unsigned int apic)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	readl(&io_apic->data);
}

#define __DO_ACTION(R, ACTION, FINAL)					\
									\
{									\
	int pin;							\
	struct irq_pin_list *entry = irq_2_pin + irq;			\
									\
	BUG_ON(irq >= NR_IRQS);						\
	for (;;) {							\
		unsigned int reg;					\
		pin = entry->pin;					\
		if (pin == -1)						\
			break;						\
		reg = io_apic_read(entry->apic, 0x10 + R + pin*2);	\
		reg ACTION;						\
		io_apic_modify(entry->apic, reg);			\
		FINAL;							\
		if (!entry->next)					\
			break;						\
		entry = irq_2_pin + entry->next;			\
	}								\
}

union entry_union {
	struct { u32 w1, w2; };
	struct IO_APIC_route_entry entry;
};

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;
	unsigned long flags;
	spin_lock_irqsave(&ioapic_lock, flags);
	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	return eu.entry;
}

/*
 * When we write a new IO APIC routing entry, we need to write the high
 * word first! If the mask bit in the low word is clear, we will enable
 * the interrupt, and we need to make sure the entry is fully populated
 * before that happens.
 */
static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	union entry_union eu;
	eu.entry = e;
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}

static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	unsigned long flags;
	spin_lock_irqsave(&ioapic_lock, flags);
	__ioapic_write_entry(apic, pin, e);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}
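
/*
 * Example of the hazard the ordering above avoids (illustration only):
 * if the low word were written first with the mask bit clear, the pin
 * would be armed while the high word - and thus the destination APIC
 * ID - still held stale data, so an interrupt arriving in that window
 * could be delivered with a half-updated route.
 */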

/*
 * When we mask an IO APIC routing entry, we need to write the low
 * word first, in order to set the mask bit before we change the
 * high bits!
 */
static void ioapic_mask_entry(int apic, int pin)
{
	unsigned long flags;
	union entry_union eu = { .entry.mask = 1 };

	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

#ifdef CONFIG_SMP
static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
{
	int apic, pin;
	struct irq_pin_list *entry = irq_2_pin + irq;

	BUG_ON(irq >= NR_IRQS);
	for (;;) {
		unsigned int reg;
		apic = entry->apic;
		pin = entry->pin;
		if (pin == -1)
			break;
		io_apic_write(apic, 0x11 + pin*2, dest);
		reg = io_apic_read(apic, 0x10 + pin*2);
		reg &= ~0x000000ff;
		reg |= vector;
		io_apic_modify(apic, reg);
		if (!entry->next)
			break;
		entry = irq_2_pin + entry->next;
	}
}

static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	unsigned long flags;
	unsigned int dest;
	cpumask_t tmp;

	cpus_and(tmp, mask, cpu_online_map);
	if (cpus_empty(tmp))
		return;

	if (assign_irq_vector(irq, mask))
		return;

	cpus_and(tmp, cfg->domain, mask);
	dest = cpu_mask_to_apicid(tmp);

	/*
	 * Only the high 8 bits are valid.
	 */
	dest = SET_APIC_LOGICAL_ID(dest);

	spin_lock_irqsave(&ioapic_lock, flags);
	__target_IO_APIC_irq(irq, dest, cfg->vector);
	irq_desc[irq].affinity = mask;
	spin_unlock_irqrestore(&ioapic_lock, flags);
}
#endif

/*
 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 * shared ISA-space IRQs, so we have to support them. We are super
 * fast in the common case, and fast for shared ISA-space IRQs.
 */
static void add_pin_to_irq(unsigned int irq, int apic, int pin)
{
	static int first_free_entry = NR_IRQS;
	struct irq_pin_list *entry = irq_2_pin + irq;

	BUG_ON(irq >= NR_IRQS);
	while (entry->next)
		entry = irq_2_pin + entry->next;

	if (entry->pin != -1) {
		entry->next = first_free_entry;
		entry = irq_2_pin + entry->next;
		if (++first_free_entry >= PIN_MAP_SIZE)
			panic("io_apic.c: ran out of irq_2_pin entries!");
	}
	entry->apic = apic;
	entry->pin = pin;
}

#define DO_ACTION(name, R, ACTION, FINAL)				\
									\
	static void name##_IO_APIC_irq(unsigned int irq)		\
	__DO_ACTION(R, ACTION, FINAL)

DO_ACTION(__mask,	0, |= 0x00010000, io_apic_sync(entry->apic))	/* mask = 1 */
DO_ACTION(__unmask,	0, &= 0xfffeffff, )				/* mask = 0 */
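
/*
 * For reference (illustrative expansion, not generated source): the
 * DO_ACTION() lines above define __mask_IO_APIC_irq() and
 * __unmask_IO_APIC_irq().  Each walks the irq_2_pin chain for the IRQ
 * and applies "reg |= 0x00010000" resp. "reg &= 0xfffeffff" to the low
 * dword of every redirection entry; bit 16 is the per-pin mask bit.
 * Masking additionally does io_apic_sync() so the write is posted to
 * the IO-APIC before the caller proceeds.
 */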

static void mask_IO_APIC_irq(unsigned int irq)
{
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	__mask_IO_APIC_irq(irq);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void unmask_IO_APIC_irq(unsigned int irq)
{
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	__unmask_IO_APIC_irq(irq);
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
	struct IO_APIC_route_entry entry;

	/* Check delivery_mode to be sure we're not clearing an SMI pin */
	entry = ioapic_read_entry(apic, pin);
	if (entry.delivery_mode == dest_SMI)
		return;
	/*
	 * Disable it in the IO-APIC irq-routing table:
	 */
	ioapic_mask_entry(apic, pin);
}

static void clear_IO_APIC(void)
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++)
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
			clear_IO_APIC_pin(apic, pin);
}

int skip_ioapic_setup;
int ioapic_force;

/* dummy parsing: see setup.c */

static int __init disable_ioapic_setup(char *str)
{
	skip_ioapic_setup = 1;
	return 0;
}
early_param("noapic", disable_ioapic_setup);

/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
static int __init disable_timer_pin_setup(char *arg)
{
	disable_timer_pin_1 = 1;
	return 1;
}
__setup("disable_timer_pin_1", disable_timer_pin_setup);

static int __init setup_disable_8254_timer(char *s)
{
	timer_over_8254 = -1;
	return 1;
}
static int __init setup_enable_8254_timer(char *s)
{
	timer_over_8254 = 2;
	return 1;
}

__setup("disable_8254_timer", setup_disable_8254_timer);
__setup("enable_8254_timer", setup_enable_8254_timer);

/*
 * Find the IRQ entry number of a certain pin.
 */
static int find_irq_entry(int apic, int pin, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++)
		if (mp_irqs[i].mpc_irqtype == type &&
		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
		    mp_irqs[i].mpc_dstirq == pin)
			return i;

	return -1;
}

/*
 * Find the pin to which IRQ[irq] (ISA) is connected
 */
static int __init find_isa_irq_pin(int irq, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].mpc_srcbus;

		if (test_bit(lbus, mp_bus_not_pci) &&
		    (mp_irqs[i].mpc_irqtype == type) &&
		    (mp_irqs[i].mpc_srcbusirq == irq))
			return mp_irqs[i].mpc_dstirq;
	}
	return -1;
}

static int __init find_isa_irq_apic(int irq, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].mpc_srcbus;

		if (test_bit(lbus, mp_bus_not_pci) &&
		    (mp_irqs[i].mpc_irqtype == type) &&
		    (mp_irqs[i].mpc_srcbusirq == irq))
			break;
	}
	if (i < mp_irq_entries) {
		int apic;
		for (apic = 0; apic < nr_ioapics; apic++) {
			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
				return apic;
		}
	}

	return -1;
}

/*
 * Find a specific PCI IRQ entry.
 * Not an __init, possibly needed by modules
 */
static int pin_2_irq(int idx, int apic, int pin);

int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
{
	int apic, i, best_guess = -1;

	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
		bus, slot, pin);
	if (mp_bus_id_to_pci_bus[bus] == -1) {
		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
		return -1;
	}
	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].mpc_srcbus;

		for (apic = 0; apic < nr_ioapics; apic++)
			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
				break;

		if (!test_bit(lbus, mp_bus_not_pci) &&
		    !mp_irqs[i].mpc_irqtype &&
		    (bus == lbus) &&
		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
			int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq);

			if (!(apic || IO_APIC_IRQ(irq)))
				continue;

			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
				return irq;
			/*
			 * Use the first all-but-pin matching entry as a
			 * best-guess fuzzy result for broken mptables.
			 */
			if (best_guess < 0)
				best_guess = irq;
		}
	}
	BUG_ON(best_guess >= NR_IRQS);
	return best_guess;
}

/* ISA interrupts are active high (polarity zero) and edge triggered
 * when listed as conforming in the MP table. */

#define default_ISA_trigger(idx)	(0)
#define default_ISA_polarity(idx)	(0)

/* PCI interrupts are active low (polarity one) and level triggered
 * when listed as conforming in the MP table. */

#define default_PCI_trigger(idx)	(1)
#define default_PCI_polarity(idx)	(1)

static int __init MPBIOS_polarity(int idx)
{
	int bus = mp_irqs[idx].mpc_srcbus;
	int polarity;

	/*
	 * Determine IRQ line polarity (high active or low active):
	 */
	switch (mp_irqs[idx].mpc_irqflag & 3)
	{
		case 0: /* conforms, i.e. bus-type dependent polarity */
			if (test_bit(bus, mp_bus_not_pci))
				polarity = default_ISA_polarity(idx);
			else
				polarity = default_PCI_polarity(idx);
			break;
		case 1: /* high active */
		{
			polarity = 0;
			break;
		}
		case 2: /* reserved */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			polarity = 1;
			break;
		}
		case 3: /* low active */
		{
			polarity = 1;
			break;
		}
		default: /* invalid */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			polarity = 1;
			break;
		}
	}
	return polarity;
}

static int MPBIOS_trigger(int idx)
{
	int bus = mp_irqs[idx].mpc_srcbus;
	int trigger;

	/*
	 * Determine IRQ trigger mode (edge or level sensitive):
	 */
	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
	{
		case 0: /* conforms, i.e. bus-type dependent */
			if (test_bit(bus, mp_bus_not_pci))
				trigger = default_ISA_trigger(idx);
			else
				trigger = default_PCI_trigger(idx);
			break;
		case 1: /* edge */
		{
			trigger = 0;
			break;
		}
		case 2: /* reserved */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			trigger = 1;
			break;
		}
		case 3: /* level */
		{
			trigger = 1;
			break;
		}
		default: /* invalid */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			trigger = 0;
			break;
		}
	}
	return trigger;
}

static inline int irq_polarity(int idx)
{
	return MPBIOS_polarity(idx);
}

static inline int irq_trigger(int idx)
{
	return MPBIOS_trigger(idx);
}

static int pin_2_irq(int idx, int apic, int pin)
{
	int irq, i;
	int bus = mp_irqs[idx].mpc_srcbus;

	/*
	 * Debugging check, we are in big trouble if this message pops up!
	 */
	if (mp_irqs[idx].mpc_dstirq != pin)
		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");

	if (test_bit(bus, mp_bus_not_pci)) {
		irq = mp_irqs[idx].mpc_srcbusirq;
	} else {
		/*
		 * PCI IRQs are mapped in order
		 */
		i = irq = 0;
		while (i < apic)
			irq += nr_ioapic_registers[i++];
		irq += pin;
	}
	BUG_ON(irq >= NR_IRQS);
	return irq;
}

static int __assign_irq_vector(int irq, cpumask_t mask)
{
	/*
	 * NOTE! The local APIC isn't very good at handling
	 * multiple interrupts at the same interrupt level.
	 * As the interrupt level is determined by taking the
	 * vector number and shifting that right by 4, we
	 * want to spread these out a bit so that they don't
	 * all fall in the same interrupt level.
	 *
	 * Also, we've got to be careful not to trash gate
	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
	 */
	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
	unsigned int old_vector;
	int cpu;
	struct irq_cfg *cfg;

	BUG_ON((unsigned)irq >= NR_IRQS);
	cfg = &irq_cfg[irq];

	/* Only try and allocate irqs on cpus that are present */
	cpus_and(mask, mask, cpu_online_map);

	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
		return -EBUSY;

	old_vector = cfg->vector;
	if (old_vector) {
		cpumask_t tmp;
		cpus_and(tmp, cfg->domain, mask);
		if (!cpus_empty(tmp))
			return 0;
	}

	for_each_cpu_mask(cpu, mask) {
		cpumask_t domain, new_mask;
		int new_cpu;
		int vector, offset;

		domain = vector_allocation_domain(cpu);
		cpus_and(new_mask, domain, cpu_online_map);

		vector = current_vector;
		offset = current_offset;
next:
		vector += 8;
		if (vector >= FIRST_SYSTEM_VECTOR) {
			/* If we run out of vectors on large boxen, must share them. */
			offset = (offset + 1) % 8;
			vector = FIRST_DEVICE_VECTOR + offset;
		}
		if (unlikely(current_vector == vector))
			continue;
		if (vector == IA32_SYSCALL_VECTOR)
			goto next;
		for_each_cpu_mask(new_cpu, new_mask)
			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
				goto next;
		/* Found one! */
		current_vector = vector;
		current_offset = offset;
		if (old_vector) {
			cfg->move_in_progress = 1;
			cfg->old_domain = cfg->domain;
		}
		for_each_cpu_mask(new_cpu, new_mask)
			per_cpu(vector_irq, new_cpu)[vector] = irq;
		cfg->vector = vector;
		cfg->domain = domain;
		return 0;
	}
	return -ENOSPC;
}
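
/*
 * Worked example of the spreading above (illustration only): the local
 * APIC derives an interrupt's priority level from "vector >> 4", i.e.
 * sixteen vectors share one level.  Stepping candidates by 8 puts at
 * most two device vectors into any one level per pass; only when the
 * search wraps at FIRST_SYSTEM_VECTOR does the offset advance and a
 * new pass start to fill the levels more densely.  Vector 0x80
 * (IA32_SYSCALL_VECTOR) is skipped explicitly.
 */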

static int assign_irq_vector(int irq, cpumask_t mask)
{
	int err;
	unsigned long flags;

	spin_lock_irqsave(&vector_lock, flags);
	err = __assign_irq_vector(irq, mask);
	spin_unlock_irqrestore(&vector_lock, flags);
	return err;
}

static void __clear_irq_vector(int irq)
{
	struct irq_cfg *cfg;
	cpumask_t mask;
	int cpu, vector;

	BUG_ON((unsigned)irq >= NR_IRQS);
	cfg = &irq_cfg[irq];
	BUG_ON(!cfg->vector);

	vector = cfg->vector;
	cpus_and(mask, cfg->domain, cpu_online_map);
	for_each_cpu_mask(cpu, mask)
		per_cpu(vector_irq, cpu)[vector] = -1;

	cfg->vector = 0;
	cfg->domain = CPU_MASK_NONE;
}

/*
 * Initialize vector_irq on a new cpu.
 * This function must be called with vector_lock held.
 */
void __setup_vector_irq(int cpu)
{
	int irq, vector;

	/* Mark the inuse vectors */
	for (irq = 0; irq < NR_IRQS; ++irq) {
		if (!cpu_isset(cpu, irq_cfg[irq].domain))
			continue;
		vector = irq_cfg[irq].vector;
		per_cpu(vector_irq, cpu)[vector] = irq;
	}
	/* Mark the free vectors */
	for (vector = 0; vector < NR_VECTORS; ++vector) {
		irq = per_cpu(vector_irq, cpu)[vector];
		if (irq < 0)
			continue;
		if (!cpu_isset(cpu, irq_cfg[irq].domain))
			per_cpu(vector_irq, cpu)[vector] = -1;
	}
}

static struct irq_chip ioapic_chip;

static void ioapic_register_intr(int irq, unsigned long trigger)
{
	if (trigger)
		set_irq_chip_and_handler_name(irq, &ioapic_chip,
					      handle_fasteoi_irq, "fasteoi");
	else
		set_irq_chip_and_handler_name(irq, &ioapic_chip,
					      handle_edge_irq, "edge");
}

static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
			      int trigger, int polarity)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	struct IO_APIC_route_entry entry;
	cpumask_t mask;

	if (!IO_APIC_IRQ(irq))
		return;

	mask = TARGET_CPUS;
	if (assign_irq_vector(irq, mask))
		return;

	cpus_and(mask, cfg->domain, mask);

	apic_printk(APIC_VERBOSE, KERN_DEBUG
		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
		    "IRQ %d Mode:%i Active:%i)\n",
		    apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
		    irq, trigger, polarity);

	/*
	 * add it to the IO-APIC irq-routing table:
	 */
	memset(&entry, 0, sizeof(entry));

	entry.delivery_mode = INT_DELIVERY_MODE;
	entry.dest_mode = INT_DEST_MODE;
	entry.dest = cpu_mask_to_apicid(mask);
	entry.mask = 0;				/* enable IRQ */
	entry.trigger = trigger;
	entry.polarity = polarity;
	entry.vector = cfg->vector;

	/* Mask level triggered irqs.
	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
	 */
	if (trigger)
		entry.mask = 1;

	ioapic_register_intr(irq, trigger);
	if (irq < 16)
		disable_8259A_irq(irq);

	ioapic_write_entry(apic, pin, entry);
}

static void __init setup_IO_APIC_irqs(void)
{
	int apic, pin, idx, irq, first_notcon = 1;

	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");

	for (apic = 0; apic < nr_ioapics; apic++) {
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			idx = find_irq_entry(apic, pin, mp_INT);
			if (idx == -1) {
				if (first_notcon) {
					apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
					first_notcon = 0;
				} else
					apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
				continue;
			}

			irq = pin_2_irq(idx, apic, pin);
			add_pin_to_irq(irq, apic, pin);

			setup_IO_APIC_irq(apic, pin, irq,
					  irq_trigger(idx), irq_polarity(idx));
		}
	}

	if (!first_notcon)
		apic_printk(APIC_VERBOSE, " not connected.\n");
}

/*
 * Set up the 8259A-master output pin as broadcast to all
 * CPUs.
 */
static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
{
	struct IO_APIC_route_entry entry;
	unsigned long flags;

	memset(&entry, 0, sizeof(entry));

	disable_8259A_irq(0);

	/* mask LVT0 */
	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);

	/*
	 * We use logical delivery to get the timer IRQ
	 * to the first CPU.
	 */
	entry.dest_mode = INT_DEST_MODE;
	entry.mask = 0;					/* unmask IRQ now */
	entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
	entry.delivery_mode = INT_DELIVERY_MODE;
	entry.polarity = 0;
	entry.trigger = 0;
	entry.vector = vector;

	/*
	 * The timer IRQ doesn't have to know that behind the
	 * scenes we have an 8259A master in AEOI mode ...
	 */
	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");

	/*
	 * Add it to the IO-APIC irq-routing table:
	 */
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2*pin, *(((int *)&entry) + 1));
	io_apic_write(apic, 0x10 + 2*pin, *(((int *)&entry) + 0));
	spin_unlock_irqrestore(&ioapic_lock, flags);

	enable_8259A_irq(0);
}

void __apicdebuginit print_IO_APIC(void)
{
	int apic, i;
	union IO_APIC_reg_00 reg_00;
	union IO_APIC_reg_01 reg_01;
	union IO_APIC_reg_02 reg_02;
	unsigned long flags;

	if (apic_verbosity == APIC_QUIET)
		return;

	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
	for (i = 0; i < nr_ioapics; i++)
		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);

	/*
	 * We are a bit conservative about what we expect.  We have to
	 * know about every hardware change ASAP.
	 */
	printk(KERN_INFO "testing the IO APIC.......................\n");

	for (apic = 0; apic < nr_ioapics; apic++) {

		spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic, 0);
		reg_01.raw = io_apic_read(apic, 1);
		if (reg_01.bits.version >= 0x10)
			reg_02.raw = io_apic_read(apic, 2);
		spin_unlock_irqrestore(&ioapic_lock, flags);

		printk("\n");
		printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
		printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
		printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);

		printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
		printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);

		printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
		printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);

		if (reg_01.bits.version >= 0x10) {
			printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
			printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
		}

		printk(KERN_DEBUG ".... IRQ redirection table:\n");

		printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
				  " Stat Dmod Deli Vect:   \n");

		for (i = 0; i <= reg_01.bits.entries; i++) {
			struct IO_APIC_route_entry entry;

			entry = ioapic_read_entry(apic, i);

			printk(KERN_DEBUG " %02x %03X ",
				i,
				entry.dest
			);

			printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
				entry.mask,
				entry.trigger,
				entry.irr,
				entry.polarity,
				entry.delivery_status,
				entry.dest_mode,
				entry.delivery_mode,
				entry.vector
			);
		}
	}
	printk(KERN_DEBUG "IRQ to pin mappings:\n");
	for (i = 0; i < NR_IRQS; i++) {
		struct irq_pin_list *entry = irq_2_pin + i;
		if (entry->pin < 0)
			continue;
		printk(KERN_DEBUG "IRQ%d ", i);
		for (;;) {
			printk("-> %d:%d", entry->apic, entry->pin);
			if (!entry->next)
				break;
			entry = irq_2_pin + entry->next;
		}
		printk("\n");
	}

	printk(KERN_INFO ".................................... done.\n");
}


static void __init enable_IO_APIC(void)
{
	union IO_APIC_reg_01 reg_01;
	int i8259_apic, i8259_pin;
	int i, apic;
	unsigned long flags;

	for (i = 0; i < PIN_MAP_SIZE; i++) {
		irq_2_pin[i].pin = -1;
		irq_2_pin[i].next = 0;
	}

	/*
	 * The number of IO-APIC IRQ registers (== #pins):
	 */
	for (apic = 0; apic < nr_ioapics; apic++) {
		spin_lock_irqsave(&ioapic_lock, flags);
		reg_01.raw = io_apic_read(apic, 1);
		spin_unlock_irqrestore(&ioapic_lock, flags);
		nr_ioapic_registers[apic] = reg_01.bits.entries + 1;
	}
	for (apic = 0; apic < nr_ioapics; apic++) {
		int pin;
		/* See if any of the pins is in ExtINT mode */
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			struct IO_APIC_route_entry entry;
			entry = ioapic_read_entry(apic, pin);

			/* If the interrupt line is enabled and in ExtINT mode,
			 * we have found the pin where the i8259 is connected.
			 */
			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
				ioapic_i8259.apic = apic;
				ioapic_i8259.pin  = pin;
				goto found_i8259;
			}
		}
	}
 found_i8259:
	/* Look to see whether the MP table has reported the ExtINT pin */
	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
	/* Trust the MP table if nothing is set up in the hardware */
	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
		ioapic_i8259.pin  = i8259_pin;
		ioapic_i8259.apic = i8259_apic;
	}
	/* Complain if the MP table and the hardware disagree */
	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
	    (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");

	/*
	 * Do not trust the IO-APIC being empty at bootup
	 */
	clear_IO_APIC();
}

/*
 * Not an __init, needed by the reboot code
 */
void disable_IO_APIC(void)
{
	/*
	 * Clear the IO-APIC before rebooting:
	 */
	clear_IO_APIC();

	/*
	 * If the i8259 is routed through an IOAPIC, put that
	 * IOAPIC in virtual wire mode so legacy interrupts can
	 * be delivered.
	 */
	if (ioapic_i8259.pin != -1) {
		struct IO_APIC_route_entry entry;

		memset(&entry, 0, sizeof(entry));
		entry.mask            = 0; /* Enabled */
		entry.trigger         = 0; /* Edge */
		entry.irr             = 0;
		entry.polarity        = 0; /* High */
		entry.delivery_status = 0;
		entry.dest_mode       = 0; /* Physical */
		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
		entry.vector          = 0;
		entry.dest            = GET_APIC_ID(apic_read(APIC_ID));

		/*
		 * Add it to the IO-APIC irq-routing table:
		 */
		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
	}

	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}

static int __init timer_irq_works(void)
{
	unsigned long t1 = jiffies;

	local_irq_enable();
	/* Let ten ticks pass... */
	mdelay((10 * 1000) / HZ);

	/*
	 * Expect a few ticks at least, to be sure some possible
	 * glue logic does not lock up after the first one or two
	 * ticks in a non-ExtINT mode.  Also the local APIC
	 * might have cached one ExtINT interrupt.  Finally, at
	 * least one tick may be lost due to delays.
	 */

	/* jiffies wrap? */
	if (jiffies - t1 > 4)
		return 1;
	return 0;
}

/*
 * In the SMP+IOAPIC case it might happen that an unspecified number
 * of pending IRQ events are left unhandled. These cases are very
 * rare, so we 'resend' these IRQs via IPIs to the same CPU. It's
 * much better to do it this way, as we then do not have to be aware
 * of 'pending' interrupts in the IRQ path, except at this point.
 */
/*
 * Edge-triggered interrupt handling needs to resend any interrupt
 * that was delayed, but this is now handled in the device-independent
 * code.
 */

/*
 * Starting up an edge-triggered IO-APIC interrupt is
 * nasty - we need to make sure that we get the edge.
 * If it is already asserted for some reason, we need
 * to return 1 to indicate that it was pending.
 *
 * This is not complete - we should be able to fake
 * an edge even if it isn't on the 8259A...
 */

static unsigned int startup_ioapic_irq(unsigned int irq)
{
	int was_pending = 0;
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	if (irq < 16) {
		disable_8259A_irq(irq);
		if (i8259A_irq_pending(irq))
			was_pending = 1;
	}
	__unmask_IO_APIC_irq(irq);
	spin_unlock_irqrestore(&ioapic_lock, flags);

	return was_pending;
}

static int ioapic_retrigger_irq(unsigned int irq)
{
	struct irq_cfg *cfg = &irq_cfg[irq];
	cpumask_t mask;
	unsigned long flags;

	spin_lock_irqsave(&vector_lock, flags);
	cpus_clear(mask);
	cpu_set(first_cpu(cfg->domain), mask);

	send_IPI_mask(mask, cfg->vector);
	spin_unlock_irqrestore(&vector_lock, flags);

	return 1;
}

/*
 * Level and edge triggered IO-APIC interrupts need different handling,
 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 * handled with the level-triggered descriptor, but that one has slightly
 * more overhead. Level-triggered interrupts cannot be handled with the
 * edge-triggered handler, without risking IRQ storms and other ugly
 * races.
 */

#ifdef CONFIG_SMP
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
	unsigned vector, me;
	ack_APIC_irq();
	exit_idle();
	irq_enter();

	me = smp_processor_id();
	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
		unsigned int irq;
		struct irq_desc *desc;
		struct irq_cfg *cfg;
		irq = __get_cpu_var(vector_irq)[vector];
		if (irq >= NR_IRQS)
			continue;

		desc = irq_desc + irq;
		cfg = irq_cfg + irq;
		spin_lock(&desc->lock);
		if (!cfg->move_cleanup_count)
			goto unlock;

		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
			goto unlock;

		__get_cpu_var(vector_irq)[vector] = -1;
		cfg->move_cleanup_count--;
unlock:
		spin_unlock(&desc->lock);
	}

	irq_exit();
}

static void irq_complete_move(unsigned int irq)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	unsigned vector, me;

	if (likely(!cfg->move_in_progress))
		return;

	vector = ~get_irq_regs()->orig_rax;
	me = smp_processor_id();
	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
		cpumask_t cleanup_mask;

		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
		cfg->move_in_progress = 0;
	}
}
#else
static inline void irq_complete_move(unsigned int irq) {}
#endif

static void ack_apic_edge(unsigned int irq)
{
	irq_complete_move(irq);
	move_native_irq(irq);
	ack_APIC_irq();
}

static void ack_apic_level(unsigned int irq)
{
	int do_unmask_irq = 0;

	irq_complete_move(irq);
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
	/* If we are moving the irq we need to mask it */
	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
		do_unmask_irq = 1;
		mask_IO_APIC_irq(irq);
	}
#endif

	/*
	 * We must acknowledge the irq before we move it or the acknowledge will
	 * not propagate properly.
	 */
	ack_APIC_irq();

	/* Now we can move and re-enable the irq */
	move_masked_irq(irq);
	if (unlikely(do_unmask_irq))
		unmask_IO_APIC_irq(irq);
}

static struct irq_chip ioapic_chip __read_mostly = {
	.name		= "IO-APIC",
	.startup	= startup_ioapic_irq,
	.mask		= mask_IO_APIC_irq,
	.unmask		= unmask_IO_APIC_irq,
	.ack		= ack_apic_edge,
	.eoi		= ack_apic_level,
#ifdef CONFIG_SMP
	.set_affinity	= set_ioapic_affinity_irq,
#endif
	.retrigger	= ioapic_retrigger_irq,
};

static inline void init_IO_APIC_traps(void)
{
	int irq;

	/*
	 * See the NOTE in __assign_irq_vector() on spreading vectors
	 * across interrupt levels and keeping clear of gate 0x80.
	 */
	for (irq = 0; irq < NR_IRQS; irq++) {
		int tmp = irq;
		if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
			/*
			 * Hmm.. We don't have an entry for this,
			 * so default to an old-fashioned 8259
			 * interrupt if we can..
			 */
			if (irq < 16)
				make_8259A_irq(irq);
			else
				/* Strange. Oh, well.. */
				irq_desc[irq].chip = &no_irq_chip;
		}
	}
}

static void enable_lapic_irq(unsigned int irq)
{
	unsigned long v;

	v = apic_read(APIC_LVT0);
	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}

static void disable_lapic_irq(unsigned int irq)
{
	unsigned long v;

	v = apic_read(APIC_LVT0);
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
}

static void ack_lapic_irq(unsigned int irq)
{
	ack_APIC_irq();
}

static void end_lapic_irq(unsigned int i) { /* nothing */ }

static struct hw_interrupt_type lapic_irq_type __read_mostly = {
	.name = "local-APIC",
	.typename = "local-APIC-edge",
	.startup = NULL, /* startup_irq() not used for IRQ0 */
	.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
	.enable = enable_lapic_irq,
	.disable = disable_lapic_irq,
	.ack = ack_lapic_irq,
	.end = end_lapic_irq,
};

static void setup_nmi(void)
{
	/*
	 * Dirty trick to enable the NMI watchdog ...
	 * We put the 8259A master into AEOI mode and
	 * unmask on all local APICs LVT0 as NMI.
	 *
	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
	 * is from Maciej W. Rozycki - so we do not have to EOI from
	 * the NMI handler or the timer interrupt.
	 */
	printk(KERN_INFO "activating NMI Watchdog ...");

	enable_NMI_through_LVT0(NULL);

	printk(" done.\n");
}

/*
 * This looks a bit hackish, but it's about the only way of sending
 * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 * not support the ExtINT mode, unfortunately.  We need to send these
 * cycles as some i82489DX-based boards have glue logic that keeps the
 * 8259A interrupt line asserted until INTA.  --macro
 */
static inline void unlock_ExtINT_logic(void)
{
	int apic, pin, i;
	struct IO_APIC_route_entry entry0, entry1;
	unsigned char save_control, save_freq_select;
	unsigned long flags;

	pin  = find_isa_irq_pin(8, mp_INT);
	apic = find_isa_irq_apic(8, mp_INT);
	if (pin == -1)
		return;

	spin_lock_irqsave(&ioapic_lock, flags);
	*(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
	*(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	clear_IO_APIC_pin(apic, pin);

	memset(&entry1, 0, sizeof(entry1));

	entry1.dest_mode = 0;			/* physical delivery */
	entry1.mask = 0;			/* unmask IRQ now */
	entry1.dest = hard_smp_processor_id();
	entry1.delivery_mode = dest_ExtINT;
	entry1.polarity = entry0.polarity;
	entry1.trigger = 0;
	entry1.vector = 0;

	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
	spin_unlock_irqrestore(&ioapic_lock, flags);

	save_control = CMOS_READ(RTC_CONTROL);
	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
		   RTC_FREQ_SELECT);
	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);

	i = 100;
	while (i-- > 0) {
		mdelay(10);
		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
			i -= 10;
	}

	CMOS_WRITE(save_control, RTC_CONTROL);
	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
	clear_IO_APIC_pin(apic, pin);

	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

static inline void check_timer(void)
{
	struct irq_cfg *cfg = irq_cfg + 0;
	int apic1, pin1, apic2, pin2;

	/*
	 * get/set the timer IRQ vector:
	 */
	disable_8259A_irq(0);
	assign_irq_vector(0, TARGET_CPUS);

	/*
	 * Subtle: code in do_timer_interrupt() expects an AEOI
	 * mode for the 8259A whenever interrupts are routed
	 * through I/O APICs.  Also IRQ0 has to be enabled in
	 * the 8259A, which implies the virtual wire has to be
	 * disabled in the local APIC.
	 */
	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
	init_8259A(1);
	if (timer_over_8254 > 0)
		enable_8259A_irq(0);

	pin1  = find_isa_irq_pin(0, mp_INT);
	apic1 = find_isa_irq_apic(0, mp_INT);
	pin2  = ioapic_i8259.pin;
	apic2 = ioapic_i8259.apic;

	apic_printk(APIC_VERBOSE, KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
		cfg->vector, apic1, pin1, apic2, pin2);

	if (pin1 != -1) {
		/*
		 * Ok, does IRQ0 through the IOAPIC work?
		 */
		unmask_IO_APIC_irq(0);
		if (!no_timer_check && timer_irq_works()) {
			nmi_watchdog_default();
			if (nmi_watchdog == NMI_IO_APIC) {
				disable_8259A_irq(0);
				setup_nmi();
				enable_8259A_irq(0);
			}
			if (disable_timer_pin_1 > 0)
				clear_IO_APIC_pin(0, pin1);
			return;
		}
		clear_IO_APIC_pin(apic1, pin1);
		apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: 8254 timer not "
				"connected to IO-APIC\n");
	}

	apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer (IRQ0) "
				"through the 8259A ... ");
	if (pin2 != -1) {
		apic_printk(APIC_VERBOSE, "\n..... (found apic %d pin %d) ...",
			apic2, pin2);
		/*
		 * legacy devices should be connected to IO APIC #0
		 */
		setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
		if (timer_irq_works()) {
			apic_printk(APIC_VERBOSE, " works.\n");
			nmi_watchdog_default();
			if (nmi_watchdog == NMI_IO_APIC) {
				setup_nmi();
			}
			return;
		}
		/*
		 * Cleanup, just in case ...
		 */
		clear_IO_APIC_pin(apic2, pin2);
	}
	apic_printk(APIC_VERBOSE, " failed.\n");

	if (nmi_watchdog == NMI_IO_APIC) {
		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
		nmi_watchdog = 0;
	}

	apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");

	disable_8259A_irq(0);
	irq_desc[0].chip = &lapic_irq_type;
	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
	enable_8259A_irq(0);

	if (timer_irq_works()) {
		apic_printk(APIC_VERBOSE, " works.\n");
		return;
	}
	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
	apic_printk(APIC_VERBOSE, " failed.\n");

	apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");

	init_8259A(0);
	make_8259A_irq(0);
	apic_write(APIC_LVT0, APIC_DM_EXTINT);

	unlock_ExtINT_logic();

	if (timer_irq_works()) {
		apic_printk(APIC_VERBOSE, " works.\n");
		return;
	}
	apic_printk(APIC_VERBOSE, " failed :(.\n");
	panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
}

static int __init notimercheck(char *s)
{
	no_timer_check = 1;
	return 1;
}
__setup("no_timer_check", notimercheck);

/*
 * IRQs that are handled by the PIC in the MPS IOAPIC case.
 * - IRQ2 is the cascade IRQ, and cannot be an io-apic IRQ.
 *   Linux doesn't really care, as it's not actually used
 *   for any interrupt handling anyway.
 */
#define PIC_IRQS	(1<<2)

void __init setup_IO_APIC(void)
{
	enable_IO_APIC();

	if (acpi_ioapic)
		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
	else
		io_apic_irqs = ~PIC_IRQS;

	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");

	sync_Arb_IDs();
	setup_IO_APIC_irqs();
	init_IO_APIC_traps();
	check_timer();
	if (!acpi_ioapic)
		print_IO_APIC();
}

struct sysfs_ioapic_data {
	struct sys_device dev;
	struct IO_APIC_route_entry entry[0];
};
static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];

static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
	struct IO_APIC_route_entry *entry;
	struct sysfs_ioapic_data *data;
	int i;

	data = container_of(dev, struct sysfs_ioapic_data, dev);
	entry = data->entry;
	for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++)
		*entry = ioapic_read_entry(dev->id, i);

	return 0;
}

static int ioapic_resume(struct sys_device *dev)
{
	struct IO_APIC_route_entry *entry;
	struct sysfs_ioapic_data *data;
	unsigned long flags;
	union IO_APIC_reg_00 reg_00;
	int i;

	data = container_of(dev, struct sysfs_ioapic_data, dev);
	entry = data->entry;

	spin_lock_irqsave(&ioapic_lock, flags);
	reg_00.raw = io_apic_read(dev->id, 0);
	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
		io_apic_write(dev->id, 0, reg_00.raw);
	}
	spin_unlock_irqrestore(&ioapic_lock, flags);
	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
		ioapic_write_entry(dev->id, i, entry[i]);

	return 0;
}

static struct sysdev_class ioapic_sysdev_class = {
	set_kset_name("ioapic"),
	.suspend = ioapic_suspend,
	.resume = ioapic_resume,
};

static int __init ioapic_init_sysfs(void)
{
	struct sys_device *dev;
	int i, size, error = 0;

	error = sysdev_class_register(&ioapic_sysdev_class);
	if (error)
		return error;

	for (i = 0; i < nr_ioapics; i++) {
		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
			* sizeof(struct IO_APIC_route_entry);
		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
		if (!mp_ioapic_data[i]) {
			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
			continue;
		}
		memset(mp_ioapic_data[i], 0, size);
		dev = &mp_ioapic_data[i]->dev;
		dev->id = i;
		dev->cls = &ioapic_sysdev_class;
		error = sysdev_register(dev);
		if (error) {
			kfree(mp_ioapic_data[i]);
			mp_ioapic_data[i] = NULL;
			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
			continue;
		}
	}

	return 0;
}

device_initcall(ioapic_init_sysfs);

/*
 * Dynamic irq allocation and deallocation
 */
int create_irq(void)
{
	/* Allocate an unused irq */
	int irq;
	int new;
	unsigned long flags;

	irq = -ENOSPC;
	spin_lock_irqsave(&vector_lock, flags);
	for (new = (NR_IRQS - 1); new >= 0; new--) {
		if (platform_legacy_irq(new))
			continue;
		if (irq_cfg[new].vector != 0)
			continue;
		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
			irq = new;
		break;
	}
	spin_unlock_irqrestore(&vector_lock, flags);

	if (irq >= 0) {
		dynamic_irq_init(irq);
	}
	return irq;
}

void destroy_irq(unsigned int irq)
{
	unsigned long flags;

	dynamic_irq_cleanup(irq);

	spin_lock_irqsave(&vector_lock, flags);
	__clear_irq_vector(irq);
	spin_unlock_irqrestore(&vector_lock, flags);
}

/*
 * MSI message composition
 */
#ifdef CONFIG_PCI_MSI
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	int err;
	unsigned dest;
	cpumask_t tmp;

	tmp = TARGET_CPUS;
	err = assign_irq_vector(irq, tmp);
	if (!err) {
		cpus_and(tmp, cfg->domain, tmp);
		dest = cpu_mask_to_apicid(tmp);

		msg->address_hi = MSI_ADDR_BASE_HI;
		msg->address_lo =
			MSI_ADDR_BASE_LO |
			((INT_DEST_MODE == 0) ?
				MSI_ADDR_DEST_MODE_PHYSICAL:
				MSI_ADDR_DEST_MODE_LOGICAL) |
			((INT_DELIVERY_MODE != dest_LowestPrio) ?
				MSI_ADDR_REDIRECTION_CPU:
				MSI_ADDR_REDIRECTION_LOWPRI) |
			MSI_ADDR_DEST_ID(dest);

		msg->data =
			MSI_DATA_TRIGGER_EDGE |
			MSI_DATA_LEVEL_ASSERT |
			((INT_DELIVERY_MODE != dest_LowestPrio) ?
				MSI_DATA_DELIVERY_FIXED:
				MSI_DATA_DELIVERY_LOWPRI) |
			MSI_DATA_VECTOR(cfg->vector);
	}
	return err;
}
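
/*
 * Shape of the message composed above (informal summary; the
 * authoritative field layout is in asm/msidef.h): the address word
 * selects the destination APIC(s) plus the physical/logical and
 * fixed/lowest-priority modes, while the data word carries the edge
 * trigger mode and the vector.  The device raises the interrupt by
 * writing "data" to "address", much as an IO-APIC would.
 */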

#ifdef CONFIG_SMP
static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	struct msi_msg msg;
	unsigned int dest;
	cpumask_t tmp;

	cpus_and(tmp, mask, cpu_online_map);
	if (cpus_empty(tmp))
		return;

	if (assign_irq_vector(irq, mask))
		return;

	cpus_and(tmp, cfg->domain, mask);
	dest = cpu_mask_to_apicid(tmp);

	read_msi_msg(irq, &msg);

	msg.data &= ~MSI_DATA_VECTOR_MASK;
	msg.data |= MSI_DATA_VECTOR(cfg->vector);
	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
	msg.address_lo |= MSI_ADDR_DEST_ID(dest);

	write_msi_msg(irq, &msg);
	irq_desc[irq].affinity = mask;
}
#endif /* CONFIG_SMP */

/*
 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI or MSI-X Capability Structure.
 */
static struct irq_chip msi_chip = {
	.name		= "PCI-MSI",
	.unmask		= unmask_msi_irq,
	.mask		= mask_msi_irq,
	.ack		= ack_apic_edge,
#ifdef CONFIG_SMP
	.set_affinity	= set_msi_irq_affinity,
#endif
	.retrigger	= ioapic_retrigger_irq,
};

int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	struct msi_msg msg;
	int irq, ret;
	irq = create_irq();
	if (irq < 0)
		return irq;

	ret = msi_compose_msg(dev, irq, &msg);
	if (ret < 0) {
		destroy_irq(irq);
		return ret;
	}

	set_irq_msi(irq, desc);
	write_msi_msg(irq, &msg);

	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");

	return 0;
}

void arch_teardown_msi_irq(unsigned int irq)
{
	destroy_irq(irq);
}

#endif /* CONFIG_PCI_MSI */

/*
 * Hypertransport interrupt support
 */
#ifdef CONFIG_HT_IRQ

#ifdef CONFIG_SMP

static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
	struct ht_irq_msg msg;
	fetch_ht_irq_msg(irq, &msg);

	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);

	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);

	write_ht_irq_msg(irq, &msg);
}

static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	unsigned int dest;
	cpumask_t tmp;

	cpus_and(tmp, mask, cpu_online_map);
	if (cpus_empty(tmp))
		return;

	if (assign_irq_vector(irq, mask))
		return;

	cpus_and(tmp, cfg->domain, mask);
	dest = cpu_mask_to_apicid(tmp);

	target_ht_irq(irq, dest, cfg->vector);
	irq_desc[irq].affinity = mask;
}
#endif

static struct irq_chip ht_irq_chip = {
	.name		= "PCI-HT",
	.mask		= mask_ht_irq,
	.unmask		= unmask_ht_irq,
	.ack		= ack_apic_edge,
#ifdef CONFIG_SMP
	.set_affinity	= set_ht_irq_affinity,
#endif
	.retrigger	= ioapic_retrigger_irq,
};

int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
	struct irq_cfg *cfg = irq_cfg + irq;
	int err;
	cpumask_t tmp;

	tmp = TARGET_CPUS;
	err = assign_irq_vector(irq, tmp);
	if (!err) {
		struct ht_irq_msg msg;
		unsigned dest;

		cpus_and(tmp, cfg->domain, tmp);
		dest = cpu_mask_to_apicid(tmp);

		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);

		msg.address_lo =
			HT_IRQ_LOW_BASE |
			HT_IRQ_LOW_DEST_ID(dest) |
			HT_IRQ_LOW_VECTOR(cfg->vector) |
			((INT_DEST_MODE == 0) ?
				HT_IRQ_LOW_DM_PHYSICAL :
				HT_IRQ_LOW_DM_LOGICAL) |
			HT_IRQ_LOW_RQEOI_EDGE |
			((INT_DELIVERY_MODE != dest_LowestPrio) ?
				HT_IRQ_LOW_MT_FIXED :
				HT_IRQ_LOW_MT_ARBITRATED) |
			HT_IRQ_LOW_IRQ_MASKED;

		write_ht_irq_msg(irq, &msg);

		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
					      handle_edge_irq, "edge");
	}
	return err;
}
#endif /* CONFIG_HT_IRQ */

/* --------------------------------------------------------------------------
                          ACPI-based IOAPIC Configuration
   -------------------------------------------------------------------------- */

#ifdef CONFIG_ACPI

#define IO_APIC_MAX_ID		0xFE

int __init io_apic_get_redir_entries(int ioapic)
{
	union IO_APIC_reg_01	reg_01;
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	reg_01.raw = io_apic_read(ioapic, 1);
	spin_unlock_irqrestore(&ioapic_lock, flags);

	return reg_01.bits.entries;
}

int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
{
	if (!IO_APIC_IRQ(irq)) {
		apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
			ioapic);
		return -EINVAL;
	}

	/*
	 * IRQs < 16 are already in the irq_2_pin[] map
	 */
	if (irq >= 16)
		add_pin_to_irq(irq, ioapic, pin);

	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);

	return 0;
}

#endif /* CONFIG_ACPI */

/*
 * This function is currently only a helper for the i386 SMP boot
 * process, where we need to reprogram the ioredtbls to cater for the
 * cpus which have come online, so the mask in all cases should simply
 * be TARGET_CPUS.
 */
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
	int pin, ioapic, irq, irq_entry;

	if (skip_ioapic_setup == 1)
		return;

	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
			if (irq_entry == -1)
				continue;
			irq = pin_2_irq(irq_entry, ioapic, pin);

			/* setup_IO_APIC_irqs() could fail to get a vector for
			 * some device when you have too many devices, because
			 * at that time only the boot cpu is online.
			 */
			if (!irq_cfg[irq].vector)
				setup_IO_APIC_irq(ioapic, pin, irq,
						  irq_trigger(irq_entry),
						  irq_polarity(irq_entry));
			else
				set_ioapic_affinity_irq(irq, TARGET_CPUS);
		}
	}
}
#endif