/*
 * OpenPIC emulation
 *
 * Copyright (c) 2004 Jocelyn Mayer
 *               2011 Alexander Graf
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/kvm_host.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/uaccess.h>
#include <asm/mpic.h>
#include <asm/kvm_para.h>
#include <asm/kvm_ppc.h>
#include <kvm/iodev.h>

#define MAX_CPU     32
#define MAX_SRC     256
#define MAX_TMR     4
#define MAX_IPI     4
#define MAX_MSI     8
#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
#define VID         0x03	/* MPIC version ID */

/* OpenPIC capability flags */
#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
#define OPENPIC_FLAG_ILR          (1 << 1)

/* OpenPIC address map */
#define OPENPIC_REG_SIZE             0x40000
#define OPENPIC_GLB_REG_START        0x0
#define OPENPIC_GLB_REG_SIZE         0x10F0
#define OPENPIC_TMR_REG_START        0x10F0
#define OPENPIC_TMR_REG_SIZE         0x220
#define OPENPIC_MSI_REG_START        0x1600
#define OPENPIC_MSI_REG_SIZE         0x200
#define OPENPIC_SUMMARY_REG_START    0x3800
#define OPENPIC_SUMMARY_REG_SIZE     0x800
#define OPENPIC_SRC_REG_START        0x10000
#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
#define OPENPIC_CPU_REG_START        0x20000
#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))

struct fsl_mpic_info {
	int max_ext;
};

static struct fsl_mpic_info fsl_mpic_20 = {
	.max_ext = 12,
};

static struct fsl_mpic_info fsl_mpic_42 = {
	.max_ext = 12,
};

#define FRR_NIRQ_SHIFT    16
#define FRR_NCPU_SHIFT     8
#define FRR_VID_SHIFT      0

#define VID_REVISION_1_2   2
#define VID_REVISION_1_3   3

#define VIR_GENERIC      0x00000000	/* Generic Vendor ID */

#define GCR_RESET        0x80000000
#define GCR_MODE_PASS    0x00000000
#define GCR_MODE_MIXED   0x20000000
#define GCR_MODE_PROXY   0x60000000

#define TBCR_CI           0x80000000	/* count inhibit */
#define TCCR_TOG          0x80000000	/* toggles on decrement to zero */

#define IDR_EP_SHIFT      31
#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
#define IDR_CI0_SHIFT     30
#define IDR_CI1_SHIFT     29
#define IDR_P1_SHIFT      1
#define IDR_P0_SHIFT      0

#define ILR_INTTGT_MASK   0x000000ff
#define ILR_INTTGT_INT    0x00
#define ILR_INTTGT_CINT   0x01	/* critical */
#define ILR_INTTGT_MCP    0x02	/* machine check */
#define NUM_OUTPUTS       3

#define MSIIR_OFFSET       0x140
#define MSIIR_SRS_SHIFT    29
#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
#define MSIIR_IBS_SHIFT    24
#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)

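/*
 * Map the calling thread to an MPIC CPU interface: if the current
 * thread is executing a vcpu ioctl, return that vcpu's irq_cpu_id,
 * otherwise -1.
 */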
static int get_current_cpu(void)
{
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
	return vcpu ? vcpu->arch.irq_cpu_id : -1;
#else
	/* XXX */
	return -1;
#endif
}

static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
				      u32 val, int idx);
static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
				     u32 *ptr, int idx);
static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
				    uint32_t val);

enum irq_type {
	IRQ_TYPE_NORMAL = 0,
	IRQ_TYPE_FSLINT,	/* FSL internal interrupt -- level only */
	IRQ_TYPE_FSLSPECIAL,	/* FSL timer/IPI interrupt, edge, no polarity */
};

struct irq_queue {
	/* Round up to the nearest 64 IRQs so that the queue length
	 * won't change when moving between 32 and 64 bit hosts.
	 */
	unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
	int next;
	int priority;
};

struct irq_source {
	uint32_t ivpr;		/* IRQ vector/priority register */
	uint32_t idr;		/* IRQ destination register */
	uint32_t destmask;	/* bitmap of CPU destinations */
	int last_cpu;
	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
	int pending;		/* TRUE if IRQ is pending */
	enum irq_type type;
	bool level:1;		/* level-triggered */
	bool nomask:1;	/* critical interrupts ignore mask on some FSL MPICs */
};

#define IVPR_MASK_SHIFT       31
#define IVPR_MASK_MASK        (1 << IVPR_MASK_SHIFT)
#define IVPR_ACTIVITY_SHIFT   30
#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
#define IVPR_MODE_SHIFT       29
#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
#define IVPR_POLARITY_SHIFT   23
#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
#define IVPR_SENSE_SHIFT      22
#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)

#define IVPR_PRIORITY_MASK     (0xF << 16)
#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)

/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
#define IDR_EP      0x80000000	/* external pin */
#define IDR_CI      0x40000000	/* critical interrupt */

struct irq_dest {
	struct kvm_vcpu *vcpu;

	int32_t ctpr;		/* CPU current task priority */
	struct irq_queue raised;
	struct irq_queue servicing;

	/* Count of IRQ sources asserting on non-INT outputs */
	uint32_t outputs_active[NUM_OUTPUTS];
};

#define MAX_MMIO_REGIONS 10

struct openpic {
	struct kvm *kvm;
	struct kvm_device *dev;
	struct kvm_io_device mmio;
	const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
	int num_mmio_regions;

	gpa_t reg_base;
	spinlock_t lock;

	/* Behavior control */
	struct fsl_mpic_info *fsl;
	uint32_t model;
	uint32_t flags;
	uint32_t nb_irqs;
	uint32_t vid;
	uint32_t vir;		/* Vendor identification register */
	uint32_t vector_mask;
	uint32_t tfrr_reset;
	uint32_t ivpr_reset;
	uint32_t idr_reset;
	uint32_t brr1;
	uint32_t mpic_mode_mask;

	/* Global registers */
	uint32_t frr;		/* Feature reporting register */
	uint32_t gcr;		/* Global configuration register  */
	uint32_t pir;		/* Processor initialization register */
	uint32_t spve;		/* Spurious vector register */
	uint32_t tfrr;		/* Timer frequency reporting register */
	/* Source registers */
	struct irq_source src[MAX_IRQ];
	/* Local registers per output pin */
	struct irq_dest dst[MAX_CPU];
	uint32_t nb_cpus;
	/* Timer registers */
	struct {
		uint32_t tccr;	/* Global timer current count register */
		uint32_t tbcr;	/* Global timer base count register */
	} timers[MAX_TMR];
	/* Shared MSI registers */
	struct {
		uint32_t msir;	/* Shared Message Signaled Interrupt Register */
	} msi[MAX_MSI];
	uint32_t max_irq;
	uint32_t irq_ipi0;
	uint32_t irq_tim0;
	uint32_t irq_msi;
};


static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
			   int output)
{
	struct kvm_interrupt irq = {
		.irq = KVM_INTERRUPT_SET_LEVEL,
	};

	if (!dst->vcpu) {
		pr_debug("%s: destination cpu %d does not exist\n",
			 __func__, (int)(dst - &opp->dst[0]));
		return;
	}

	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
		output);

	if (output != ILR_INTTGT_INT)	/* TODO */
		return;

	kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
}

static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
			   int output)
{
	if (!dst->vcpu) {
		pr_debug("%s: destination cpu %d does not exist\n",
			 __func__, (int)(dst - &opp->dst[0]));
		return;
	}

	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
		output);

	if (output != ILR_INTTGT_INT)	/* TODO */
		return;

	kvmppc_core_dequeue_external(dst->vcpu);
}

static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
{
	set_bit(n_IRQ, q->queue);
}

static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
{
	clear_bit(n_IRQ, q->queue);
}

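/*
 * Scan the queue bitmap and cache the highest-priority pending IRQ in
 * q->next (and its priority in q->priority); q->next is -1 when the
 * queue is empty.
 */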
static void IRQ_check(struct openpic *opp, struct irq_queue *q)
{
	int irq = -1;
	int next = -1;
	int priority = -1;

	for (;;) {
		irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
		if (irq == opp->max_irq)
			break;

		pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
			irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);

		if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
			next = irq;
			priority = IVPR_PRIORITY(opp->src[irq].ivpr);
		}
	}

	q->next = next;
	q->priority = priority;
}

static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
{
	/* XXX: optimize */
	IRQ_check(opp, q);

	return q->next;
}

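/*
 * Propagate one source's state change to a single CPU.  Non-INT outputs
 * (critical interrupt, machine check) bypass the priority logic and are
 * simply refcounted per output pin; the INT output goes through the
 * raised queue and is compared against CTPR and the IRQ currently in
 * service.
 */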
static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
			   bool active, bool was_active)
{
	struct irq_dest *dst;
	struct irq_source *src;
	int priority;

	dst = &opp->dst[n_CPU];
	src = &opp->src[n_IRQ];

	pr_debug("%s: IRQ %d active %d was %d\n",
		__func__, n_IRQ, active, was_active);

	if (src->output != ILR_INTTGT_INT) {
		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
			__func__, src->output, n_IRQ, active, was_active,
			dst->outputs_active[src->output]);

		/* On Freescale MPIC, critical interrupts ignore priority,
		 * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
		 * masking.
		 */
		if (active) {
			if (!was_active &&
			    dst->outputs_active[src->output]++ == 0) {
				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
					__func__, src->output, n_CPU, n_IRQ);
				mpic_irq_raise(opp, dst, src->output);
			}
		} else {
			if (was_active &&
			    --dst->outputs_active[src->output] == 0) {
				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
					__func__, src->output, n_CPU, n_IRQ);
				mpic_irq_lower(opp, dst, src->output);
			}
		}

		return;
	}

	priority = IVPR_PRIORITY(src->ivpr);

	/* Even if the interrupt doesn't have enough priority,
	 * it is still raised, in case ctpr is lowered later.
	 */
	if (active)
		IRQ_setbit(&dst->raised, n_IRQ);
	else
		IRQ_resetbit(&dst->raised, n_IRQ);

	IRQ_check(opp, &dst->raised);

	if (active && priority <= dst->ctpr) {
		pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
			__func__, n_IRQ, priority, dst->ctpr, n_CPU);
		active = 0;
	}

	if (active) {
		if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
		    priority <= dst->servicing.priority) {
			pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
				__func__, n_IRQ, dst->servicing.next, n_CPU);
		} else {
			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
				__func__, n_CPU, n_IRQ, dst->raised.next);
			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
		}
	} else {
		IRQ_get_next(opp, &dst->servicing);
		if (dst->raised.priority > dst->ctpr &&
		    dst->raised.priority > dst->servicing.priority) {
			pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
				__func__, n_IRQ, dst->raised.next,
				dst->raised.priority, dst->ctpr,
				dst->servicing.priority, n_CPU);
			/* IRQ line stays asserted */
		} else {
			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
				__func__, n_IRQ, dst->ctpr,
				dst->servicing.priority, n_CPU);
			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
		}
	}
}

/* Update the PIC state because the registers for n_IRQ have changed */
static void openpic_update_irq(struct openpic *opp, int n_IRQ)
{
	struct irq_source *src;
	bool active, was_active;
	int i;

	src = &opp->src[n_IRQ];
	active = src->pending;

	if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
		/* Interrupt source is disabled */
		pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
		active = false;
	}

	was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);

	/*
	 * We don't have a similar check for already-active because
	 * ctpr may have changed and we need to withdraw the interrupt.
	 */
	if (!active && !was_active) {
		pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
		return;
	}

	if (active)
		src->ivpr |= IVPR_ACTIVITY_MASK;
	else
		src->ivpr &= ~IVPR_ACTIVITY_MASK;

	if (src->destmask == 0) {
		/* No target */
		pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
		return;
	}

	if (src->destmask == (1 << src->last_cpu)) {
		/* Only one CPU is allowed to receive this IRQ */
		IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
	} else if (!(src->ivpr & IVPR_MODE_MASK)) {
		/* Directed delivery mode */
		for (i = 0; i < opp->nb_cpus; i++) {
			if (src->destmask & (1 << i)) {
				IRQ_local_pipe(opp, i, n_IRQ, active,
					       was_active);
			}
		}
	} else {
		/* Distributed delivery mode */
		for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
			if (i == opp->nb_cpus)
				i = 0;

			if (src->destmask & (1 << i)) {
				IRQ_local_pipe(opp, i, n_IRQ, active,
					       was_active);
				src->last_cpu = i;
				break;
			}
		}
	}
}

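/*
 * Entry point for asserting or deasserting a source, e.g. from the irq
 * routing code.  Level-triggered sources track the line state; edge-
 * triggered sources only latch pending on a 0->1 transition.
 */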
static void openpic_set_irq(void *opaque, int n_IRQ, int level)
{
	struct openpic *opp = opaque;
	struct irq_source *src;

	if (n_IRQ >= MAX_IRQ) {
		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
		return;
	}

	src = &opp->src[n_IRQ];
	pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
		n_IRQ, level, src->ivpr);
	if (src->level) {
		/* level-sensitive irq */
		src->pending = level;
		openpic_update_irq(opp, n_IRQ);
	} else {
		/* edge-sensitive irq */
		if (level) {
			src->pending = 1;
			openpic_update_irq(opp, n_IRQ);
		}

		if (src->output != ILR_INTTGT_INT) {
			/* Edge-triggered interrupts shouldn't be used
			 * with non-INT delivery, but just in case,
			 * try to make it do something sane rather than
			 * cause an interrupt storm.  This is close to
			 * what you'd probably see happen in real hardware.
			 */
			src->pending = 0;
			openpic_update_irq(opp, n_IRQ);
		}
	}
}

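/* Return all controller registers to their power-on defaults. */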
static void openpic_reset(struct openpic *opp)
{
	int i;

	opp->gcr = GCR_RESET;
	/* Initialise controller registers */
	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
	    (opp->vid << FRR_VID_SHIFT);

	opp->pir = 0;
	opp->spve = -1 & opp->vector_mask;
	opp->tfrr = opp->tfrr_reset;
	/* Initialise IRQ sources */
	for (i = 0; i < opp->max_irq; i++) {
		opp->src[i].ivpr = opp->ivpr_reset;

		switch (opp->src[i].type) {
		case IRQ_TYPE_NORMAL:
			opp->src[i].level =
			    !!(opp->ivpr_reset & IVPR_SENSE_MASK);
			break;

		case IRQ_TYPE_FSLINT:
			opp->src[i].ivpr |= IVPR_POLARITY_MASK;
			break;

		case IRQ_TYPE_FSLSPECIAL:
			break;
		}

		write_IRQreg_idr(opp, i, opp->idr_reset);
	}
	/* Initialise IRQ destinations */
	for (i = 0; i < MAX_CPU; i++) {
		opp->dst[i].ctpr = 15;
		memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
		opp->dst[i].raised.next = -1;
		memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
		opp->dst[i].servicing.next = -1;
	}
	/* Initialise timers */
	for (i = 0; i < MAX_TMR; i++) {
		opp->timers[i].tccr = 0;
		opp->timers[i].tbcr = TBCR_CI;
	}
	/* Go out of RESET state */
	opp->gcr = 0;
}

static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
{
	return opp->src[n_IRQ].idr;
}

static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
{
	if (opp->flags & OPENPIC_FLAG_ILR)
		return opp->src[n_IRQ].output;

	return 0xffffffff;
}

static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
{
	return opp->src[n_IRQ].ivpr;
}

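/*
 * Decode an IDR write into src->destmask and the output type.  With
 * OPENPIC_FLAG_IDR_CRIT, the IDR's CI bits select critical-interrupt
 * delivery, which takes precedence over normal delivery and ignores
 * masking on these cores.
 */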
static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
				    uint32_t val)
{
	struct irq_source *src = &opp->src[n_IRQ];
	uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
	uint32_t crit_mask = 0;
	uint32_t mask = normal_mask;
	int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
	int i;

	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
		crit_mask = mask << crit_shift;
		mask |= crit_mask | IDR_EP;
	}

	src->idr = val & mask;
	pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);

	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
		if (src->idr & crit_mask) {
			if (src->idr & normal_mask) {
				pr_debug("%s: IRQ configured for multiple output types, using critical\n",
					__func__);
			}

			src->output = ILR_INTTGT_CINT;
			src->nomask = true;
			src->destmask = 0;

			for (i = 0; i < opp->nb_cpus; i++) {
				int n_ci = IDR_CI0_SHIFT - i;

				if (src->idr & (1UL << n_ci))
					src->destmask |= 1UL << i;
			}
		} else {
			src->output = ILR_INTTGT_INT;
			src->nomask = false;
			src->destmask = src->idr & normal_mask;
		}
	} else {
		src->destmask = src->idr;
	}
}

static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
				    uint32_t val)
{
	if (opp->flags & OPENPIC_FLAG_ILR) {
		struct irq_source *src = &opp->src[n_IRQ];

		src->output = val & ILR_INTTGT_MASK;
		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, val,
			src->output);

		/* TODO: on MPIC v4.0 only, set nomask for non-INT */
	}
}

static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
				     uint32_t val)
{
	uint32_t mask;

	/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
	 * the polarity bit is read-only on internal interrupts.
	 */
	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
	    IVPR_POLARITY_MASK | opp->vector_mask;

	/* ACTIVITY bit is read-only */
	opp->src[n_IRQ].ivpr =
	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);

	/* For FSL internal interrupts, the sense bit is reserved and zero,
	 * and the interrupt is always level-triggered.  Timers and IPIs
	 * have no sense or polarity bits, and are edge-triggered.
	 */
	switch (opp->src[n_IRQ].type) {
	case IRQ_TYPE_NORMAL:
		opp->src[n_IRQ].level =
		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
		break;

	case IRQ_TYPE_FSLINT:
		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
		break;

	case IRQ_TYPE_FSLSPECIAL:
		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
		break;
	}

	openpic_update_irq(opp, n_IRQ);
	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
		opp->src[n_IRQ].ivpr);
}

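/*
 * GCR: writing GCR[RESET] resets the whole PIC; otherwise only the mode
 * field (pass-through/mixed/proxy, as permitted by mpic_mode_mask) is
 * writable.
 */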
static void openpic_gcr_write(struct openpic *opp, uint64_t val)
{
	if (val & GCR_RESET) {
		openpic_reset(opp);
		return;
	}

	opp->gcr &= ~opp->mpic_mode_mask;
	opp->gcr |= val & opp->mpic_mode_mask;
}

static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
{
	struct openpic *opp = opaque;
	int err = 0;

	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
	if (addr & 0xF)
		return 0;

	switch (addr) {
	case 0x00:	/* Block revision register 1 (BRR1) is read-only */
		break;
	case 0x40:
	case 0x50:
	case 0x60:
	case 0x70:
	case 0x80:
	case 0x90:
	case 0xA0:
	case 0xB0:
		err = openpic_cpu_write_internal(opp, addr, val,
						 get_current_cpu());
		break;
	case 0x1000:		/* FRR */
		break;
	case 0x1020:		/* GCR */
		openpic_gcr_write(opp, val);
		break;
	case 0x1080:		/* VIR */
		break;
	case 0x1090:		/* PIR */
		/*
		 * This register is used to reset a CPU core --
		 * let userspace handle it.
		 */
		err = -ENXIO;
		break;
	case 0x10A0:		/* IPI_IVPR */
	case 0x10B0:
	case 0x10C0:
	case 0x10D0: {
		int idx;
		idx = (addr - 0x10A0) >> 4;
		write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
		break;
	}
	case 0x10E0:		/* SPVE */
		opp->spve = val & opp->vector_mask;
		break;
	default:
		break;
	}

	return err;
}

static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
{
	struct openpic *opp = opaque;
	u32 retval;
	int err = 0;

	pr_debug("%s: addr %#llx\n", __func__, addr);
	retval = 0xFFFFFFFF;
	if (addr & 0xF)
		goto out;

	switch (addr) {
	case 0x1000:		/* FRR */
		retval = opp->frr;
		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
		break;
	case 0x1020:		/* GCR */
		retval = opp->gcr;
		break;
	case 0x1080:		/* VIR */
		retval = opp->vir;
		break;
	case 0x1090:		/* PIR */
		retval = 0x00000000;
		break;
	case 0x00:		/* Block revision register 1 (BRR1) */
		retval = opp->brr1;
		break;
	case 0x40:
	case 0x50:
	case 0x60:
	case 0x70:
	case 0x80:
	case 0x90:
	case 0xA0:
	case 0xB0:
		err = openpic_cpu_read_internal(opp, addr,
			&retval, get_current_cpu());
		break;
	case 0x10A0:		/* IPI_IVPR */
	case 0x10B0:
	case 0x10C0:
	case 0x10D0:
		{
			int idx;
			idx = (addr - 0x10A0) >> 4;
			retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
		}
		break;
	case 0x10E0:		/* SPVE */
		retval = opp->spve;
		break;
	default:
		break;
	}

out:
	pr_debug("%s: => 0x%08x\n", __func__, retval);
	*ptr = retval;
	return err;
}

static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
{
	struct openpic *opp = opaque;
	int idx;

	addr += 0x10f0;

	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
	if (addr & 0xF)
		return 0;

	if (addr == 0x10f0) {
		/* TFRR */
		opp->tfrr = val;
		return 0;
	}

	idx = (addr >> 6) & 0x3;
	addr = addr & 0x30;

	switch (addr & 0x30) {
	case 0x00:		/* TCCR */
		break;
	case 0x10:		/* TBCR */
		if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
		    (val & TBCR_CI) == 0 &&
		    (opp->timers[idx].tbcr & TBCR_CI) != 0)
			opp->timers[idx].tccr &= ~TCCR_TOG;

		opp->timers[idx].tbcr = val;
		break;
	case 0x20:		/* TVPR */
		write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
		break;
	case 0x30:		/* TDR */
		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
		break;
	}

	return 0;
}

static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
{
	struct openpic *opp = opaque;
	uint32_t retval = -1;
	int idx;

	pr_debug("%s: addr %#llx\n", __func__, addr);
	if (addr & 0xF)
		goto out;

	idx = (addr >> 6) & 0x3;
	if (addr == 0x0) {
		/* TFRR */
		retval = opp->tfrr;
		goto out;
	}

	switch (addr & 0x30) {
	case 0x00:		/* TCCR */
		retval = opp->timers[idx].tccr;
		break;
	case 0x10:		/* TBCR */
		retval = opp->timers[idx].tbcr;
		break;
	case 0x20:		/* TVPR */
		retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
		break;
	case 0x30:		/* TDR */
		retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
		break;
	}

out:
	pr_debug("%s: => 0x%08x\n", __func__, retval);
	*ptr = retval;
	return 0;
}

static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
{
	struct openpic *opp = opaque;
	int idx;

	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);

	addr = addr & 0xffff;
	idx = addr >> 5;

	switch (addr & 0x1f) {
	case 0x00:
		write_IRQreg_ivpr(opp, idx, val);
		break;
	case 0x10:
		write_IRQreg_idr(opp, idx, val);
		break;
	case 0x18:
		write_IRQreg_ilr(opp, idx, val);
		break;
	}

	return 0;
}

static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
{
	struct openpic *opp = opaque;
	uint32_t retval;
	int idx;

	pr_debug("%s: addr %#llx\n", __func__, addr);
	retval = 0xFFFFFFFF;

	addr = addr & 0xffff;
	idx = addr >> 5;

	switch (addr & 0x1f) {
	case 0x00:
		retval = read_IRQreg_ivpr(opp, idx);
		break;
	case 0x10:
		retval = read_IRQreg_idr(opp, idx);
		break;
	case 0x18:
		retval = read_IRQreg_ilr(opp, idx);
		break;
	}

	pr_debug("%s: => 0x%08x\n", __func__, retval);
	*ptr = retval;
	return 0;
}

static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
{
	struct openpic *opp = opaque;
	int idx = opp->irq_msi;
	int srs, ibs;

	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
	if (addr & 0xF)
		return 0;

	switch (addr) {
	case MSIIR_OFFSET:
		srs = val >> MSIIR_SRS_SHIFT;
		idx += srs;
		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
		opp->msi[srs].msir |= 1 << ibs;
		openpic_set_irq(opp, idx, 1);
		break;
	default:
		/* most registers are read-only, thus ignored */
		break;
	}

	return 0;
}

static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
{
	struct openpic *opp = opaque;
	uint32_t r = 0;
	int i, srs;

	pr_debug("%s: addr %#llx\n", __func__, addr);
	if (addr & 0xF)
		return -ENXIO;

	srs = addr >> 4;

	switch (addr) {
	case 0x00:
	case 0x10:
	case 0x20:
	case 0x30:
	case 0x40:
	case 0x50:
	case 0x60:
	case 0x70:		/* MSIRs */
		r = opp->msi[srs].msir;
		/* Clear on read */
		opp->msi[srs].msir = 0;
		openpic_set_irq(opp, opp->irq_msi + srs, 0);
		break;
	case 0x120:		/* MSISR */
		for (i = 0; i < MAX_MSI; i++)
			r |= (opp->msi[i].msir ? 1 : 0) << i;
		break;
	}

	pr_debug("%s: => 0x%08x\n", __func__, r);
	*ptr = r;
	return 0;
}

static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
{
	uint32_t r = 0;

	pr_debug("%s: addr %#llx\n", __func__, addr);

	/* TODO: EISR/EIMR */

	*ptr = r;
	return 0;
}

static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
{
	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);

	/* TODO: EISR/EIMR */
	return 0;
}

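/*
 * Per-CPU register writes: IPI dispatch (IPIDR), current task priority
 * (CTPR) and end of interrupt (EOI).  Called with opp->lock held; the
 * EOI path drops the lock around kvm_notify_acked_irq().
 */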
static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
				      u32 val, int idx)
{
	struct openpic *opp = opaque;
	struct irq_source *src;
	struct irq_dest *dst;
	int s_IRQ, n_IRQ;

	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
		addr, val);

	if (idx < 0)
		return 0;

	if (addr & 0xF)
		return 0;

	dst = &opp->dst[idx];
	addr &= 0xFF0;
	switch (addr) {
	case 0x40:		/* IPIDR */
	case 0x50:
	case 0x60:
	case 0x70:
		idx = (addr - 0x40) >> 4;
		/* The value written is a mask of which CPUs the IPI
		 * still needs to be delivered to. */
		opp->src[opp->irq_ipi0 + idx].destmask |= val;
		openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
		openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
		break;
	case 0x80:		/* CTPR */
		dst->ctpr = val & 0x0000000F;

		pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
			__func__, idx, dst->ctpr, dst->raised.priority,
			dst->servicing.priority);

		if (dst->raised.priority <= dst->ctpr) {
			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
				__func__, idx);
			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
		} else if (dst->raised.priority > dst->servicing.priority) {
			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
				__func__, idx, dst->raised.next);
			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
		}

		break;
	case 0x90:		/* WHOAMI */
		/* Read-only register */
		break;
	case 0xA0:		/* IACK */
		/* Read-only register */
		break;
	case 0xB0: {		/* EOI */
		int notify_eoi;

		pr_debug("EOI\n");
		s_IRQ = IRQ_get_next(opp, &dst->servicing);

		if (s_IRQ < 0) {
			pr_debug("%s: EOI with no interrupt in service\n",
				__func__);
			break;
		}

		IRQ_resetbit(&dst->servicing, s_IRQ);
		/* Notify listeners that the IRQ is over */
		notify_eoi = s_IRQ;
		/* Set up next servicing IRQ */
		s_IRQ = IRQ_get_next(opp, &dst->servicing);
		/* Check queued interrupts. */
		n_IRQ = IRQ_get_next(opp, &dst->raised);
		src = &opp->src[n_IRQ];
		if (n_IRQ != -1 &&
		    (s_IRQ == -1 ||
		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
				idx, n_IRQ);
			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
		}

		spin_unlock(&opp->lock);
		kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
		spin_lock(&opp->lock);

		break;
	}
	default:
		break;
	}

	return 0;
}

static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
{
	struct openpic *opp = opaque;

	return openpic_cpu_write_internal(opp, addr, val,
					 (addr & 0x1f000) >> 12);
}

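/*
 * Interrupt acknowledge: lower the INT output, move the highest-
 * priority raised IRQ to the servicing queue and return its vector,
 * or the spurious vector if nothing deliverable is pending.
 */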
static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
			     int cpu)
{
	struct irq_source *src;
	int retval, irq;

	pr_debug("Lower OpenPIC INT output\n");
	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);

	irq = IRQ_get_next(opp, &dst->raised);
	pr_debug("IACK: irq=%d\n", irq);

	if (irq == -1)
		/* No more interrupt pending */
		return opp->spve;

	src = &opp->src[irq];
	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
			__func__, irq, dst->ctpr, src->ivpr);
		openpic_update_irq(opp, irq);
		retval = opp->spve;
	} else {
		/* The IRQ enters the servicing state */
		IRQ_setbit(&dst->servicing, irq);
		retval = IVPR_VECTOR(opp, src->ivpr);
	}

	if (!src->level) {
		/* edge-sensitive IRQ */
		src->ivpr &= ~IVPR_ACTIVITY_MASK;
		src->pending = 0;
		IRQ_resetbit(&dst->raised, irq);
	}

	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
		src->destmask &= ~(1 << cpu);
		if (src->destmask && !src->level) {
			/* trigger on CPUs that didn't know about it yet */
			openpic_set_irq(opp, irq, 1);
			openpic_set_irq(opp, irq, 0);
			/* if all CPUs knew about it, set active bit again */
			src->ivpr |= IVPR_ACTIVITY_MASK;
		}
	}

	return retval;
}

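/*
 * In proxy mode the guest receives the interrupt vector through the
 * EPR register on external-interrupt delivery; perform the IACK on
 * the guest's behalf here.
 */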
void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
{
	struct openpic *opp = vcpu->arch.mpic;
	int cpu = vcpu->arch.irq_cpu_id;
	unsigned long flags;

	spin_lock_irqsave(&opp->lock, flags);

	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));

	spin_unlock_irqrestore(&opp->lock, flags);
}

static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
				     u32 *ptr, int idx)
{
	struct openpic *opp = opaque;
	struct irq_dest *dst;
	uint32_t retval;

	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
	retval = 0xFFFFFFFF;

	if (idx < 0)
		goto out;

	if (addr & 0xF)
		goto out;

	dst = &opp->dst[idx];
	addr &= 0xFF0;
	switch (addr) {
	case 0x80:		/* CTPR */
		retval = dst->ctpr;
		break;
	case 0x90:		/* WHOAMI */
		retval = idx;
		break;
	case 0xA0:		/* IACK */
		retval = openpic_iack(opp, dst, idx);
		break;
	case 0xB0:		/* EOI */
		retval = 0;
		break;
	default:
		break;
	}
	pr_debug("%s: => 0x%08x\n", __func__, retval);

out:
	*ptr = retval;
	return 0;
}

static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
{
	struct openpic *opp = opaque;

	return openpic_cpu_read_internal(opp, addr, ptr,
					 (addr & 0x1f000) >> 12);
}

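/* One register block within the MPIC MMIO window. */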
struct mem_reg {
	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
	int (*write)(void *opaque, gpa_t addr, u32 val);
	gpa_t start_addr;
	int size;
};

static const struct mem_reg openpic_gbl_mmio = {
	.write = openpic_gbl_write,
	.read = openpic_gbl_read,
	.start_addr = OPENPIC_GLB_REG_START,
	.size = OPENPIC_GLB_REG_SIZE,
};

static const struct mem_reg openpic_tmr_mmio = {
	.write = openpic_tmr_write,
	.read = openpic_tmr_read,
	.start_addr = OPENPIC_TMR_REG_START,
	.size = OPENPIC_TMR_REG_SIZE,
};

static const struct mem_reg openpic_cpu_mmio = {
	.write = openpic_cpu_write,
	.read = openpic_cpu_read,
	.start_addr = OPENPIC_CPU_REG_START,
	.size = OPENPIC_CPU_REG_SIZE,
};

static const struct mem_reg openpic_src_mmio = {
	.write = openpic_src_write,
	.read = openpic_src_read,
	.start_addr = OPENPIC_SRC_REG_START,
	.size = OPENPIC_SRC_REG_SIZE,
};

static const struct mem_reg openpic_msi_mmio = {
	.read = openpic_msi_read,
	.write = openpic_msi_write,
	.start_addr = OPENPIC_MSI_REG_START,
	.size = OPENPIC_MSI_REG_SIZE,
};

static const struct mem_reg openpic_summary_mmio = {
	.read = openpic_summary_read,
	.write = openpic_summary_write,
	.start_addr = OPENPIC_SUMMARY_REG_START,
	.size = OPENPIC_SUMMARY_REG_SIZE,
};

static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
{
	if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
		WARN(1, "kvm mpic: too many mmio regions\n");
		return;
	}

	opp->mmio_regions[opp->num_mmio_regions++] = mr;
}

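/*
 * Common setup for the Freescale MPIC variants: register the MSI and
 * summary regions and lay out the virtual IRQ space as external plus
 * internal sources (0..MAX_SRC-1), followed by IPIs, then timers.
 */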
static void fsl_common_init(struct openpic *opp)
{
	int i;
	int virq = MAX_SRC;

	add_mmio_region(opp, &openpic_msi_mmio);
	add_mmio_region(opp, &openpic_summary_mmio);

	opp->vid = VID_REVISION_1_2;
	opp->vir = VIR_GENERIC;
	opp->vector_mask = 0xFFFF;
	opp->tfrr_reset = 0;
	opp->ivpr_reset = IVPR_MASK_MASK;
	opp->idr_reset = 1 << 0;
	opp->max_irq = MAX_IRQ;

	opp->irq_ipi0 = virq;
	virq += MAX_IPI;
	opp->irq_tim0 = virq;
	virq += MAX_TMR;

	BUG_ON(virq > MAX_IRQ);

	opp->irq_msi = 224;

	for (i = 0; i < opp->fsl->max_ext; i++)
		opp->src[i].level = false;

	/* Internal interrupts, including message and MSI */
	for (i = 16; i < MAX_SRC; i++) {
		opp->src[i].type = IRQ_TYPE_FSLINT;
		opp->src[i].level = true;
	}

	/* timers and IPIs */
	for (i = MAX_SRC; i < virq; i++) {
		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
		opp->src[i].level = false;
	}
}

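/* Dispatch an MMIO access to whichever register block contains addr. */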
static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
{
	int i;

	for (i = 0; i < opp->num_mmio_regions; i++) {
		const struct mem_reg *mr = opp->mmio_regions[i];

		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
			continue;

		return mr->read(opp, addr - mr->start_addr, ptr);
	}

	return -ENXIO;
}

static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
{
	int i;

	for (i = 0; i < opp->num_mmio_regions; i++) {
		const struct mem_reg *mr = opp->mmio_regions[i];

		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
			continue;

		return mr->write(opp, addr - mr->start_addr, val);
	}

	return -ENXIO;
}

static int kvm_mpic_read(struct kvm_vcpu *vcpu,
			 struct kvm_io_device *this,
			 gpa_t addr, int len, void *ptr)
{
	struct openpic *opp = container_of(this, struct openpic, mmio);
	int ret;
	union {
		u32 val;
		u8 bytes[4];
	} u;

	if (addr & (len - 1)) {
		pr_debug("%s: bad alignment %llx/%d\n",
			 __func__, addr, len);
		return -EINVAL;
	}

	spin_lock_irq(&opp->lock);
	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
	spin_unlock_irq(&opp->lock);

	/*
	 * Technically only 32-bit accesses are allowed, but be nice to
	 * people dumping registers a byte at a time -- it works in real
	 * hardware (reads only, not writes).
	 */
	if (len == 4) {
		*(u32 *)ptr = u.val;
		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
			 __func__, addr, ret, u.val);
	} else if (len == 1) {
		*(u8 *)ptr = u.bytes[addr & 3];
		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
			 __func__, addr, ret, u.bytes[addr & 3]);
	} else {
		pr_debug("%s: bad length %d\n", __func__, len);
		return -EINVAL;
	}

	return ret;
}

static int kvm_mpic_write(struct kvm_vcpu *vcpu,
			  struct kvm_io_device *this,
			  gpa_t addr, int len, const void *ptr)
{
	struct openpic *opp = container_of(this, struct openpic, mmio);
	int ret;

	if (len != 4) {
		pr_debug("%s: bad length %d\n", __func__, len);
		return -EOPNOTSUPP;
	}
	if (addr & 3) {
		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
		return -EOPNOTSUPP;
	}

	spin_lock_irq(&opp->lock);
	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
				      *(const u32 *)ptr);
	spin_unlock_irq(&opp->lock);

	pr_debug("%s: addr %llx ret %d val %x\n",
		 __func__, addr, ret, *(const u32 *)ptr);

	return ret;
}

static const struct kvm_io_device_ops mpic_mmio_ops = {
	.read = kvm_mpic_read,
	.write = kvm_mpic_write,
};

static void map_mmio(struct openpic *opp)
{
	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);

	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
				opp->reg_base, OPENPIC_REG_SIZE,
				&opp->mmio);
}

static void unmap_mmio(struct openpic *opp)
{
	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
}

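/*
 * Handle a userspace write of the KVM_DEV_MPIC_BASE_ADDR attribute:
 * move the MMIO window to the new guest-physical base, or unmap it
 * entirely when the base is 0.
 */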
static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
{
	u64 base;

	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
		return -EFAULT;

	if (base & 0x3ffff) {
		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
			 __func__, base);
		return -EINVAL;
	}

	if (base == opp->reg_base)
		return 0;

	mutex_lock(&opp->kvm->slots_lock);

	unmap_mmio(opp);
	opp->reg_base = base;

	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
		 __func__, base);

	if (base == 0)
		goto out;

	map_mmio(opp);

out:
	mutex_unlock(&opp->kvm->slots_lock);
	return 0;
}

#define ATTR_SET		0
#define ATTR_GET		1

static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
{
	int ret;

	if (addr & 3)
		return -ENXIO;

	spin_lock_irq(&opp->lock);

	if (type == ATTR_SET)
		ret = kvm_mpic_write_internal(opp, addr, *val);
	else
		ret = kvm_mpic_read_internal(opp, addr, val);

	spin_unlock_irq(&opp->lock);

	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);

	return ret;
}

static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct openpic *opp = dev->private;
	u32 attr32;

	switch (attr->group) {
	case KVM_DEV_MPIC_GRP_MISC:
		switch (attr->attr) {
		case KVM_DEV_MPIC_BASE_ADDR:
			return set_base_addr(opp, attr);
		}

		break;

	case KVM_DEV_MPIC_GRP_REGISTER:
		if (get_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		return access_reg(opp, attr->attr, &attr32, ATTR_SET);

	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
		if (attr->attr > MAX_SRC)
			return -EINVAL;

		if (get_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		if (attr32 != 0 && attr32 != 1)
			return -EINVAL;

		spin_lock_irq(&opp->lock);
		openpic_set_irq(opp, attr->attr, attr32);
		spin_unlock_irq(&opp->lock);
		return 0;
	}

	return -ENXIO;
}

static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct openpic *opp = dev->private;
	u64 attr64;
	u32 attr32;
	int ret;

	switch (attr->group) {
	case KVM_DEV_MPIC_GRP_MISC:
		switch (attr->attr) {
		case KVM_DEV_MPIC_BASE_ADDR:
			mutex_lock(&opp->kvm->slots_lock);
			attr64 = opp->reg_base;
			mutex_unlock(&opp->kvm->slots_lock);

			if (copy_to_user((u64 __user *)(long)attr->addr,
					 &attr64, sizeof(u64)))
				return -EFAULT;

			return 0;
		}

		break;

	case KVM_DEV_MPIC_GRP_REGISTER:
		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
		if (ret)
			return ret;

		if (put_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		return 0;

	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
		if (attr->attr > MAX_SRC)
			return -EINVAL;

		spin_lock_irq(&opp->lock);
		attr32 = opp->src[attr->attr].pending;
		spin_unlock_irq(&opp->lock);

		if (put_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		return 0;
	}

	return -ENXIO;
}

static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_MPIC_GRP_MISC:
		switch (attr->attr) {
		case KVM_DEV_MPIC_BASE_ADDR:
			return 0;
		}

		break;

	case KVM_DEV_MPIC_GRP_REGISTER:
		return 0;

	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
		if (attr->attr > MAX_SRC)
			break;

		return 0;
	}

	return -ENXIO;
}

static void mpic_destroy(struct kvm_device *dev)
{
	struct openpic *opp = dev->private;

	dev->kvm->arch.mpic = NULL;
	kfree(opp);
	kfree(dev);
}

static int mpic_set_default_irq_routing(struct openpic *opp)
{
	struct kvm_irq_routing_entry *routing;

	/* Create a no-op default map so that dereferencing it still works */
	routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
	if (!routing)
		return -ENOMEM;

	kvm_set_irq_routing(opp->kvm, routing, 0, 0);

	kfree(routing);
	return 0;
}

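/*
 * KVM_CREATE_DEVICE handler: allocate and set up the PIC for the
 * requested device type (currently FSL MPIC v2.0 or v4.2).
 */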
static int mpic_create(struct kvm_device *dev, u32 type)
{
	struct openpic *opp;
	int ret;

	/* We only support one MPIC at a time for now */
	if (dev->kvm->arch.mpic)
		return -EINVAL;

	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
	if (!opp)
		return -ENOMEM;

	dev->private = opp;
	opp->kvm = dev->kvm;
	opp->dev = dev;
	opp->model = type;
	spin_lock_init(&opp->lock);

	add_mmio_region(opp, &openpic_gbl_mmio);
	add_mmio_region(opp, &openpic_tmr_mmio);
	add_mmio_region(opp, &openpic_src_mmio);
	add_mmio_region(opp, &openpic_cpu_mmio);

	switch (opp->model) {
	case KVM_DEV_TYPE_FSL_MPIC_20:
		opp->fsl = &fsl_mpic_20;
		opp->brr1 = 0x00400200;
		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
		opp->nb_irqs = 80;
		opp->mpic_mode_mask = GCR_MODE_MIXED;

		fsl_common_init(opp);

		break;

	case KVM_DEV_TYPE_FSL_MPIC_42:
		opp->fsl = &fsl_mpic_42;
		opp->brr1 = 0x00400402;
		opp->flags |= OPENPIC_FLAG_ILR;
		opp->nb_irqs = 196;
		opp->mpic_mode_mask = GCR_MODE_PROXY;

		fsl_common_init(opp);

		break;

	default:
		ret = -ENODEV;
		goto err;
	}

	ret = mpic_set_default_irq_routing(opp);
	if (ret)
		goto err;

	openpic_reset(opp);

	smp_wmb();
	dev->kvm->arch.mpic = opp;

	return 0;

err:
	kfree(opp);
	return ret;
}

struct kvm_device_ops kvm_mpic_ops = {
	.name = "kvm-mpic",
	.create = mpic_create,
	.destroy = mpic_destroy,
	.set_attr = mpic_set_attr,
	.get_attr = mpic_get_attr,
	.has_attr = mpic_has_attr,
};

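/*
 * Bind a vcpu to one of the PIC's CPU interfaces.  Reached via the
 * KVM_ENABLE_CAP(KVM_CAP_IRQ_MPIC) vcpu ioctl, with the device fd and
 * the CPU number as arguments; called once per vcpu before it runs.
 */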
int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
			     u32 cpu)
{
	struct openpic *opp = dev->private;
	int ret = 0;

	if (dev->ops != &kvm_mpic_ops)
		return -EPERM;
	if (opp->kvm != vcpu->kvm)
		return -EPERM;
	if (cpu >= MAX_CPU)
		return -EPERM;

	spin_lock_irq(&opp->lock);

	if (opp->dst[cpu].vcpu) {
		ret = -EEXIST;
		goto out;
	}
	if (vcpu->arch.irq_type) {
		ret = -EBUSY;
		goto out;
	}

	opp->dst[cpu].vcpu = vcpu;
	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);

	vcpu->arch.mpic = opp;
	vcpu->arch.irq_cpu_id = cpu;
	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;

	/* This might need to be changed if GCR gets extended */
	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;

out:
	spin_unlock_irq(&opp->lock);
	return ret;
}

/*
 * This should only happen immediately before the mpic is destroyed,
 * so we shouldn't need to worry about anything still trying to
 * access the vcpu pointer.
 */
void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
{
	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);

	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
}

/*
 * Return value:
 *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
 *  = 0   Interrupt was coalesced (previous irq is still pending)
 *  > 0   Number of CPUs interrupt was delivered to
 */
static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
			struct kvm *kvm, int irq_source_id, int level,
			bool line_status)
{
	u32 irq = e->irqchip.pin;
	struct openpic *opp = kvm->arch.mpic;
	unsigned long flags;

	spin_lock_irqsave(&opp->lock, flags);
	openpic_set_irq(opp, irq, level);
	spin_unlock_irqrestore(&opp->lock, flags);

	/* None of the code paths we care about check the return value */
	return 0;
}

int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
		struct kvm *kvm, int irq_source_id, int level, bool line_status)
{
	struct openpic *opp = kvm->arch.mpic;
	unsigned long flags;

	spin_lock_irqsave(&opp->lock, flags);

	/*
	 * XXX We ignore the target address for now, as we only support
	 *     a single MSI bank.
	 */
	openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
	spin_unlock_irqrestore(&opp->lock, flags);

	/* None of the code paths we care about check the return value */
	return 0;
}

int kvm_set_routing_entry(struct kvm *kvm,
			  struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue)
{
	int r = -EINVAL;

	switch (ue->type) {
	case KVM_IRQ_ROUTING_IRQCHIP:
		e->set = mpic_set_irq;
		e->irqchip.irqchip = ue->u.irqchip.irqchip;
		e->irqchip.pin = ue->u.irqchip.pin;
		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
			goto out;
		break;
	case KVM_IRQ_ROUTING_MSI:
		e->set = kvm_set_msi;
		e->msi.address_lo = ue->u.msi.address_lo;
		e->msi.address_hi = ue->u.msi.address_hi;
		e->msi.data = ue->u.msi.data;
		break;
	default:
		goto out;
	}

	r = 0;
out:
	return r;
}