• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/arch/x86/kvm/
1
2/*
3 * Local APIC virtualization
4 *
5 * Copyright (C) 2006 Qumranet, Inc.
6 * Copyright (C) 2007 Novell
7 * Copyright (C) 2007 Intel
8 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
9 *
10 * Authors:
11 *   Dor Laor <dor.laor@qumranet.com>
12 *   Gregory Haskins <ghaskins@novell.com>
13 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
14 *
15 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
16 *
17 * This work is licensed under the terms of the GNU GPL, version 2.  See
18 * the COPYING file in the top-level directory.
19 */
20
21#include <linux/kvm_host.h>
22#include <linux/kvm.h>
23#include <linux/mm.h>
24#include <linux/highmem.h>
25#include <linux/smp.h>
26#include <linux/hrtimer.h>
27#include <linux/io.h>
28#include <linux/module.h>
29#include <linux/math64.h>
30#include <linux/slab.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33#include <asm/page.h>
34#include <asm/current.h>
35#include <asm/apicdef.h>
36#include <asm/atomic.h>
37#include "kvm_cache_regs.h"
38#include "irq.h"
39#include "trace.h"
40#include "x86.h"
41
/*
 * mod_64(x, y): 64-bit remainder of x / y.  When CONFIG_X86_64 is not set the
 * remainder is derived from div64_u64() rather than the '%' operator; note
 * that this form expands both arguments twice.
 */
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

/* Conversion-specifier fragments spliced into apic_debug() format strings. */
#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* Multiplier that turns (timer count * divide count) into nanoseconds. */
#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
/* Debug output expands to nothing; swap in the definition above to enable it. */
#define apic_debug(fmt, arg...)

/* Number of LVT registers, laid out 0x10 bytes apart starting at APIC_LVTT. */
#define APIC_LVT_NUM			6
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
/* Size in bytes of the register window decoded by the MMIO handlers. */
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h */
/* ICR destination-shorthand field */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
/* ICR destination-mode bit */
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256

/* Bit index of vector v inside its 32-bit register. */
#define VEC_POS(v) ((v) & (32 - 1))
/* Byte offset, from the bitmap base, of the register that holds vector v. */
#define REG_POS(v) (((v) >> 5) << 4)
70
71static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
72{
73	return *((u32 *) (apic->regs + reg_off));
74}
75
76static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
77{
78	*((u32 *) (apic->regs + reg_off)) = val;
79}
80
/*
 * Set the bit for vector vec in a 256-bit APIC bitmap (IRR/ISR/TMR layout)
 * and return the result of test_and_set_bit() on it.
 */
static inline int apic_test_and_set_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return test_and_set_bit(VEC_POS(vec), reg);
}
85
/*
 * Clear the bit for vector vec in a 256-bit APIC bitmap and return the result
 * of test_and_clear_bit() on it.
 */
static inline int apic_test_and_clear_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return test_and_clear_bit(VEC_POS(vec), reg);
}
90
/* Set the bit for vector vec in a 256-bit APIC bitmap. */
static inline void apic_set_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	set_bit(VEC_POS(vec), reg);
}
95
/* Clear the bit for vector vec in a 256-bit APIC bitmap. */
static inline void apic_clear_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	clear_bit(VEC_POS(vec), reg);
}
100
101static inline int apic_hw_enabled(struct kvm_lapic *apic)
102{
103	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
104}
105
106static inline int  apic_sw_enabled(struct kvm_lapic *apic)
107{
108	return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
109}
110
/* Return 1 when the APIC is enabled both in software (SPIV) and in the base MSR. */
static inline int apic_enabled(struct kvm_lapic *apic)
{
	if (!apic_sw_enabled(apic))
		return 0;

	return apic_hw_enabled(apic) != 0;
}
115
/* Bits kept in every LVT register: mask bit, send-pending bit and vector. */
#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

/*
 * Bits kept in the LINT0/LINT1 LVT registers: LVT_MASK plus delivery mode,
 * input polarity, remote IRR and trigger mode.
 */
#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
122
123static inline int kvm_apic_id(struct kvm_lapic *apic)
124{
125	return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
126}
127
128static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
129{
130	return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
131}
132
133static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
134{
135	return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
136}
137
138static inline int apic_lvtt_period(struct kvm_lapic *apic)
139{
140	return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
141}
142
143static inline int apic_lvt_nmi_mode(u32 lvt_val)
144{
145	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
146}
147
148void kvm_apic_set_version(struct kvm_vcpu *vcpu)
149{
150	struct kvm_lapic *apic = vcpu->arch.apic;
151	struct kvm_cpuid_entry2 *feat;
152	u32 v = APIC_VERSION;
153
154	if (!irqchip_in_kernel(vcpu->kvm))
155		return;
156
157	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
158	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
159		v |= APIC_LVR_DIRECTED_EOI;
160	apic_set_reg(apic, APIC_LVR, v);
161}
162
163static inline int apic_x2apic_mode(struct kvm_lapic *apic)
164{
165	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
166}
167
168static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
169	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
170	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
171	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
172	LINT_MASK, LINT_MASK,	/* LVT0-1 */
173	LVT_MASK		/* LVTERR */
174};
175
176static int find_highest_vector(void *bitmap)
177{
178	u32 *word = bitmap;
179	int word_offset = MAX_APIC_VECTOR >> 5;
180
181	while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
182		continue;
183
184	if (likely(!word_offset && !word[0]))
185		return -1;
186	else
187		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
188}
189
190static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
191{
192	apic->irr_pending = true;
193	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
194}
195
196static inline int apic_search_irr(struct kvm_lapic *apic)
197{
198	return find_highest_vector(apic->regs + APIC_IRR);
199}
200
201static inline int apic_find_highest_irr(struct kvm_lapic *apic)
202{
203	int result;
204
205	if (!apic->irr_pending)
206		return -1;
207
208	result = apic_search_irr(apic);
209	ASSERT(result == -1 || result >= 16);
210
211	return result;
212}
213
214static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
215{
216	apic->irr_pending = false;
217	apic_clear_vector(vec, apic->regs + APIC_IRR);
218	if (apic_search_irr(apic) != -1)
219		apic->irr_pending = true;
220}
221
222int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
223{
224	struct kvm_lapic *apic = vcpu->arch.apic;
225	int highest_irr;
226
227	/* This may race with setting of irr in __apic_accept_irq() and
228	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
229	 * will cause vmexit immediately and the value will be recalculated
230	 * on the next vmentry.
231	 */
232	if (!apic)
233		return 0;
234	highest_irr = apic_find_highest_irr(apic);
235
236	return highest_irr;
237}
238
239static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
240			     int vector, int level, int trig_mode);
241
242int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
243{
244	struct kvm_lapic *apic = vcpu->arch.apic;
245
246	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
247			irq->level, irq->trig_mode);
248}
249
250static inline int apic_find_highest_isr(struct kvm_lapic *apic)
251{
252	int result;
253
254	result = find_highest_vector(apic->regs + APIC_ISR);
255	ASSERT(result == -1 || result >= 16);
256
257	return result;
258}
259
260static void apic_update_ppr(struct kvm_lapic *apic)
261{
262	u32 tpr, isrv, ppr;
263	int isr;
264
265	tpr = apic_get_reg(apic, APIC_TASKPRI);
266	isr = apic_find_highest_isr(apic);
267	isrv = (isr != -1) ? isr : 0;
268
269	if ((tpr & 0xf0) >= (isrv & 0xf0))
270		ppr = tpr & 0xff;
271	else
272		ppr = isrv & 0xf0;
273
274	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
275		   apic, ppr, isr, isrv);
276
277	apic_set_reg(apic, APIC_PROCPRI, ppr);
278}
279
280static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
281{
282	apic_set_reg(apic, APIC_TASKPRI, tpr);
283	apic_update_ppr(apic);
284}
285
286int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
287{
288	return dest == 0xff || kvm_apic_id(apic) == dest;
289}
290
291int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
292{
293	int result = 0;
294	u32 logical_id;
295
296	if (apic_x2apic_mode(apic)) {
297		logical_id = apic_get_reg(apic, APIC_LDR);
298		return logical_id & mda;
299	}
300
301	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
302
303	switch (apic_get_reg(apic, APIC_DFR)) {
304	case APIC_DFR_FLAT:
305		if (logical_id & mda)
306			result = 1;
307		break;
308	case APIC_DFR_CLUSTER:
309		if (((logical_id >> 4) == (mda >> 0x4))
310		    && (logical_id & mda & 0xf))
311			result = 1;
312		break;
313	default:
314		printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
315		       apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
316		break;
317	}
318
319	return result;
320}
321
322int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
323			   int short_hand, int dest, int dest_mode)
324{
325	int result = 0;
326	struct kvm_lapic *target = vcpu->arch.apic;
327
328	apic_debug("target %p, source %p, dest 0x%x, "
329		   "dest_mode 0x%x, short_hand 0x%x\n",
330		   target, source, dest, dest_mode, short_hand);
331
332	ASSERT(target);
333	switch (short_hand) {
334	case APIC_DEST_NOSHORT:
335		if (dest_mode == 0)
336			/* Physical mode. */
337			result = kvm_apic_match_physical_addr(target, dest);
338		else
339			/* Logical mode. */
340			result = kvm_apic_match_logical_addr(target, dest);
341		break;
342	case APIC_DEST_SELF:
343		result = (target == source);
344		break;
345	case APIC_DEST_ALLINC:
346		result = 1;
347		break;
348	case APIC_DEST_ALLBUT:
349		result = (target != source);
350		break;
351	default:
352		printk(KERN_WARNING "Bad dest shorthand value %x\n",
353		       short_hand);
354		break;
355	}
356
357	return result;
358}
359
/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 *
 * @apic:          destination local APIC
 * @delivery_mode: one of the APIC_DM_* delivery modes
 * @vector:        interrupt vector (also the SIPI vector for APIC_DM_STARTUP)
 * @level:         assert/de-assert level, only consulted for APIC_DM_INIT
 * @trig_mode:     non-zero for level-triggered, recorded in the TMR
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		/* bump the value compared by kvm_apic_compare_prio() */
		vcpu->arch.apic_arb_prio++;
		/* fall through - otherwise handled exactly like fixed delivery */
	case APIC_DM_FIXED:
		/* a disabled APIC discards the interrupt */
		if (unlikely(!apic_enabled(apic)))
			break;

		/* record the trigger mode so that EOI can report it later */
		if (trig_mode) {
			apic_debug("level trig mode for vector %d", vector);
			apic_set_vector(vector, apic->regs + APIC_TMR);
		} else
			apic_clear_vector(vector, apic->regs + APIC_TMR);

		/* result is 0 when the vector was already pending (coalesced) */
		result = !apic_test_and_set_irr(vector, apic);
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, !result);
		if (!result) {
			if (trig_mode)
				apic_debug("level trig mode repeatedly for "
						"vector %d", vector);
			break;
		}

		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_REMRD:
		printk(KERN_DEBUG "Ignoring delivery mode 3\n");
		break;

	case APIC_DM_SMI:
		printk(KERN_DEBUG "Ignoring guest SMI\n");
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		/* only an asserted INIT changes the vcpu state */
		if (level) {
			result = 1;
			if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
				printk(KERN_DEBUG
				       "INIT on a runnable vcpu %d\n",
				       vcpu->vcpu_id);
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
			kvm_vcpu_kick(vcpu);
		} else {
			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
				   vcpu->vcpu_id);
		}
		break;

	case APIC_DM_STARTUP:
		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
			   vcpu->vcpu_id, vector);
		/* a SIPI is only accepted after an INIT has been received */
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
			result = 1;
			vcpu->arch.sipi_vector = vector;
			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}
451
452int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
453{
454	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
455}
456
457static void apic_set_eoi(struct kvm_lapic *apic)
458{
459	int vector = apic_find_highest_isr(apic);
460	int trigger_mode;
461	/*
462	 * Not every write EOI will has corresponding ISR,
463	 * one example is when Kernel check timer on setup_IO_APIC
464	 */
465	if (vector == -1)
466		return;
467
468	apic_clear_vector(vector, apic->regs + APIC_ISR);
469	apic_update_ppr(apic);
470
471	if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
472		trigger_mode = IOAPIC_LEVEL_TRIG;
473	else
474		trigger_mode = IOAPIC_EDGE_TRIG;
475	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
476		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
477}
478
479static void apic_send_ipi(struct kvm_lapic *apic)
480{
481	u32 icr_low = apic_get_reg(apic, APIC_ICR);
482	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
483	struct kvm_lapic_irq irq;
484
485	irq.vector = icr_low & APIC_VECTOR_MASK;
486	irq.delivery_mode = icr_low & APIC_MODE_MASK;
487	irq.dest_mode = icr_low & APIC_DEST_MASK;
488	irq.level = icr_low & APIC_INT_ASSERT;
489	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
490	irq.shorthand = icr_low & APIC_SHORT_MASK;
491	if (apic_x2apic_mode(apic))
492		irq.dest_id = icr_high;
493	else
494		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
495
496	trace_kvm_apic_ipi(icr_low, irq.dest_id);
497
498	apic_debug("icr_high 0x%x, icr_low 0x%x, "
499		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
500		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
501		   icr_high, icr_low, irq.shorthand, irq.dest_id,
502		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
503		   irq.vector);
504
505	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
506}
507
508static u32 apic_get_tmcct(struct kvm_lapic *apic)
509{
510	ktime_t remaining;
511	s64 ns;
512	u32 tmcct;
513
514	ASSERT(apic != NULL);
515
516	/* if initial count is 0, current count should also be 0 */
517	if (apic_get_reg(apic, APIC_TMICT) == 0)
518		return 0;
519
520	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
521	if (ktime_to_ns(remaining) < 0)
522		remaining = ktime_set(0, 0);
523
524	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
525	tmcct = div64_u64(ns,
526			 (APIC_BUS_CYCLE_NS * apic->divide_count));
527
528	return tmcct;
529}
530
531static void __report_tpr_access(struct kvm_lapic *apic, bool write)
532{
533	struct kvm_vcpu *vcpu = apic->vcpu;
534	struct kvm_run *run = vcpu->run;
535
536	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
537	run->tpr_access.rip = kvm_rip_read(vcpu);
538	run->tpr_access.is_write = write;
539}
540
541static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
542{
543	if (apic->vcpu->arch.tpr_access_reporting)
544		__report_tpr_access(apic, write);
545}
546
547static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
548{
549	u32 val = 0;
550
551	if (offset >= LAPIC_MMIO_LENGTH)
552		return 0;
553
554	switch (offset) {
555	case APIC_ID:
556		if (apic_x2apic_mode(apic))
557			val = kvm_apic_id(apic);
558		else
559			val = kvm_apic_id(apic) << 24;
560		break;
561	case APIC_ARBPRI:
562		printk(KERN_WARNING "Access APIC ARBPRI register "
563		       "which is for P6\n");
564		break;
565
566	case APIC_TMCCT:	/* Timer CCR */
567		val = apic_get_tmcct(apic);
568		break;
569
570	case APIC_TASKPRI:
571		report_tpr_access(apic, false);
572		/* fall thru */
573	default:
574		apic_update_ppr(apic);
575		val = apic_get_reg(apic, offset);
576		break;
577	}
578
579	return val;
580}
581
582static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
583{
584	return container_of(dev, struct kvm_lapic, dev);
585}
586
587static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
588		void *data)
589{
590	unsigned char alignment = offset & 0xf;
591	u32 result;
592	/* this bitmask has a bit cleared for each reserver register */
593	static const u64 rmask = 0x43ff01ffffffe70cULL;
594
595	if ((alignment + len) > 4) {
596		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
597			   offset, len);
598		return 1;
599	}
600
601	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
602		apic_debug("KVM_APIC_READ: read reserved register %x\n",
603			   offset);
604		return 1;
605	}
606
607	result = __apic_read(apic, offset & ~0xf);
608
609	trace_kvm_apic_read(offset, result);
610
611	switch (len) {
612	case 1:
613	case 2:
614	case 4:
615		memcpy(data, (char *)&result + alignment, len);
616		break;
617	default:
618		printk(KERN_ERR "Local APIC read with len = %x, "
619		       "should be 1,2, or 4 instead\n", len);
620		break;
621	}
622	return 0;
623}
624
625static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
626{
627	return apic_hw_enabled(apic) &&
628	    addr >= apic->base_address &&
629	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
630}
631
632static int apic_mmio_read(struct kvm_io_device *this,
633			   gpa_t address, int len, void *data)
634{
635	struct kvm_lapic *apic = to_lapic(this);
636	u32 offset = address - apic->base_address;
637
638	if (!apic_mmio_in_range(apic, address))
639		return -EOPNOTSUPP;
640
641	apic_reg_read(apic, offset, len, data);
642
643	return 0;
644}
645
646static void update_divide_count(struct kvm_lapic *apic)
647{
648	u32 tmp1, tmp2, tdcr;
649
650	tdcr = apic_get_reg(apic, APIC_TDCR);
651	tmp1 = tdcr & 0xf;
652	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
653	apic->divide_count = 0x1 << (tmp2 & 0x7);
654
655	apic_debug("timer divide count is 0x%x\n",
656				   apic->divide_count);
657}
658
659static void start_apic_timer(struct kvm_lapic *apic)
660{
661	ktime_t now = apic->lapic_timer.timer.base->get_time();
662
663	apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
664		    APIC_BUS_CYCLE_NS * apic->divide_count;
665	atomic_set(&apic->lapic_timer.pending, 0);
666
667	if (!apic->lapic_timer.period)
668		return;
669	/*
670	 * Do not allow the guest to program periodic timers with small
671	 * interval, since the hrtimers are not throttled by the host
672	 * scheduler.
673	 */
674	if (apic_lvtt_period(apic)) {
675		if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
676			apic->lapic_timer.period = NSEC_PER_MSEC/2;
677	}
678
679	hrtimer_start(&apic->lapic_timer.timer,
680		      ktime_add_ns(now, apic->lapic_timer.period),
681		      HRTIMER_MODE_ABS);
682
683	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
684			   PRIx64 ", "
685			   "timer initial count 0x%x, period %lldns, "
686			   "expire @ 0x%016" PRIx64 ".\n", __func__,
687			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
688			   apic_get_reg(apic, APIC_TMICT),
689			   apic->lapic_timer.period,
690			   ktime_to_ns(ktime_add_ns(now,
691					apic->lapic_timer.period)));
692}
693
694static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
695{
696	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
697
698	if (apic_lvt_nmi_mode(lvt0_val)) {
699		if (!nmi_wd_enabled) {
700			apic_debug("Receive NMI setting on APIC_LVT0 "
701				   "for cpu %d\n", apic->vcpu->vcpu_id);
702			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
703		}
704	} else if (nmi_wd_enabled)
705		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
706}
707
/*
 * Write val to the APIC register at byte offset reg and apply the register's
 * side effects (EOI handling, IPI send, timer start, ...).
 *
 * Returns 0 when the write was accepted and 1 when it was rejected: unknown
 * register, a register that is not writable in the current (x2)APIC mode, or
 * a non-zero ESR write in x2APIC mode.
 */
static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		/* ID, LDR and DFR are only writable outside x2APIC mode */
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_ID, val);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		/* the written value is ignored */
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		/* only the top four bits are taken from val */
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
		else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		/* the directed-EOI bit is writable only if LVR advertises it */
		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_reg(apic, APIC_SPIV, val & mask);
		/* software-disabling the APIC masks every LVT entry */
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < APIC_LVT_NUM; i++) {
				lvt_val = apic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		/* outside x2APIC mode only the top byte is kept */
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through - LVT0 is then stored like any other LVT entry */
	case APIC_LVTT:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		/* LVT entries stay masked while the APIC is software-disabled */
		if (!apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		/* drop the bits that are not writable for this LVT entry */
		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

	case APIC_TMICT:
		/* a new initial count restarts the timer */
		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR:
		if (val & 4)
			printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
		apic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		break;

	case APIC_ESR:
		/* the register itself is never modified here */
		if (apic_x2apic_mode(apic) && val != 0) {
			printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		/* x2APIC only: emulated as an ICR write with value 0x40000 | vector */
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}
827
828static int apic_mmio_write(struct kvm_io_device *this,
829			    gpa_t address, int len, const void *data)
830{
831	struct kvm_lapic *apic = to_lapic(this);
832	unsigned int offset = address - apic->base_address;
833	u32 val;
834
835	if (!apic_mmio_in_range(apic, address))
836		return -EOPNOTSUPP;
837
838	/*
839	 * APIC register must be aligned on 128-bits boundary.
840	 * 32/64/128 bits registers must be accessed thru 32 bits.
841	 * Refer SDM 8.4.1
842	 */
843	if (len != 4 || (offset & 0xf)) {
844		/* Don't shout loud, $infamous_os would cause only noise. */
845		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
846		return 0;
847	}
848
849	val = *(u32*)data;
850
851	/* too common printing */
852	if (offset != APIC_EOI)
853		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
854			   "0x%x\n", __func__, offset, len, val);
855
856	apic_reg_write(apic, offset & 0xff0, val);
857
858	return 0;
859}
860
861void kvm_free_lapic(struct kvm_vcpu *vcpu)
862{
863	if (!vcpu->arch.apic)
864		return;
865
866	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
867
868	if (vcpu->arch.apic->regs_page)
869		__free_page(vcpu->arch.apic->regs_page);
870
871	kfree(vcpu->arch.apic);
872}
873
874/*
875 *----------------------------------------------------------------------
876 * LAPIC interface
877 *----------------------------------------------------------------------
878 */
879
880void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
881{
882	struct kvm_lapic *apic = vcpu->arch.apic;
883
884	if (!apic)
885		return;
886	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
887		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
888}
889
890u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
891{
892	struct kvm_lapic *apic = vcpu->arch.apic;
893	u64 tpr;
894
895	if (!apic)
896		return 0;
897	tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
898
899	return (tpr & 0xf0) >> 4;
900}
901
902void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
903{
904	struct kvm_lapic *apic = vcpu->arch.apic;
905
906	if (!apic) {
907		value |= MSR_IA32_APICBASE_BSP;
908		vcpu->arch.apic_base = value;
909		return;
910	}
911
912	if (!kvm_vcpu_is_bsp(apic->vcpu))
913		value &= ~MSR_IA32_APICBASE_BSP;
914
915	vcpu->arch.apic_base = value;
916	if (apic_x2apic_mode(apic)) {
917		u32 id = kvm_apic_id(apic);
918		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
919		apic_set_reg(apic, APIC_LDR, ldr);
920	}
921	apic->base_address = apic->vcpu->arch.apic_base &
922			     MSR_IA32_APICBASE_BASE;
923
924	/* with FSB delivery interrupt, we can restart APIC functionality */
925	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
926		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
927
928}
929
930void kvm_lapic_reset(struct kvm_vcpu *vcpu)
931{
932	struct kvm_lapic *apic;
933	int i;
934
935	apic_debug("%s\n", __func__);
936
937	ASSERT(vcpu);
938	apic = vcpu->arch.apic;
939	ASSERT(apic != NULL);
940
941	/* Stop the timer in case it's a reset to an active apic */
942	hrtimer_cancel(&apic->lapic_timer.timer);
943
944	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
945	kvm_apic_set_version(apic->vcpu);
946
947	for (i = 0; i < APIC_LVT_NUM; i++)
948		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
949	apic_set_reg(apic, APIC_LVT0,
950		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
951
952	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
953	apic_set_reg(apic, APIC_SPIV, 0xff);
954	apic_set_reg(apic, APIC_TASKPRI, 0);
955	apic_set_reg(apic, APIC_LDR, 0);
956	apic_set_reg(apic, APIC_ESR, 0);
957	apic_set_reg(apic, APIC_ICR, 0);
958	apic_set_reg(apic, APIC_ICR2, 0);
959	apic_set_reg(apic, APIC_TDCR, 0);
960	apic_set_reg(apic, APIC_TMICT, 0);
961	for (i = 0; i < 8; i++) {
962		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
963		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
964		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
965	}
966	apic->irr_pending = false;
967	update_divide_count(apic);
968	atomic_set(&apic->lapic_timer.pending, 0);
969	if (kvm_vcpu_is_bsp(vcpu))
970		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
971	apic_update_ppr(apic);
972
973	vcpu->arch.apic_arb_prio = 0;
974
975	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
976		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
977		   vcpu, kvm_apic_id(apic),
978		   vcpu->arch.apic_base, apic->base_address);
979}
980
981bool kvm_apic_present(struct kvm_vcpu *vcpu)
982{
983	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
984}
985
986int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
987{
988	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
989}
990
991/*
992 *----------------------------------------------------------------------
993 * timer interface
994 *----------------------------------------------------------------------
995 */
996
997static bool lapic_is_periodic(struct kvm_timer *ktimer)
998{
999	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
1000					      lapic_timer);
1001	return apic_lvtt_period(apic);
1002}
1003
1004int apic_has_pending_timer(struct kvm_vcpu *vcpu)
1005{
1006	struct kvm_lapic *lapic = vcpu->arch.apic;
1007
1008	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
1009		return atomic_read(&lapic->lapic_timer.pending);
1010
1011	return 0;
1012}
1013
1014static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1015{
1016	u32 reg = apic_get_reg(apic, lvt_type);
1017	int vector, mode, trig_mode;
1018
1019	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
1020		vector = reg & APIC_VECTOR_MASK;
1021		mode = reg & APIC_MODE_MASK;
1022		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1023		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
1024	}
1025	return 0;
1026}
1027
1028void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
1029{
1030	struct kvm_lapic *apic = vcpu->arch.apic;
1031
1032	if (apic)
1033		kvm_apic_local_deliver(apic, APIC_LVT0);
1034}
1035
/* Timer callbacks installed on the APIC timer by kvm_create_lapic(). */
static struct kvm_timer_ops lapic_timer_ops = {
	.is_periodic = lapic_is_periodic,
};
1039
/* MMIO handlers registered for the APIC's kvm_io_device by kvm_create_lapic(). */
static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};
1044
1045int kvm_create_lapic(struct kvm_vcpu *vcpu)
1046{
1047	struct kvm_lapic *apic;
1048
1049	ASSERT(vcpu != NULL);
1050	apic_debug("apic_init %d\n", vcpu->vcpu_id);
1051
1052	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
1053	if (!apic)
1054		goto nomem;
1055
1056	vcpu->arch.apic = apic;
1057
1058	apic->regs_page = alloc_page(GFP_KERNEL);
1059	if (apic->regs_page == NULL) {
1060		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
1061		       vcpu->vcpu_id);
1062		goto nomem_free_apic;
1063	}
1064	apic->regs = page_address(apic->regs_page);
1065	memset(apic->regs, 0, PAGE_SIZE);
1066	apic->vcpu = vcpu;
1067
1068	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
1069		     HRTIMER_MODE_ABS);
1070	apic->lapic_timer.timer.function = kvm_timer_fn;
1071	apic->lapic_timer.t_ops = &lapic_timer_ops;
1072	apic->lapic_timer.kvm = vcpu->kvm;
1073	apic->lapic_timer.vcpu = vcpu;
1074
1075	apic->base_address = APIC_DEFAULT_PHYS_BASE;
1076	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
1077
1078	kvm_lapic_reset(vcpu);
1079	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
1080
1081	return 0;
1082nomem_free_apic:
1083	kfree(apic);
1084nomem:
1085	return -ENOMEM;
1086}
1087
1088int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
1089{
1090	struct kvm_lapic *apic = vcpu->arch.apic;
1091	int highest_irr;
1092
1093	if (!apic || !apic_enabled(apic))
1094		return -1;
1095
1096	apic_update_ppr(apic);
1097	highest_irr = apic_find_highest_irr(apic);
1098	if ((highest_irr == -1) ||
1099	    ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
1100		return -1;
1101	return highest_irr;
1102}
1103
1104int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
1105{
1106	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
1107	int r = 0;
1108
1109	if (!apic_hw_enabled(vcpu->arch.apic))
1110		r = 1;
1111	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
1112	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
1113		r = 1;
1114	return r;
1115}
1116
1117void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1118{
1119	struct kvm_lapic *apic = vcpu->arch.apic;
1120
1121	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
1122		if (kvm_apic_local_deliver(apic, APIC_LVTT))
1123			atomic_dec(&apic->lapic_timer.pending);
1124	}
1125}
1126
1127int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1128{
1129	int vector = kvm_apic_has_interrupt(vcpu);
1130	struct kvm_lapic *apic = vcpu->arch.apic;
1131
1132	if (vector == -1)
1133		return -1;
1134
1135	apic_set_vector(vector, apic->regs + APIC_ISR);
1136	apic_update_ppr(apic);
1137	apic_clear_irr(vector, apic);
1138	return vector;
1139}
1140
1141void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
1142{
1143	struct kvm_lapic *apic = vcpu->arch.apic;
1144
1145	apic->base_address = vcpu->arch.apic_base &
1146			     MSR_IA32_APICBASE_BASE;
1147	kvm_apic_set_version(vcpu);
1148
1149	apic_update_ppr(apic);
1150	hrtimer_cancel(&apic->lapic_timer.timer);
1151	update_divide_count(apic);
1152	start_apic_timer(apic);
1153	apic->irr_pending = true;
1154}
1155
1156void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1157{
1158	struct kvm_lapic *apic = vcpu->arch.apic;
1159	struct hrtimer *timer;
1160
1161	if (!apic)
1162		return;
1163
1164	timer = &apic->lapic_timer.timer;
1165	if (hrtimer_cancel(timer))
1166		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
1167}
1168
1169void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1170{
1171	u32 data;
1172	void *vapic;
1173
1174	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
1175		return;
1176
1177	vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
1178	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
1179	kunmap_atomic(vapic, KM_USER0);
1180
1181	apic_set_tpr(vcpu->arch.apic, data & 0xff);
1182}
1183
1184void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1185{
1186	u32 data, tpr;
1187	int max_irr, max_isr;
1188	struct kvm_lapic *apic;
1189	void *vapic;
1190
1191	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
1192		return;
1193
1194	apic = vcpu->arch.apic;
1195	tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
1196	max_irr = apic_find_highest_irr(apic);
1197	if (max_irr < 0)
1198		max_irr = 0;
1199	max_isr = apic_find_highest_isr(apic);
1200	if (max_isr < 0)
1201		max_isr = 0;
1202	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
1203
1204	vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
1205	*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
1206	kunmap_atomic(vapic, KM_USER0);
1207}
1208
1209void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
1210{
1211	if (!irqchip_in_kernel(vcpu->kvm))
1212		return;
1213
1214	vcpu->arch.apic->vapic_addr = vapic_addr;
1215}
1216
1217int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1218{
1219	struct kvm_lapic *apic = vcpu->arch.apic;
1220	u32 reg = (msr - APIC_BASE_MSR) << 4;
1221
1222	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1223		return 1;
1224
1225	/* if this is ICR write vector before command */
1226	if (msr == 0x830)
1227		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1228	return apic_reg_write(apic, reg, (u32)data);
1229}
1230
1231int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
1232{
1233	struct kvm_lapic *apic = vcpu->arch.apic;
1234	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
1235
1236	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1237		return 1;
1238
1239	if (apic_reg_read(apic, reg, 4, &low))
1240		return 1;
1241	if (msr == 0x830)
1242		apic_reg_read(apic, APIC_ICR2, 4, &high);
1243
1244	*data = (((u64)high) << 32) | low;
1245
1246	return 0;
1247}
1248
1249int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
1250{
1251	struct kvm_lapic *apic = vcpu->arch.apic;
1252
1253	if (!irqchip_in_kernel(vcpu->kvm))
1254		return 1;
1255
1256	/* if this is ICR write vector before command */
1257	if (reg == APIC_ICR)
1258		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1259	return apic_reg_write(apic, reg, (u32)data);
1260}
1261
1262int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
1263{
1264	struct kvm_lapic *apic = vcpu->arch.apic;
1265	u32 low, high = 0;
1266
1267	if (!irqchip_in_kernel(vcpu->kvm))
1268		return 1;
1269
1270	if (apic_reg_read(apic, reg, 4, &low))
1271		return 1;
1272	if (reg == APIC_ICR)
1273		apic_reg_read(apic, APIC_ICR2, 4, &high);
1274
1275	*data = (((u64)high) << 32) | low;
1276
1277	return 0;
1278}
1279