/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 268891 2014-07-19 22:06:46Z jhb $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 268891 2014-07-19 22:06:46Z jhb $");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

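/*
 * The interrupt priority class of a vector is its upper nibble, e.g.
 * vector 0x45 is in priority class 4 (Intel SDM Vol 3, "Interrupt, Task
 * and Processor Priority").
 */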
#define	PRIO(x)			((x) >> 4)

#define VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

#define VLAPIC_BUS_FREQ	tsc_freq

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

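/*
 * Derive the x2APIC LDR from the APIC ID as specified in the Intel SDM:
 * bits 31:16 hold the cluster ID (APIC ID >> 4) and bits 15:0 hold a
 * one-bit logical ID mask (1 << (APIC ID & 0xf)).
 */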
static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

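/*
 * The DFR selects between the "flat" and "cluster" logical destination
 * models in xAPIC mode; the register does not exist in x2APIC mode, so
 * writes to it are dropped there.  Reserved bits always read as ones.
 */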
void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

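/*
 * The divide value is encoded in bits 0, 1 and 3 of the DCR (hence the
 * 0xB mask below) and selects a power-of-two divisor between 1 and 128
 * that is applied to the timer's input clock.
 */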
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

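/*
 * Compute the current count register (CCR) of the APIC timer: the time
 * remaining until the callout fires, converted to ticks of the emulated
 * timer frequency.  Returns 0 if the timer is stopped or has expired.
 */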
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

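/*
 * A write to the DCR changes the timer frequency to
 * VLAPIC_BUS_FREQ / divisor and recomputes the timer period as
 * 'icr_timer' ticks at that frequency.
 */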
void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

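/*
 * Mark 'vector' as pending in the IRR and remember its trigger mode in
 * the TMR.  Returns 0 if the interrupt was ignored because the vlapic
 * is software-disabled and 1 if the caller should notify the vcpu.
 */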
int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	int		 i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
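/*
 * In priority-class terms (the upper nibble of a vector): PPR = TPR if
 * PRIO(TPR) >= PRIO(ISRV), otherwise PPR is the class of the highest
 * in-service vector (ISRV & 0xf0).
 */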
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

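/*
 * EOI handling: clear the highest-priority bit set in the ISR, pop the
 * isrvec stack, recompute the PPR and, if the vector was level-triggered
 * (its TMR bit is set), forward the EOI to the virtual I/O APIC.
 */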
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*isrptr, *tmrptr;
	int		i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				      vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	/* The error LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	/* The timer LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
			case APIC_LVT_LINT0:
				vm_inject_extint(vlapic->vm, vlapic->vcpuid);
				break;
			case APIC_LVT_LINT1:
				vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
				break;
			default:
				break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERRED, vector, 1);
	}
	return (0);
}

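/*
 * Timer callout handler.  Fires the timer LVT and then, for a periodic
 * timer, schedules the next expiry 'timer_period_bt' after the previous
 * one (shortening the callout to absorb latency), or resets the time
 * base if the callout is lagging by more than a full period.
 */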
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

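/*
 * A write to the initial count register arms the timer: the period is
 * 'icr_timer' ticks of the current timer frequency and the callout is
 * scheduled one period from now.  Writing zero stops the timer.
 */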
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

799
800/*
801 * This function populates 'dmask' with the set of vcpus that match the
802 * addressing specified by the (dest, phys, lowprio) tuple.
803 *
804 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
805 * or xAPIC (8-bit) destination field.
806 */
807static void
808vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
809    bool lowprio, bool x2apic_dest)
810{
811	struct vlapic *vlapic;
812	uint32_t dfr, ldr, ldest, cluster;
813	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
814	cpuset_t amask;
815	int vcpuid;
816
817	if ((x2apic_dest && dest == 0xffffffff) ||
818	    (!x2apic_dest && dest == 0xff)) {
819		/*
820		 * Broadcast in both logical and physical modes.
821		 */
822		*dmask = vm_active_cpus(vm);
823		return;
824	}
825
826	if (phys) {
827		/*
828		 * Physical mode: destination is APIC ID.
829		 */
830		CPU_ZERO(dmask);
831		vcpuid = vm_apicid2vcpuid(vm, dest);
832		if (vcpuid < VM_MAXCPU)
833			CPU_SET(vcpuid, dmask);
834	} else {
835		/*
836		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
837		 * bitmask. This model is only avilable in the xAPIC mode.
838		 */
839		mda_flat_ldest = dest & 0xff;
840
841		/*
842		 * In the "Cluster Model" the MDA is used to identify a
843		 * specific cluster and a set of APICs in that cluster.
844		 */
845		if (x2apic_dest) {
846			mda_cluster_id = dest >> 16;
847			mda_cluster_ldest = dest & 0xffff;
848		} else {
849			mda_cluster_id = (dest >> 4) & 0xf;
850			mda_cluster_ldest = dest & 0xf;
851		}
852
853		/*
854		 * Logical mode: match each APIC that has a bit set
855		 * in it's LDR that matches a bit in the ldest.
856		 */
857		CPU_ZERO(dmask);
858		amask = vm_active_cpus(vm);
859		while ((vcpuid = CPU_FFS(&amask)) != 0) {
860			vcpuid--;
861			CPU_CLR(vcpuid, &amask);
862
863			vlapic = vm_lapic(vm, vcpuid);
864			dfr = vlapic->apic_page->dfr;
865			ldr = vlapic->apic_page->ldr;
866
867			if ((dfr & APIC_DFR_MODEL_MASK) ==
868			    APIC_DFR_MODEL_FLAT) {
869				ldest = ldr >> 24;
870				mda_ldest = mda_flat_ldest;
871			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
872			    APIC_DFR_MODEL_CLUSTER) {
873				if (x2apic(vlapic)) {
874					cluster = ldr >> 16;
875					ldest = ldr & 0xffff;
876				} else {
877					cluster = ldr >> 28;
878					ldest = (ldr >> 24) & 0xf;
879				}
880				if (cluster != mda_cluster_id)
881					continue;
882				mda_ldest = mda_cluster_ldest;
883			} else {
884				/*
885				 * Guest has configured a bad logical
886				 * model for this vcpu - skip it.
887				 */
888				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
889				    "model %x - cannot deliver interrupt", dfr);
890				continue;
891			}
892
893			if ((mda_ldest & ldest) != 0) {
894				CPU_SET(vcpuid, dmask);
895				if (lowprio)
896					break;
897			}
898		}
899	}
900}
901
902static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
903
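/*
 * Handle a write to the low half of the interrupt command register.
 * The 64-bit ICR encodes the vector in bits 7:0, the delivery mode in
 * bits 10:8, the destination mode in bit 11, the destination shorthand
 * in bits 19:18 and the destination in bits 63:56 (xAPIC) or 63:32
 * (x2APIC).  Fixed and NMI IPIs are delivered entirely in the kernel;
 * INIT and STARTUP IPIs are used to emulate AP bringup.
 */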
int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
						    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			/*
			 * XXX this assumes that the startup IPI always succeeds
			 */
			vlapic2->boot_state = BS_RUNNING;
			vm_activate_cpu(vlapic2->vm, dest);

			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
	    vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

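/*
 * Scan the IRR from the highest vector group downward for a pending
 * interrupt.  A vector is deliverable only if its priority class is
 * strictly greater than that of the current PPR.
 */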
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	int		 idx, i, bitpos, vector;
	uint32_t	*irrptr, val;

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*irrptr, *isrptr;
	int		idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*reg;
	int		 i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset)
	{
		case APIC_OFFSET_ID:
			*data = lapic->id;
			break;
		case APIC_OFFSET_VER:
			*data = lapic->version;
			break;
		case APIC_OFFSET_TPR:
			*data = lapic->tpr;
			break;
		case APIC_OFFSET_APR:
			*data = lapic->apr;
			break;
		case APIC_OFFSET_PPR:
			*data = lapic->ppr;
			break;
		case APIC_OFFSET_EOI:
			*data = lapic->eoi;
			break;
		case APIC_OFFSET_LDR:
			*data = lapic->ldr;
			break;
		case APIC_OFFSET_DFR:
			*data = lapic->dfr;
			break;
		case APIC_OFFSET_SVR:
			*data = lapic->svr;
			break;
		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
			i = (offset - APIC_OFFSET_ISR0) >> 2;
			reg = &lapic->isr0;
			*data = *(reg + i);
			break;
		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
			i = (offset - APIC_OFFSET_TMR0) >> 2;
			reg = &lapic->tmr0;
			*data = *(reg + i);
			break;
		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
			i = (offset - APIC_OFFSET_IRR0) >> 2;
			reg = &lapic->irr0;
			*data = atomic_load_acq_int(reg + i);
			break;
		case APIC_OFFSET_ESR:
			*data = lapic->esr;
			break;
		case APIC_OFFSET_ICR_LOW:
			*data = lapic->icr_lo;
			if (x2apic(vlapic))
				*data |= (uint64_t)lapic->icr_hi << 32;
			break;
		case APIC_OFFSET_ICR_HI:
			*data = lapic->icr_hi;
			break;
		case APIC_OFFSET_CMCI_LVT:
		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
			*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
			reg = vlapic_get_lvtptr(vlapic, offset);
			KASSERT(*data == *reg, ("inconsistent lvt value at "
			    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
			break;
		case APIC_OFFSET_TIMER_ICR:
			*data = lapic->icr_timer;
			break;
		case APIC_OFFSET_TIMER_CCR:
			*data = vlapic_get_ccr(vlapic);
			break;
		case APIC_OFFSET_TIMER_DCR:
			*data = lapic->dcr_timer;
			break;
		case APIC_OFFSET_SELF_IPI:
			/*
			 * XXX generate a GP fault if vlapic is in x2apic mode
			 */
			*data = 0;
			break;
		case APIC_OFFSET_RRR:
		default:
			*data = 0;
			break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*regptr;
	int		retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset)
	{
		case APIC_OFFSET_ID:
			lapic->id = data;
			vlapic_id_write_handler(vlapic);
			break;
		case APIC_OFFSET_TPR:
			lapic->tpr = data & 0xff;
			vlapic_update_ppr(vlapic);
			break;
		case APIC_OFFSET_EOI:
			vlapic_process_eoi(vlapic);
			break;
		case APIC_OFFSET_LDR:
			lapic->ldr = data;
			vlapic_ldr_write_handler(vlapic);
			break;
		case APIC_OFFSET_DFR:
			lapic->dfr = data;
			vlapic_dfr_write_handler(vlapic);
			break;
		case APIC_OFFSET_SVR:
			lapic->svr = data;
			vlapic_svr_write_handler(vlapic);
			break;
		case APIC_OFFSET_ICR_LOW:
			lapic->icr_lo = data;
			if (x2apic(vlapic))
				lapic->icr_hi = data >> 32;
			retval = vlapic_icrlo_write_handler(vlapic, retu);
			break;
		case APIC_OFFSET_ICR_HI:
			lapic->icr_hi = data;
			break;
		case APIC_OFFSET_CMCI_LVT:
		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
			regptr = vlapic_get_lvtptr(vlapic, offset);
			*regptr = data;
			vlapic_lvt_write_handler(vlapic, offset);
			break;
		case APIC_OFFSET_TIMER_ICR:
			lapic->icr_timer = data;
			vlapic_icrtmr_write_handler(vlapic);
			break;

		case APIC_OFFSET_TIMER_DCR:
			lapic->dcr_timer = data;
			vlapic_dcr_write_handler(vlapic);
			break;

		case APIC_OFFSET_ESR:
			vlapic_esr_write_handler(vlapic);
			break;

		case APIC_OFFSET_SELF_IPI:
			if (x2apic(vlapic))
				vlapic_self_ipi_handler(vlapic, data);
			break;

		case APIC_OFFSET_VER:
		case APIC_OFFSET_APR:
		case APIC_OFFSET_PPR:
		case APIC_OFFSET_RRR:
		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		case APIC_OFFSET_TIMER_CCR:
		default:
			/* Read only */
			break;
	}

	return (retval);
}

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vm, vcpuid);
		} else {
			lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}