1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/types.h>
30#include <sys/ioctl.h>
31
32#include <machine/specialreg.h>
33#include <machine/vmm.h>
34#include <machine/vmm_dev.h>
35#include <machine/vmm_snapshot.h>
36
37#include <string.h>
38
39#include "vmmapi.h"
40#include "internal.h"
41
/*
 * Human-readable names for the machine-dependent VM_CAP_* capabilities,
 * indexed by capability number.  The VM_CAP_MAX slot is a NULL sentinel
 * marking the end of the table.
 */
const char *vm_capstrmap[] = {
	[VM_CAP_HALT_EXIT]  = "hlt_exit",
	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
	[VM_CAP_PAUSE_EXIT] = "pause_exit",
	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
	[VM_CAP_BPT_EXIT] = "bpt_exit",
	[VM_CAP_RDPID] = "rdpid",
	[VM_CAP_RDTSCP] = "rdtscp",
	[VM_CAP_IPI_EXIT] = "ipi_exit",
	[VM_CAP_MASK_HWINTR] = "mask_hwintr",
	[VM_CAP_RFLAGS_TF] = "rflags_tf",
	[VM_CAP_MAX] = NULL,
};
56
/*
 * Machine-dependent (x86) vmm ioctl commands.  This list is appended to
 * the common and PPT ioctl lists in vm_ioctl_cmds[] below.
 */
#define	VM_MD_IOCTLS			\
	VM_SET_SEGMENT_DESCRIPTOR,	\
	VM_GET_SEGMENT_DESCRIPTOR,	\
	VM_SET_KERNEMU_DEV,		\
	VM_GET_KERNEMU_DEV,		\
	VM_LAPIC_IRQ,			\
	VM_LAPIC_LOCAL_IRQ,		\
	VM_LAPIC_MSI,			\
	VM_IOAPIC_ASSERT_IRQ,		\
	VM_IOAPIC_DEASSERT_IRQ,		\
	VM_IOAPIC_PULSE_IRQ,		\
	VM_IOAPIC_PINCOUNT,		\
	VM_ISA_ASSERT_IRQ,		\
	VM_ISA_DEASSERT_IRQ,		\
	VM_ISA_PULSE_IRQ,		\
	VM_ISA_SET_IRQ_TRIGGER,		\
	VM_INJECT_NMI,			\
	VM_SET_X2APIC_STATE,		\
	VM_GET_X2APIC_STATE,		\
	VM_GET_HPET_CAPABILITIES,	\
	VM_RTC_WRITE,			\
	VM_RTC_READ,			\
	VM_RTC_SETTIME,			\
	VM_RTC_GETTIME,			\
	VM_GET_GPA_PMAP,		\
	VM_GLA2GPA,			\
	VM_SET_INTINFO,			\
	VM_GET_INTINFO,			\
	VM_RESTART_INSTRUCTION,		\
	VM_SNAPSHOT_REQ,		\
	VM_RESTORE_TIME
88
/*
 * Full list of vmm ioctls this library may issue (common + PPT +
 * machine-dependent), along with its element count.  NOTE(review):
 * presumably consumed for Capsicum capability-rights limiting on the
 * vmm device fd -- the consumer is outside this file.
 */
const cap_ioctl_t vm_ioctl_cmds[] = {
	VM_COMMON_IOCTLS,
	VM_PPT_IOCTLS,
	VM_MD_IOCTLS,
};
size_t vm_ioctl_ncmds = nitems(vm_ioctl_cmds);
95
96int
97vm_set_desc(struct vcpu *vcpu, int reg,
98	    uint64_t base, uint32_t limit, uint32_t access)
99{
100	int error;
101	struct vm_seg_desc vmsegdesc;
102
103	bzero(&vmsegdesc, sizeof(vmsegdesc));
104	vmsegdesc.regnum = reg;
105	vmsegdesc.desc.base = base;
106	vmsegdesc.desc.limit = limit;
107	vmsegdesc.desc.access = access;
108
109	error = vcpu_ioctl(vcpu, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
110	return (error);
111}
112
113int
114vm_get_desc(struct vcpu *vcpu, int reg, uint64_t *base, uint32_t *limit,
115    uint32_t *access)
116{
117	int error;
118	struct vm_seg_desc vmsegdesc;
119
120	bzero(&vmsegdesc, sizeof(vmsegdesc));
121	vmsegdesc.regnum = reg;
122
123	error = vcpu_ioctl(vcpu, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
124	if (error == 0) {
125		*base = vmsegdesc.desc.base;
126		*limit = vmsegdesc.desc.limit;
127		*access = vmsegdesc.desc.access;
128	}
129	return (error);
130}
131
132int
133vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *seg_desc)
134{
135	int error;
136
137	error = vm_get_desc(vcpu, reg, &seg_desc->base, &seg_desc->limit,
138	    &seg_desc->access);
139	return (error);
140}
141
142int
143vm_lapic_irq(struct vcpu *vcpu, int vector)
144{
145	struct vm_lapic_irq vmirq;
146
147	bzero(&vmirq, sizeof(vmirq));
148	vmirq.vector = vector;
149
150	return (vcpu_ioctl(vcpu, VM_LAPIC_IRQ, &vmirq));
151}
152
153int
154vm_lapic_local_irq(struct vcpu *vcpu, int vector)
155{
156	struct vm_lapic_irq vmirq;
157
158	bzero(&vmirq, sizeof(vmirq));
159	vmirq.vector = vector;
160
161	return (vcpu_ioctl(vcpu, VM_LAPIC_LOCAL_IRQ, &vmirq));
162}
163
164int
165vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
166{
167	struct vm_lapic_msi vmmsi;
168
169	bzero(&vmmsi, sizeof(vmmsi));
170	vmmsi.addr = addr;
171	vmmsi.msg = msg;
172
173	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
174}
175
/*
 * Raise an MSI on behalf of a PCI device.  On x86 the interrupt is
 * injected directly from the MSI address/data pair via vm_lapic_msi(),
 * so the originating bus/slot/function identity is not needed and those
 * arguments are ignored.
 */
int
vm_raise_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg,
    int bus __unused, int slot __unused, int func __unused)
{
	return (vm_lapic_msi(ctx, addr, msg));
}
182
/*
 * Translate a local APIC id to the corresponding vcpu id.  This is an
 * identity mapping, so no kernel interaction is required and 'ctx' is
 * unused.
 */
int
vm_apicid2vcpu(struct vmctx *ctx __unused, int apicid)
{
	/*
	 * The apic id associated with the 'vcpu' has the same numerical value
	 * as the 'vcpu' itself.
	 */
	return (apicid);
}
192
193int
194vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
195{
196	struct vm_ioapic_irq ioapic_irq;
197
198	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
199	ioapic_irq.irq = irq;
200
201	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
202}
203
204int
205vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
206{
207	struct vm_ioapic_irq ioapic_irq;
208
209	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
210	ioapic_irq.irq = irq;
211
212	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
213}
214
215int
216vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
217{
218	struct vm_ioapic_irq ioapic_irq;
219
220	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
221	ioapic_irq.irq = irq;
222
223	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
224}
225
226int
227vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
228{
229
230	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
231}
232
233int
234vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
235{
236	struct vm_isa_irq isa_irq;
237
238	bzero(&isa_irq, sizeof(struct vm_isa_irq));
239	isa_irq.atpic_irq = atpic_irq;
240	isa_irq.ioapic_irq = ioapic_irq;
241
242	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
243}
244
245int
246vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
247{
248	struct vm_isa_irq isa_irq;
249
250	bzero(&isa_irq, sizeof(struct vm_isa_irq));
251	isa_irq.atpic_irq = atpic_irq;
252	isa_irq.ioapic_irq = ioapic_irq;
253
254	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
255}
256
257int
258vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
259{
260	struct vm_isa_irq isa_irq;
261
262	bzero(&isa_irq, sizeof(struct vm_isa_irq));
263	isa_irq.atpic_irq = atpic_irq;
264	isa_irq.ioapic_irq = ioapic_irq;
265
266	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
267}
268
269int
270vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
271    enum vm_intr_trigger trigger)
272{
273	struct vm_isa_irq_trigger isa_irq_trigger;
274
275	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
276	isa_irq_trigger.atpic_irq = atpic_irq;
277	isa_irq_trigger.trigger = trigger;
278
279	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
280}
281
282int
283vm_inject_nmi(struct vcpu *vcpu)
284{
285	struct vm_nmi vmnmi;
286
287	bzero(&vmnmi, sizeof(vmnmi));
288
289	return (vcpu_ioctl(vcpu, VM_INJECT_NMI, &vmnmi));
290}
291
292int
293vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid,
294    uint32_t errcode, int restart_instruction)
295{
296	struct vm_exception exc;
297
298	exc.vector = vector;
299	exc.error_code = errcode;
300	exc.error_code_valid = errcode_valid;
301	exc.restart_instruction = restart_instruction;
302
303	return (vcpu_ioctl(vcpu, VM_INJECT_EXCEPTION, &exc));
304}
305
306int
307vm_readwrite_kernemu_device(struct vcpu *vcpu, vm_paddr_t gpa,
308    bool write, int size, uint64_t *value)
309{
310	struct vm_readwrite_kernemu_device irp = {
311		.access_width = fls(size) - 1,
312		.gpa = gpa,
313		.value = write ? *value : ~0ul,
314	};
315	long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
316	int rc;
317
318	rc = vcpu_ioctl(vcpu, cmd, &irp);
319	if (rc == 0 && !write)
320		*value = irp.value;
321	return (rc);
322}
323
324int
325vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state)
326{
327	int error;
328	struct vm_x2apic x2apic;
329
330	bzero(&x2apic, sizeof(x2apic));
331
332	error = vcpu_ioctl(vcpu, VM_GET_X2APIC_STATE, &x2apic);
333	*state = x2apic.state;
334	return (error);
335}
336
337int
338vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
339{
340	int error;
341	struct vm_x2apic x2apic;
342
343	bzero(&x2apic, sizeof(x2apic));
344	x2apic.state = state;
345
346	error = vcpu_ioctl(vcpu, VM_SET_X2APIC_STATE, &x2apic);
347
348	return (error);
349}
350
351int
352vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
353{
354	int error;
355	struct vm_hpet_cap cap;
356
357	bzero(&cap, sizeof(struct vm_hpet_cap));
358	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
359	if (capabilities != NULL)
360		*capabilities = cap.capabilities;
361	return (error);
362}
363
364int
365vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
366{
367	struct vm_rtc_data rtcdata;
368	int error;
369
370	bzero(&rtcdata, sizeof(struct vm_rtc_data));
371	rtcdata.offset = offset;
372	rtcdata.value = value;
373	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
374	return (error);
375}
376
377int
378vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
379{
380	struct vm_rtc_data rtcdata;
381	int error;
382
383	bzero(&rtcdata, sizeof(struct vm_rtc_data));
384	rtcdata.offset = offset;
385	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
386	if (error == 0)
387		*retval = rtcdata.value;
388	return (error);
389}
390
391int
392vm_rtc_settime(struct vmctx *ctx, time_t secs)
393{
394	struct vm_rtc_time rtctime;
395	int error;
396
397	bzero(&rtctime, sizeof(struct vm_rtc_time));
398	rtctime.secs = secs;
399	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
400	return (error);
401}
402
403int
404vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
405{
406	struct vm_rtc_time rtctime;
407	int error;
408
409	bzero(&rtctime, sizeof(struct vm_rtc_time));
410	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
411	if (error == 0)
412		*secs = rtctime.secs;
413	return (error);
414}
415
416/*
417 * From Intel Vol 3a:
418 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
419 */
420int
421vcpu_reset(struct vcpu *vcpu)
422{
423	int error;
424	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
425	uint32_t desc_access, desc_limit;
426	uint16_t sel;
427
428	zero = 0;
429
430	rflags = 0x2;
431	error = vm_set_register(vcpu, VM_REG_GUEST_RFLAGS, rflags);
432	if (error)
433		goto done;
434
435	rip = 0xfff0;
436	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RIP, rip)) != 0)
437		goto done;
438
439	/*
440	 * According to Intels Software Developer Manual CR0 should be
441	 * initialized with CR0_ET | CR0_NW | CR0_CD but that crashes some
442	 * guests like Windows.
443	 */
444	cr0 = CR0_NE;
445	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
446		goto done;
447
448	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR2, zero)) != 0)
449		goto done;
450
451	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR3, zero)) != 0)
452		goto done;
453
454	cr4 = 0;
455	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
456		goto done;
457
458	/*
459	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
460	 */
461	desc_base = 0xffff0000;
462	desc_limit = 0xffff;
463	desc_access = 0x0093;
464	error = vm_set_desc(vcpu, VM_REG_GUEST_CS,
465			    desc_base, desc_limit, desc_access);
466	if (error)
467		goto done;
468
469	sel = 0xf000;
470	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CS, sel)) != 0)
471		goto done;
472
473	/*
474	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
475	 */
476	desc_base = 0;
477	desc_limit = 0xffff;
478	desc_access = 0x0093;
479	error = vm_set_desc(vcpu, VM_REG_GUEST_SS,
480			    desc_base, desc_limit, desc_access);
481	if (error)
482		goto done;
483
484	error = vm_set_desc(vcpu, VM_REG_GUEST_DS,
485			    desc_base, desc_limit, desc_access);
486	if (error)
487		goto done;
488
489	error = vm_set_desc(vcpu, VM_REG_GUEST_ES,
490			    desc_base, desc_limit, desc_access);
491	if (error)
492		goto done;
493
494	error = vm_set_desc(vcpu, VM_REG_GUEST_FS,
495			    desc_base, desc_limit, desc_access);
496	if (error)
497		goto done;
498
499	error = vm_set_desc(vcpu, VM_REG_GUEST_GS,
500			    desc_base, desc_limit, desc_access);
501	if (error)
502		goto done;
503
504	sel = 0;
505	if ((error = vm_set_register(vcpu, VM_REG_GUEST_SS, sel)) != 0)
506		goto done;
507	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DS, sel)) != 0)
508		goto done;
509	if ((error = vm_set_register(vcpu, VM_REG_GUEST_ES, sel)) != 0)
510		goto done;
511	if ((error = vm_set_register(vcpu, VM_REG_GUEST_FS, sel)) != 0)
512		goto done;
513	if ((error = vm_set_register(vcpu, VM_REG_GUEST_GS, sel)) != 0)
514		goto done;
515
516	if ((error = vm_set_register(vcpu, VM_REG_GUEST_EFER, zero)) != 0)
517		goto done;
518
519	/* General purpose registers */
520	rdx = 0xf00;
521	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RAX, zero)) != 0)
522		goto done;
523	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBX, zero)) != 0)
524		goto done;
525	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RCX, zero)) != 0)
526		goto done;
527	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
528		goto done;
529	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSI, zero)) != 0)
530		goto done;
531	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDI, zero)) != 0)
532		goto done;
533	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBP, zero)) != 0)
534		goto done;
535	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSP, zero)) != 0)
536		goto done;
537	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R8, zero)) != 0)
538		goto done;
539	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R9, zero)) != 0)
540		goto done;
541	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R10, zero)) != 0)
542		goto done;
543	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R11, zero)) != 0)
544		goto done;
545	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R12, zero)) != 0)
546		goto done;
547	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R13, zero)) != 0)
548		goto done;
549	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R14, zero)) != 0)
550		goto done;
551	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R15, zero)) != 0)
552		goto done;
553
554	/* GDTR, IDTR */
555	desc_base = 0;
556	desc_limit = 0xffff;
557	desc_access = 0;
558	error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR,
559			    desc_base, desc_limit, desc_access);
560	if (error != 0)
561		goto done;
562
563	error = vm_set_desc(vcpu, VM_REG_GUEST_IDTR,
564			    desc_base, desc_limit, desc_access);
565	if (error != 0)
566		goto done;
567
568	/* TR */
569	desc_base = 0;
570	desc_limit = 0xffff;
571	desc_access = 0x0000008b;
572	error = vm_set_desc(vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
573	if (error)
574		goto done;
575
576	sel = 0;
577	if ((error = vm_set_register(vcpu, VM_REG_GUEST_TR, sel)) != 0)
578		goto done;
579
580	/* LDTR */
581	desc_base = 0;
582	desc_limit = 0xffff;
583	desc_access = 0x00000082;
584	error = vm_set_desc(vcpu, VM_REG_GUEST_LDTR, desc_base,
585			    desc_limit, desc_access);
586	if (error)
587		goto done;
588
589	sel = 0;
590	if ((error = vm_set_register(vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
591		goto done;
592
593	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR6,
594		 0xffff0ff0)) != 0)
595		goto done;
596	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR7, 0x400)) !=
597	    0)
598		goto done;
599
600	if ((error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW,
601		 zero)) != 0)
602		goto done;
603
604	error = 0;
605done:
606	return (error);
607}
608