hyperv.c revision 302167
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
/**
 * Implements low-level interactions with Hyper-V/Azure
 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hyperv.c 302167 2016-06-24 02:06:13Z sephe $");
34
35#include <sys/param.h>
36#include <sys/kernel.h>
37#include <sys/malloc.h>
38#include <sys/pcpu.h>
39#include <sys/timetc.h>
40#include <machine/bus.h>
41#include <machine/md_var.h>
42#include <vm/vm.h>
43#include <vm/vm_param.h>
44#include <vm/pmap.h>
45
46#include <dev/hyperv/include/hyperv_busdma.h>
47#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
48#include <dev/hyperv/vmbus/hyperv_reg.h>
49#include <dev/hyperv/vmbus/hyperv_var.h>
50#include <dev/hyperv/vmbus/vmbus_var.h>
51
/*
 * Raw field values used to compose the guest OS identity that
 * hyperv_init() writes to MSR_HV_GUEST_OS_ID.  The version field is
 * taken from __FreeBSD_version; build and OS id are reported as zero.
 */
#define HYPERV_FREEBSD_BUILD		0ULL
#define HYPERV_FREEBSD_VERSION		((uint64_t)__FreeBSD_version)
#define HYPERV_FREEBSD_OSID		0ULL

/*
 * Per-field encodings.  The build number is only masked; version and
 * OS id are shifted into position first and then masked.
 */
#define MSR_HV_GUESTID_BUILD_FREEBSD	\
	(HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK)
#define MSR_HV_GUESTID_VERSION_FREEBSD	\
	((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \
	 MSR_HV_GUESTID_VERSION_MASK)
#define MSR_HV_GUESTID_OSID_FREEBSD	\
	((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \
	 MSR_HV_GUESTID_OSID_MASK)

/* Complete guest ID: all encoded fields OR-ed with the FreeBSD OS type. */
#define MSR_HV_GUESTID_FREEBSD		\
	(MSR_HV_GUESTID_BUILD_FREEBSD |	\
	 MSR_HV_GUESTID_VERSION_FREEBSD | \
	 MSR_HV_GUESTID_OSID_FREEBSD |	\
	 MSR_HV_GUESTID_OSTYPE_FREEBSD)
70
/*
 * Bookkeeping for the hypercall page.
 *
 * hc_addr is the address returned by hyperv_dmamem_alloc() and is the
 * target of the indirect call in hv_vmbus_do_hypercall(); it is NULL
 * whenever no page is allocated.  hc_dma is the matching DMA handle;
 * its hv_paddr member is what gets programmed into MSR_HV_HYPERCALL.
 */
struct hypercall_ctx {
	void			*hc_addr;
	struct hyperv_dma	hc_dma;
};
75
static u_int	hyperv_get_timecount(struct timecounter *tc);

/*
 * Values captured by hyperv_identify(): regs[0] of
 * CPUID_LEAF_HV_FEATURES and regs[0] of CPUID_LEAF_HV_RECOMMENDS.
 * Non-static, so presumably consumed by other Hyper-V code.
 */
u_int		hyperv_features;
u_int		hyperv_recommends;

/*
 * regs[2] and regs[3] of CPUID_LEAF_HV_FEATURES; within this file they
 * are only printed by hyperv_identify().
 */
static u_int	hyperv_pm_features;
static u_int	hyperv_features3;

/*
 * Timecounter that reads MSR_HV_TIME_REF_COUNT.  hyperv_init()
 * registers it only when CPUID_HV_MSR_TIME_REFCNT is advertised.
 * The read method returns u_int, hence the 32-bit counter mask.
 */
static struct timecounter	hyperv_timecounter = {
	.tc_get_timecount	= hyperv_get_timecount,
	.tc_poll_pps		= NULL,
	.tc_counter_mask	= 0xffffffff,
	.tc_frequency		= HYPERV_TIMER_FREQ,
	.tc_name		= "Hyper-V",
	.tc_quality		= 2000,
	.tc_flags		= 0,
	.tc_priv		= NULL
};

/* The one hypercall page context; filled in by hypercall_create(). */
static struct hypercall_ctx	hypercall_context;
96
97static u_int
98hyperv_get_timecount(struct timecounter *tc __unused)
99{
100	return rdmsr(MSR_HV_TIME_REF_COUNT);
101}
102
103/**
104 * @brief Invoke the specified hypercall
105 */
106static uint64_t
107hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
108{
109#ifdef __x86_64__
110	uint64_t hv_status = 0;
111	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
112	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
113	volatile void *hypercall_page = hypercall_context.hc_addr;
114
115	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8");
116	__asm__ __volatile__ ("call *%3" : "=a"(hv_status):
117				"c" (control), "d" (input_address),
118				"m" (hypercall_page));
119	return (hv_status);
120#else
121	uint32_t control_high = control >> 32;
122	uint32_t control_low = control & 0xFFFFFFFF;
123	uint32_t hv_status_high = 1;
124	uint32_t hv_status_low = 1;
125	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
126	uint32_t input_address_high = input_address >> 32;
127	uint32_t input_address_low = input_address & 0xFFFFFFFF;
128	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
129	uint32_t output_address_high = output_address >> 32;
130	uint32_t output_address_low = output_address & 0xFFFFFFFF;
131	volatile void *hypercall_page = hypercall_context.hc_addr;
132
133	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_high),
134				"=a"(hv_status_low) : "d" (control_high),
135				"a" (control_low), "b" (input_address_high),
136				"c" (input_address_low),
137				"D"(output_address_high),
138				"S"(output_address_low), "m" (hypercall_page));
139	return (hv_status_low | ((uint64_t)hv_status_high << 32));
140#endif /* __x86_64__ */
141}
142
143/**
144 * @brief Post a message using the hypervisor message IPC.
145 * (This involves a hypercall.)
146 */
147hv_vmbus_status
148hv_vmbus_post_msg_via_msg_ipc(
149	hv_vmbus_connection_id	connection_id,
150	hv_vmbus_msg_type	message_type,
151	void*			payload,
152	size_t			payload_size)
153{
154	struct alignedinput {
155	    uint64_t alignment8;
156	    hv_vmbus_input_post_message msg;
157	};
158
159	hv_vmbus_input_post_message*	aligned_msg;
160	hv_vmbus_status 		status;
161	size_t				addr;
162
163	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
164	    return (EMSGSIZE);
165
166	addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF,
167			    M_ZERO | M_NOWAIT);
168	KASSERT(addr != 0,
169	    ("Error VMBUS: malloc failed to allocate message buffer!"));
170	if (addr == 0)
171	    return (ENOMEM);
172
173	aligned_msg = (hv_vmbus_input_post_message*)
174	    (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN));
175
176	aligned_msg->connection_id = connection_id;
177	aligned_msg->message_type = message_type;
178	aligned_msg->payload_size = payload_size;
179	memcpy((void*) aligned_msg->payload, payload, payload_size);
180
181	status = hv_vmbus_do_hypercall(
182		    HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF;
183
184	free((void *) addr, M_DEVBUF);
185	return (status);
186}
187
188/**
189 * @brief Signal an event on the specified connection using the hypervisor
190 * event IPC. (This involves a hypercall.)
191 */
192hv_vmbus_status
193hv_vmbus_signal_event(void *con_id)
194{
195	hv_vmbus_status status;
196
197	status = hv_vmbus_do_hypercall(
198		    HV_CALL_SIGNAL_EVENT,
199		    con_id,
200		    0) & 0xFFFF;
201
202	return (status);
203}
204
205int
206hyperv_guid2str(const struct hv_guid *guid, char *buf, size_t sz)
207{
208	const uint8_t *d = guid->data;
209
210	return snprintf(buf, sz, "%02x%02x%02x%02x-"
211	    "%02x%02x-%02x%02x-%02x%02x-"
212	    "%02x%02x%02x%02x%02x%02x",
213	    d[3], d[2], d[1], d[0],
214	    d[5], d[4], d[7], d[6], d[8], d[9],
215	    d[10], d[11], d[12], d[13], d[14], d[15]);
216}
217
218static bool
219hyperv_identify(void)
220{
221	u_int regs[4];
222	unsigned int maxleaf;
223
224	if (vm_guest != VM_GUEST_HV)
225		return (false);
226
227	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
228	maxleaf = regs[0];
229	if (maxleaf < CPUID_LEAF_HV_LIMITS)
230		return (false);
231
232	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
233	if (regs[0] != CPUID_HV_IFACE_HYPERV)
234		return (false);
235
236	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
237	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) {
238		/*
239		 * Hyper-V w/o Hypercall is impossible; someone
240		 * is faking Hyper-V.
241		 */
242		return (false);
243	}
244	hyperv_features = regs[0];
245	hyperv_pm_features = regs[2];
246	hyperv_features3 = regs[3];
247
248	do_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
249	printf("Hyper-V Version: %d.%d.%d [SP%d]\n",
250	    regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]);
251
252	printf("  Features=0x%b\n", hyperv_features,
253	    "\020"
254	    "\001VPRUNTIME"	/* MSR_HV_VP_RUNTIME */
255	    "\002TMREFCNT"	/* MSR_HV_TIME_REF_COUNT */
256	    "\003SYNIC"		/* MSRs for SynIC */
257	    "\004SYNTM"		/* MSRs for SynTimer */
258	    "\005APIC"		/* MSR_HV_{EOI,ICR,TPR} */
259	    "\006HYPERCALL"	/* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
260	    "\007VPINDEX"	/* MSR_HV_VP_INDEX */
261	    "\010RESET"		/* MSR_HV_RESET */
262	    "\011STATS"		/* MSR_HV_STATS_ */
263	    "\012REFTSC"	/* MSR_HV_REFERENCE_TSC */
264	    "\013IDLE"		/* MSR_HV_GUEST_IDLE */
265	    "\014TMFREQ"	/* MSR_HV_{TSC,APIC}_FREQUENCY */
266	    "\015DEBUG");	/* MSR_HV_SYNTH_DEBUG_ */
267	printf("  PM Features=0x%b [C%u]\n",
268	    (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK),
269	    "\020"
270	    "\005C3HPET",	/* HPET is required for C3 state */
271	    CPUPM_HV_CSTATE(hyperv_pm_features));
272	printf("  Features3=0x%b\n", hyperv_features3,
273	    "\020"
274	    "\001MWAIT"		/* MWAIT */
275	    "\002DEBUG"		/* guest debug support */
276	    "\003PERFMON"	/* performance monitor */
277	    "\004PCPUDPE"	/* physical CPU dynamic partition event */
278	    "\005XMMHC"		/* hypercall input through XMM regs */
279	    "\006IDLE"		/* guest idle support */
280	    "\007SLEEP"		/* hypervisor sleep support */
281	    "\010NUMA"		/* NUMA distance query support */
282	    "\011TMFREQ"	/* timer frequency query (TSC, LAPIC) */
283	    "\012SYNCMC"	/* inject synthetic machine checks */
284	    "\013CRASH"		/* MSRs for guest crash */
285	    "\014DEBUGMSR"	/* MSRs for guest debug */
286	    "\015NPIEP"		/* NPIEP */
287	    "\016HVDIS");	/* disabling hypervisor */
288
289	do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
290	hyperv_recommends = regs[0];
291	if (bootverbose)
292		printf("  Recommends: %08x %08x\n", regs[0], regs[1]);
293
294	do_cpuid(CPUID_LEAF_HV_LIMITS, regs);
295	if (bootverbose) {
296		printf("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
297		    regs[0], regs[1], regs[2]);
298	}
299
300	if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
301		do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
302		if (bootverbose) {
303			printf("  HW Features: %08x, AMD: %08x\n",
304			    regs[0], regs[3]);
305		}
306	}
307
308	return (true);
309}
310
311static void
312hyperv_init(void *dummy __unused)
313{
314	if (!hyperv_identify()) {
315		/* Not Hyper-V; reset guest id to the generic one. */
316		if (vm_guest == VM_GUEST_HV)
317			vm_guest = VM_GUEST_VM;
318		return;
319	}
320
321	/* Set guest id */
322	wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD);
323
324	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
325		/* Register Hyper-V timecounter */
326		tc_init(&hyperv_timecounter);
327	}
328}
329SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init,
330    NULL);
331
332static void
333hypercall_memfree(void)
334{
335	hyperv_dmamem_free(&hypercall_context.hc_dma,
336	    hypercall_context.hc_addr);
337	hypercall_context.hc_addr = NULL;
338}
339
340static void
341hypercall_create(void *arg __unused)
342{
343	uint64_t hc, hc_orig;
344
345	if (vm_guest != VM_GUEST_HV)
346		return;
347
348	hypercall_context.hc_addr = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0,
349	    PAGE_SIZE, &hypercall_context.hc_dma, BUS_DMA_WAITOK);
350	if (hypercall_context.hc_addr == NULL) {
351		printf("hyperv: Hypercall page allocation failed\n");
352		/* Can't perform any Hyper-V specific actions */
353		vm_guest = VM_GUEST_VM;
354		return;
355	}
356
357	/* Get the 'reserved' bits, which requires preservation. */
358	hc_orig = rdmsr(MSR_HV_HYPERCALL);
359
360	/*
361	 * Setup the Hypercall page.
362	 *
363	 * NOTE: 'reserved' bits MUST be preserved.
364	 */
365	hc = ((hypercall_context.hc_dma.hv_paddr >> PAGE_SHIFT) <<
366	    MSR_HV_HYPERCALL_PGSHIFT) |
367	    (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
368	    MSR_HV_HYPERCALL_ENABLE;
369	wrmsr(MSR_HV_HYPERCALL, hc);
370
371	/*
372	 * Confirm that Hypercall page did get setup.
373	 */
374	hc = rdmsr(MSR_HV_HYPERCALL);
375	if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) {
376		printf("hyperv: Hypercall setup failed\n");
377		hypercall_memfree();
378		/* Can't perform any Hyper-V specific actions */
379		vm_guest = VM_GUEST_VM;
380		return;
381	}
382	if (bootverbose)
383		printf("hyperv: Hypercall created\n");
384}
385SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL);
386
387static void
388hypercall_destroy(void *arg __unused)
389{
390	uint64_t hc;
391
392	if (hypercall_context.hc_addr == NULL)
393		return;
394
395	/* Disable Hypercall */
396	hc = rdmsr(MSR_HV_HYPERCALL);
397	wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK));
398	hypercall_memfree();
399
400	if (bootverbose)
401		printf("hyperv: Hypercall destroyed\n");
402}
403SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy,
404    NULL);
405