vmbus.c revision 300127
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 300127 2016-05-18 07:09:44Z sephe $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/proc.h>
42#include <sys/sysctl.h>
43#include <sys/syslog.h>
44#include <sys/systm.h>
45#include <sys/rtprio.h>
46#include <sys/interrupt.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49#include <sys/mutex.h>
50#include <sys/smp.h>
51
52#include <machine/resource.h>
53#include <sys/rman.h>
54
55#include <machine/stdarg.h>
56#include <machine/intr_machdep.h>
57#include <machine/md_var.h>
58#include <machine/segments.h>
59#include <sys/pcpu.h>
60#include <x86/apicvar.h>
61
62#include <dev/hyperv/include/hyperv.h>
63#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
64#include <dev/hyperv/vmbus/vmbus_var.h>
65
66#include <contrib/dev/acpica/include/acpi.h>
67#include "acpi_if.h"
68
69struct vmbus_softc	*vmbus_sc;
70
71static device_t vmbus_devp;
72static int vmbus_inited;
73static hv_setup_args setup_args; /* only CPU 0 supported at this time */
74
75static char *vmbus_ids[] = { "VMBUS", NULL };
76
77static void
78vmbus_msg_task(void *arg __unused, int pending __unused)
79{
80	hv_vmbus_message *msg;
81
82	msg = ((hv_vmbus_message *)hv_vmbus_g_context.syn_ic_msg_page[curcpu]) +
83	    HV_VMBUS_MESSAGE_SINT;
84	for (;;) {
85		const hv_vmbus_channel_msg_table_entry *entry;
86		hv_vmbus_channel_msg_header *hdr;
87		hv_vmbus_channel_msg_type msg_type;
88
89		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
90			break; /* no message */
91
92		hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
93		msg_type = hdr->message_type;
94
95		if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
96			printf("VMBUS: unknown message type = %d\n", msg_type);
97			goto handled;
98		}
99
100		entry = &g_channel_message_table[msg_type];
101		if (entry->messageHandler)
102			entry->messageHandler(hdr);
103handled:
104		msg->header.message_type = HV_MESSAGE_TYPE_NONE;
105		/*
106		 * Make sure the write to message_type (ie set to
107		 * HV_MESSAGE_TYPE_NONE) happens before we read the
108		 * message_pending and EOMing. Otherwise, the EOMing will
109		 * not deliver any more messages
110		 * since there is no empty slot
111		 *
112		 * NOTE:
113		 * mb() is used here, since atomic_thread_fence_seq_cst()
114		 * will become compiler fence on UP kernel.
115		 */
116		mb();
117		if (msg->header.message_flags.u.message_pending) {
118			/*
119			 * This will cause message queue rescan to possibly
120			 * deliver another msg from the hypervisor
121			 */
122			wrmsr(HV_X64_MSR_EOM, 0);
123		}
124	}
125}
126
127/**
128 * @brief Interrupt filter routine for VMBUS.
129 *
130 * The purpose of this routine is to determine the type of VMBUS protocol
131 * message to process - an event or a channel message.
132 */
133static inline int
134hv_vmbus_isr(struct trapframe *frame)
135{
136	struct vmbus_softc *sc = vmbus_get_softc();
137	int cpu = curcpu;
138	hv_vmbus_message *msg;
139	void *page_addr;
140
141	/*
142	 * The Windows team has advised that we check for events
143	 * before checking for messages. This is the way they do it
144	 * in Windows when running as a guest in Hyper-V
145	 */
146	sc->vmbus_event_proc(sc, cpu);
147
148	/* Check if there are actual msgs to be process */
149	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
150	msg = ((hv_vmbus_message *)page_addr) + HV_VMBUS_TIMER_SINT;
151
152	/* we call eventtimer process the message */
153	if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
154		msg->header.message_type = HV_MESSAGE_TYPE_NONE;
155
156		/* call intrrupt handler of event timer */
157		hv_et_intr(frame);
158
159		/*
160		 * Make sure the write to message_type (ie set to
161		 * HV_MESSAGE_TYPE_NONE) happens before we read the
162		 * message_pending and EOMing. Otherwise, the EOMing will
163		 * not deliver any more messages
164		 * since there is no empty slot
165		 *
166		 * NOTE:
167		 * mb() is used here, since atomic_thread_fence_seq_cst()
168		 * will become compiler fence on UP kernel.
169		 */
170		mb();
171
172		if (msg->header.message_flags.u.message_pending) {
173			/*
174			 * This will cause message queue rescan to possibly
175			 * deliver another msg from the hypervisor
176			 */
177			wrmsr(HV_X64_MSR_EOM, 0);
178		}
179	}
180
181	msg = ((hv_vmbus_message *)page_addr) + HV_VMBUS_MESSAGE_SINT;
182	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
183		taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu],
184		    &hv_vmbus_g_context.hv_msg_task[cpu]);
185	}
186
187	return (FILTER_HANDLED);
188}
189
190u_long *hv_vmbus_intr_cpu[MAXCPU];
191
192void
193hv_vector_handler(struct trapframe *trap_frame)
194{
195	int cpu;
196
197	/*
198	 * Disable preemption.
199	 */
200	critical_enter();
201
202	/*
203	 * Do a little interrupt counting.
204	 */
205	cpu = PCPU_GET(cpuid);
206	(*hv_vmbus_intr_cpu[cpu])++;
207
208	hv_vmbus_isr(trap_frame);
209
210	/*
211	 * Enable preemption.
212	 */
213	critical_exit();
214}
215
216static int
217vmbus_read_ivar(
218	device_t	dev,
219	device_t	child,
220	int		index,
221	uintptr_t*	result)
222{
223	struct hv_device *child_dev_ctx = device_get_ivars(child);
224
225	switch (index) {
226
227	case HV_VMBUS_IVAR_TYPE:
228		*result = (uintptr_t) &child_dev_ctx->class_id;
229		return (0);
230	case HV_VMBUS_IVAR_INSTANCE:
231		*result = (uintptr_t) &child_dev_ctx->device_id;
232		return (0);
233	case HV_VMBUS_IVAR_DEVCTX:
234		*result = (uintptr_t) child_dev_ctx;
235		return (0);
236	case HV_VMBUS_IVAR_NODE:
237		*result = (uintptr_t) child_dev_ctx->device;
238		return (0);
239	}
240	return (ENOENT);
241}
242
243static int
244vmbus_write_ivar(
245	device_t	dev,
246	device_t	child,
247	int		index,
248	uintptr_t	value)
249{
250	switch (index) {
251
252	case HV_VMBUS_IVAR_TYPE:
253	case HV_VMBUS_IVAR_INSTANCE:
254	case HV_VMBUS_IVAR_DEVCTX:
255	case HV_VMBUS_IVAR_NODE:
256		/* read-only */
257		return (EINVAL);
258	}
259	return (ENOENT);
260}
261
262static int
263vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
264{
265	char guidbuf[40];
266	struct hv_device *dev_ctx = device_get_ivars(child);
267
268	if (dev_ctx == NULL)
269		return (0);
270
271	strlcat(buf, "classid=", buflen);
272	snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
273	strlcat(buf, guidbuf, buflen);
274
275	strlcat(buf, " deviceid=", buflen);
276	snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
277	strlcat(buf, guidbuf, buflen);
278
279	return (0);
280}
281
282struct hv_device*
283hv_vmbus_child_device_create(
284	hv_guid		type,
285	hv_guid		instance,
286	hv_vmbus_channel*	channel)
287{
288	hv_device* child_dev;
289
290	/*
291	 * Allocate the new child device
292	 */
293	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
294			M_WAITOK |  M_ZERO);
295
296	child_dev->channel = channel;
297	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
298	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
299
300	return (child_dev);
301}
302
303int
304snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
305{
306	int cnt;
307	const unsigned char *d = guid->data;
308
309	cnt = snprintf(buf, sz,
310		"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
311		d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
312		d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
313	return (cnt);
314}
315
316int
317hv_vmbus_child_device_register(struct hv_device *child_dev)
318{
319	device_t child;
320
321	if (bootverbose) {
322		char name[40];
323		snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
324		printf("VMBUS: Class ID: %s\n", name);
325	}
326
327	child = device_add_child(vmbus_devp, NULL, -1);
328	child_dev->device = child;
329	device_set_ivars(child, child_dev);
330
331	return (0);
332}
333
334int
335hv_vmbus_child_device_unregister(struct hv_device *child_dev)
336{
337	int ret = 0;
338	/*
339	 * XXXKYS: Ensure that this is the opposite of
340	 * device_add_child()
341	 */
342	mtx_lock(&Giant);
343	ret = device_delete_child(vmbus_devp, child_dev->device);
344	mtx_unlock(&Giant);
345	return(ret);
346}
347
348static int
349vmbus_probe(device_t dev)
350{
351	if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
352	    device_get_unit(dev) != 0)
353		return (ENXIO);
354
355	device_set_desc(dev, "Vmbus Devices");
356
357	return (BUS_PROBE_DEFAULT);
358}
359
360extern inthand_t IDTVEC(hv_vmbus_callback);
361
362/**
363 * @brief Main vmbus driver initialization routine.
364 *
365 * Here, we
366 * - initialize the vmbus driver context
367 * - setup various driver entry points
368 * - invoke the vmbus hv main init routine
369 * - get the irq resource
370 * - invoke the vmbus to add the vmbus root device
371 * - setup the vmbus root device
372 * - retrieve the channel offers
373 */
374static int
375vmbus_bus_init(void)
376{
377	struct vmbus_softc *sc;
378	int i, j, n, ret;
379	char buf[MAXCOMLEN + 1];
380	cpuset_t cpu_mask;
381
382	if (vmbus_inited)
383		return (0);
384
385	vmbus_inited = 1;
386	sc = vmbus_get_softc();
387
388	ret = hv_vmbus_init();
389
390	if (ret) {
391		if(bootverbose)
392			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
393		return (ret);
394	}
395
396	/*
397	 * Find a free IDT slot for vmbus callback.
398	 */
399	hv_vmbus_g_context.hv_cb_vector = lapic_ipi_alloc(IDTVEC(hv_vmbus_callback));
400	if (hv_vmbus_g_context.hv_cb_vector < 0) {
401		if(bootverbose)
402			printf("Error VMBUS: Cannot find free IDT slot for "
403			    "vmbus callback!\n");
404		goto cleanup;
405	}
406
407	if(bootverbose)
408		printf("VMBUS: vmbus callback vector %d\n",
409		    hv_vmbus_g_context.hv_cb_vector);
410
411	/*
412	 * Notify the hypervisor of our vector.
413	 */
414	setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
415
416	CPU_FOREACH(j) {
417		snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
418		intrcnt_add(buf, &hv_vmbus_intr_cpu[j]);
419
420		for (i = 0; i < 2; i++)
421			setup_args.page_buffers[2 * j + i] = NULL;
422	}
423
424	/*
425	 * Per cpu setup.
426	 */
427	CPU_FOREACH(j) {
428		/*
429		 * Setup taskqueue to handle events
430		 */
431		hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK,
432			taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]);
433		CPU_SETOF(j, &cpu_mask);
434		taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, &cpu_mask,
435			"hvevent%d", j);
436
437		/*
438		 * Setup per-cpu tasks and taskqueues to handle msg.
439		 */
440		hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast(
441		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
442		    &hv_vmbus_g_context.hv_msg_tq[j]);
443		CPU_SETOF(j, &cpu_mask);
444		taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_msg_tq[j],
445		    1, PI_NET, &cpu_mask, "hvmsg%d", j);
446		TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0,
447		    vmbus_msg_task, NULL);
448
449		/*
450		 * Prepare the per cpu msg and event pages to be called on each cpu.
451		 */
452		for(i = 0; i < 2; i++) {
453			setup_args.page_buffers[2 * j + i] =
454				malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
455		}
456	}
457
458	if (bootverbose)
459		printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
460		    smp_started);
461
462	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
463
464	/*
465	 * Connect to VMBus in the root partition
466	 */
467	ret = hv_vmbus_connect();
468
469	if (ret != 0)
470		goto cleanup1;
471
472	if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
473	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
474		sc->vmbus_event_proc = vmbus_event_proc_compat;
475	else
476		sc->vmbus_event_proc = vmbus_event_proc;
477
478	hv_vmbus_request_channel_offers();
479
480	vmbus_scan();
481	bus_generic_attach(vmbus_devp);
482	device_printf(vmbus_devp, "device scan, probe and attach done\n");
483
484	return (ret);
485
486	cleanup1:
487	/*
488	 * Free pages alloc'ed
489	 */
490	for (n = 0; n < 2 * MAXCPU; n++)
491		if (setup_args.page_buffers[n] != NULL)
492			free(setup_args.page_buffers[n], M_DEVBUF);
493
494	/*
495	 * remove swi and vmbus callback vector;
496	 */
497	CPU_FOREACH(j) {
498		if (hv_vmbus_g_context.hv_event_queue[j] != NULL) {
499			taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]);
500			hv_vmbus_g_context.hv_event_queue[j] = NULL;
501		}
502	}
503
504	lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector);
505
506	cleanup:
507	hv_vmbus_cleanup();
508
509	return (ret);
510}
511
512static void
513vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
514{
515}
516
517static int
518vmbus_attach(device_t dev)
519{
520	if(bootverbose)
521		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
522
523	vmbus_devp = dev;
524	vmbus_sc = device_get_softc(dev);
525
526	/*
527	 * Event processing logic will be configured:
528	 * - After the vmbus protocol version negotiation.
529	 * - Before we request channel offers.
530	 */
531	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
532
533#ifndef EARLY_AP_STARTUP
534	/*
535	 * If the system has already booted and thread
536	 * scheduling is possible indicated by the global
537	 * cold set to zero, we just call the driver
538	 * initialization directly.
539	 */
540	if (!cold)
541#endif
542		vmbus_bus_init();
543
544	bus_generic_probe(dev);
545	return (0);
546}
547
548static void
549vmbus_sysinit(void *arg __unused)
550{
551	if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
552		return;
553
554#ifndef EARLY_AP_STARTUP
555	/*
556	 * If the system has already booted and thread
557	 * scheduling is possible, as indicated by the
558	 * global cold set to zero, we just call the driver
559	 * initialization directly.
560	 */
561	if (!cold)
562#endif
563		vmbus_bus_init();
564}
565
566static int
567vmbus_detach(device_t dev)
568{
569	int i;
570
571	hv_vmbus_release_unattached_channels();
572	hv_vmbus_disconnect();
573
574	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
575
576	for(i = 0; i < 2 * MAXCPU; i++) {
577		if (setup_args.page_buffers[i] != NULL)
578			free(setup_args.page_buffers[i], M_DEVBUF);
579	}
580
581	hv_vmbus_cleanup();
582
583	/* remove swi */
584	CPU_FOREACH(i) {
585		if (hv_vmbus_g_context.hv_event_queue[i] != NULL) {
586			taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
587			hv_vmbus_g_context.hv_event_queue[i] = NULL;
588		}
589	}
590
591	lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector);
592
593	return (0);
594}
595
596static device_method_t vmbus_methods[] = {
597	/* Device interface */
598	DEVMETHOD(device_probe,			vmbus_probe),
599	DEVMETHOD(device_attach,		vmbus_attach),
600	DEVMETHOD(device_detach,		vmbus_detach),
601	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
602	DEVMETHOD(device_suspend,		bus_generic_suspend),
603	DEVMETHOD(device_resume,		bus_generic_resume),
604
605	/* Bus interface */
606	DEVMETHOD(bus_add_child,		bus_generic_add_child),
607	DEVMETHOD(bus_print_child,		bus_generic_print_child),
608	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
609	DEVMETHOD(bus_write_ivar,		vmbus_write_ivar),
610	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
611
612	DEVMETHOD_END
613};
614
615static driver_t vmbus_driver = {
616	"vmbus",
617	vmbus_methods,
618	sizeof(struct vmbus_softc)
619};
620
621static devclass_t vmbus_devclass;
622
623DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
624MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
625MODULE_VERSION(vmbus, 1);
626
627#ifndef EARLY_AP_STARTUP
628/*
629 * NOTE:
630 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
631 * initialized.
632 */
633SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
634#endif
635