vmbus.c revision 300480
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 300480 2016-05-23 07:06:53Z sephe $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/proc.h>
42#include <sys/sysctl.h>
43#include <sys/syslog.h>
44#include <sys/systm.h>
45#include <sys/rtprio.h>
46#include <sys/interrupt.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49#include <sys/mutex.h>
50#include <sys/smp.h>
51
52#include <machine/resource.h>
53#include <sys/rman.h>
54
55#include <machine/stdarg.h>
56#include <machine/intr_machdep.h>
57#include <machine/md_var.h>
58#include <machine/segments.h>
59#include <sys/pcpu.h>
60#include <x86/apicvar.h>
61
62#include <dev/hyperv/include/hyperv.h>
63#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
64#include <dev/hyperv/vmbus/vmbus_var.h>
65
66#include <contrib/dev/acpica/include/acpi.h>
67#include "acpi_if.h"
68
69struct vmbus_softc	*vmbus_sc;
70
71static device_t vmbus_devp;
72static int vmbus_inited;
73static hv_setup_args setup_args; /* only CPU 0 supported at this time */
74
75static char *vmbus_ids[] = { "VMBUS", NULL };
76
77static void
78vmbus_msg_task(void *arg __unused, int pending __unused)
79{
80	hv_vmbus_message *msg;
81
82	msg = ((hv_vmbus_message *)hv_vmbus_g_context.syn_ic_msg_page[curcpu]) +
83	    HV_VMBUS_MESSAGE_SINT;
84	for (;;) {
85		const hv_vmbus_channel_msg_table_entry *entry;
86		hv_vmbus_channel_msg_header *hdr;
87		hv_vmbus_channel_msg_type msg_type;
88
89		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
90			break; /* no message */
91
92		hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
93		msg_type = hdr->message_type;
94
95		if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
96			printf("VMBUS: unknown message type = %d\n", msg_type);
97			goto handled;
98		}
99
100		entry = &g_channel_message_table[msg_type];
101		if (entry->messageHandler)
102			entry->messageHandler(hdr);
103handled:
104		msg->header.message_type = HV_MESSAGE_TYPE_NONE;
105		/*
106		 * Make sure the write to message_type (ie set to
107		 * HV_MESSAGE_TYPE_NONE) happens before we read the
108		 * message_pending and EOMing. Otherwise, the EOMing will
109		 * not deliver any more messages
110		 * since there is no empty slot
111		 *
112		 * NOTE:
113		 * mb() is used here, since atomic_thread_fence_seq_cst()
114		 * will become compiler fence on UP kernel.
115		 */
116		mb();
117		if (msg->header.message_flags.u.message_pending) {
118			/*
119			 * This will cause message queue rescan to possibly
120			 * deliver another msg from the hypervisor
121			 */
122			wrmsr(HV_X64_MSR_EOM, 0);
123		}
124	}
125}
126
127/**
128 * @brief Interrupt filter routine for VMBUS.
129 *
130 * The purpose of this routine is to determine the type of VMBUS protocol
131 * message to process - an event or a channel message.
132 */
133static inline int
134hv_vmbus_isr(struct trapframe *frame)
135{
136	struct vmbus_softc *sc = vmbus_get_softc();
137	int cpu = curcpu;
138	hv_vmbus_message *msg;
139	void *page_addr;
140
141	/*
142	 * The Windows team has advised that we check for events
143	 * before checking for messages. This is the way they do it
144	 * in Windows when running as a guest in Hyper-V
145	 */
146	sc->vmbus_event_proc(sc, cpu);
147
148	/* Check if there are actual msgs to be process */
149	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
150	msg = ((hv_vmbus_message *)page_addr) + HV_VMBUS_TIMER_SINT;
151
152	/* we call eventtimer process the message */
153	if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
154		msg->header.message_type = HV_MESSAGE_TYPE_NONE;
155
156		/* call intrrupt handler of event timer */
157		hv_et_intr(frame);
158
159		/*
160		 * Make sure the write to message_type (ie set to
161		 * HV_MESSAGE_TYPE_NONE) happens before we read the
162		 * message_pending and EOMing. Otherwise, the EOMing will
163		 * not deliver any more messages
164		 * since there is no empty slot
165		 *
166		 * NOTE:
167		 * mb() is used here, since atomic_thread_fence_seq_cst()
168		 * will become compiler fence on UP kernel.
169		 */
170		mb();
171
172		if (msg->header.message_flags.u.message_pending) {
173			/*
174			 * This will cause message queue rescan to possibly
175			 * deliver another msg from the hypervisor
176			 */
177			wrmsr(HV_X64_MSR_EOM, 0);
178		}
179	}
180
181	msg = ((hv_vmbus_message *)page_addr) + HV_VMBUS_MESSAGE_SINT;
182	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
183		taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu],
184		    &hv_vmbus_g_context.hv_msg_task[cpu]);
185	}
186
187	return (FILTER_HANDLED);
188}
189
190u_long *hv_vmbus_intr_cpu[MAXCPU];
191
192void
193hv_vector_handler(struct trapframe *trap_frame)
194{
195	int cpu;
196
197	/*
198	 * Disable preemption.
199	 */
200	critical_enter();
201
202	/*
203	 * Do a little interrupt counting.
204	 */
205	cpu = PCPU_GET(cpuid);
206	(*hv_vmbus_intr_cpu[cpu])++;
207
208	hv_vmbus_isr(trap_frame);
209
210	/*
211	 * Enable preemption.
212	 */
213	critical_exit();
214}
215
216static int
217vmbus_read_ivar(
218	device_t	dev,
219	device_t	child,
220	int		index,
221	uintptr_t*	result)
222{
223	struct hv_device *child_dev_ctx = device_get_ivars(child);
224
225	switch (index) {
226
227	case HV_VMBUS_IVAR_TYPE:
228		*result = (uintptr_t) &child_dev_ctx->class_id;
229		return (0);
230	case HV_VMBUS_IVAR_INSTANCE:
231		*result = (uintptr_t) &child_dev_ctx->device_id;
232		return (0);
233	case HV_VMBUS_IVAR_DEVCTX:
234		*result = (uintptr_t) child_dev_ctx;
235		return (0);
236	case HV_VMBUS_IVAR_NODE:
237		*result = (uintptr_t) child_dev_ctx->device;
238		return (0);
239	}
240	return (ENOENT);
241}
242
243static int
244vmbus_write_ivar(
245	device_t	dev,
246	device_t	child,
247	int		index,
248	uintptr_t	value)
249{
250	switch (index) {
251
252	case HV_VMBUS_IVAR_TYPE:
253	case HV_VMBUS_IVAR_INSTANCE:
254	case HV_VMBUS_IVAR_DEVCTX:
255	case HV_VMBUS_IVAR_NODE:
256		/* read-only */
257		return (EINVAL);
258	}
259	return (ENOENT);
260}
261
262static int
263vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
264{
265	char guidbuf[40];
266	struct hv_device *dev_ctx = device_get_ivars(child);
267
268	if (dev_ctx == NULL)
269		return (0);
270
271	strlcat(buf, "classid=", buflen);
272	snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
273	strlcat(buf, guidbuf, buflen);
274
275	strlcat(buf, " deviceid=", buflen);
276	snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
277	strlcat(buf, guidbuf, buflen);
278
279	return (0);
280}
281
282struct hv_device*
283hv_vmbus_child_device_create(
284	hv_guid		type,
285	hv_guid		instance,
286	hv_vmbus_channel*	channel)
287{
288	hv_device* child_dev;
289
290	/*
291	 * Allocate the new child device
292	 */
293	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
294			M_WAITOK |  M_ZERO);
295
296	child_dev->channel = channel;
297	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
298	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
299
300	return (child_dev);
301}
302
303int
304snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
305{
306	int cnt;
307	const unsigned char *d = guid->data;
308
309	cnt = snprintf(buf, sz,
310		"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
311		d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
312		d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
313	return (cnt);
314}
315
316int
317hv_vmbus_child_device_register(struct hv_device *child_dev)
318{
319	device_t child;
320
321	if (bootverbose) {
322		char name[40];
323		snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
324		printf("VMBUS: Class ID: %s\n", name);
325	}
326
327	child = device_add_child(vmbus_devp, NULL, -1);
328	child_dev->device = child;
329	device_set_ivars(child, child_dev);
330
331	return (0);
332}
333
334int
335hv_vmbus_child_device_unregister(struct hv_device *child_dev)
336{
337	int ret = 0;
338	/*
339	 * XXXKYS: Ensure that this is the opposite of
340	 * device_add_child()
341	 */
342	mtx_lock(&Giant);
343	ret = device_delete_child(vmbus_devp, child_dev->device);
344	mtx_unlock(&Giant);
345	return(ret);
346}
347
348static int
349vmbus_probe(device_t dev)
350{
351	if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
352	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV)
353		return (ENXIO);
354
355	device_set_desc(dev, "Hyper-V Vmbus");
356
357	return (BUS_PROBE_DEFAULT);
358}
359
360extern inthand_t IDTVEC(hv_vmbus_callback);
361
362/**
363 * @brief Main vmbus driver initialization routine.
364 *
365 * Here, we
366 * - initialize the vmbus driver context
367 * - setup various driver entry points
368 * - invoke the vmbus hv main init routine
369 * - get the irq resource
370 * - invoke the vmbus to add the vmbus root device
371 * - setup the vmbus root device
372 * - retrieve the channel offers
373 */
374static int
375vmbus_bus_init(void)
376{
377	struct vmbus_softc *sc;
378	int i, j, n, ret;
379	char buf[MAXCOMLEN + 1];
380	cpuset_t cpu_mask;
381
382	if (vmbus_inited)
383		return (0);
384
385	vmbus_inited = 1;
386	sc = vmbus_get_softc();
387
388	/*
389	 * Find a free IDT slot for vmbus callback.
390	 */
391	hv_vmbus_g_context.hv_cb_vector = lapic_ipi_alloc(IDTVEC(hv_vmbus_callback));
392	if (hv_vmbus_g_context.hv_cb_vector < 0) {
393		if(bootverbose)
394			printf("Error VMBUS: Cannot find free IDT slot for "
395			    "vmbus callback!\n");
396		ret = ENXIO;
397		goto cleanup;
398	}
399
400	if(bootverbose)
401		printf("VMBUS: vmbus callback vector %d\n",
402		    hv_vmbus_g_context.hv_cb_vector);
403
404	/*
405	 * Notify the hypervisor of our vector.
406	 */
407	setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
408
409	CPU_FOREACH(j) {
410		snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
411		intrcnt_add(buf, &hv_vmbus_intr_cpu[j]);
412
413		for (i = 0; i < 2; i++)
414			setup_args.page_buffers[2 * j + i] = NULL;
415	}
416
417	/*
418	 * Per cpu setup.
419	 */
420	CPU_FOREACH(j) {
421		/*
422		 * Setup taskqueue to handle events
423		 */
424		hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK,
425			taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]);
426		CPU_SETOF(j, &cpu_mask);
427		taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, &cpu_mask,
428			"hvevent%d", j);
429
430		/*
431		 * Setup per-cpu tasks and taskqueues to handle msg.
432		 */
433		hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast(
434		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
435		    &hv_vmbus_g_context.hv_msg_tq[j]);
436		CPU_SETOF(j, &cpu_mask);
437		taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_msg_tq[j],
438		    1, PI_NET, &cpu_mask, "hvmsg%d", j);
439		TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0,
440		    vmbus_msg_task, NULL);
441
442		/*
443		 * Prepare the per cpu msg and event pages to be called on each cpu.
444		 */
445		for(i = 0; i < 2; i++) {
446			setup_args.page_buffers[2 * j + i] =
447				malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
448		}
449	}
450
451	if (bootverbose)
452		printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
453		    smp_started);
454
455	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
456
457	/*
458	 * Connect to VMBus in the root partition
459	 */
460	ret = hv_vmbus_connect();
461
462	if (ret != 0)
463		goto cleanup1;
464
465	if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
466	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
467		sc->vmbus_event_proc = vmbus_event_proc_compat;
468	else
469		sc->vmbus_event_proc = vmbus_event_proc;
470
471	hv_vmbus_request_channel_offers();
472
473	vmbus_scan();
474	bus_generic_attach(vmbus_devp);
475	device_printf(vmbus_devp, "device scan, probe and attach done\n");
476
477	return (ret);
478
479	cleanup1:
480	/*
481	 * Free pages alloc'ed
482	 */
483	for (n = 0; n < 2 * MAXCPU; n++)
484		if (setup_args.page_buffers[n] != NULL)
485			free(setup_args.page_buffers[n], M_DEVBUF);
486
487	/*
488	 * remove swi and vmbus callback vector;
489	 */
490	CPU_FOREACH(j) {
491		if (hv_vmbus_g_context.hv_event_queue[j] != NULL) {
492			taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]);
493			hv_vmbus_g_context.hv_event_queue[j] = NULL;
494		}
495	}
496
497	lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector);
498
499	cleanup:
500	return (ret);
501}
502
503static void
504vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
505{
506}
507
508static int
509vmbus_attach(device_t dev)
510{
511	if(bootverbose)
512		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
513
514	vmbus_devp = dev;
515	vmbus_sc = device_get_softc(dev);
516
517	/*
518	 * Event processing logic will be configured:
519	 * - After the vmbus protocol version negotiation.
520	 * - Before we request channel offers.
521	 */
522	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
523
524#ifndef EARLY_AP_STARTUP
525	/*
526	 * If the system has already booted and thread
527	 * scheduling is possible indicated by the global
528	 * cold set to zero, we just call the driver
529	 * initialization directly.
530	 */
531	if (!cold)
532#endif
533		vmbus_bus_init();
534
535	bus_generic_probe(dev);
536	return (0);
537}
538
539static void
540vmbus_sysinit(void *arg __unused)
541{
542	if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
543		return;
544
545#ifndef EARLY_AP_STARTUP
546	/*
547	 * If the system has already booted and thread
548	 * scheduling is possible, as indicated by the
549	 * global cold set to zero, we just call the driver
550	 * initialization directly.
551	 */
552	if (!cold)
553#endif
554		vmbus_bus_init();
555}
556
557static int
558vmbus_detach(device_t dev)
559{
560	int i;
561
562	hv_vmbus_release_unattached_channels();
563	hv_vmbus_disconnect();
564
565	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
566
567	for(i = 0; i < 2 * MAXCPU; i++) {
568		if (setup_args.page_buffers[i] != NULL)
569			free(setup_args.page_buffers[i], M_DEVBUF);
570	}
571
572	/* remove swi */
573	CPU_FOREACH(i) {
574		if (hv_vmbus_g_context.hv_event_queue[i] != NULL) {
575			taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
576			hv_vmbus_g_context.hv_event_queue[i] = NULL;
577		}
578	}
579
580	lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector);
581
582	return (0);
583}
584
585static device_method_t vmbus_methods[] = {
586	/* Device interface */
587	DEVMETHOD(device_probe,			vmbus_probe),
588	DEVMETHOD(device_attach,		vmbus_attach),
589	DEVMETHOD(device_detach,		vmbus_detach),
590	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
591	DEVMETHOD(device_suspend,		bus_generic_suspend),
592	DEVMETHOD(device_resume,		bus_generic_resume),
593
594	/* Bus interface */
595	DEVMETHOD(bus_add_child,		bus_generic_add_child),
596	DEVMETHOD(bus_print_child,		bus_generic_print_child),
597	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
598	DEVMETHOD(bus_write_ivar,		vmbus_write_ivar),
599	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
600
601	DEVMETHOD_END
602};
603
604static driver_t vmbus_driver = {
605	"vmbus",
606	vmbus_methods,
607	sizeof(struct vmbus_softc)
608};
609
610static devclass_t vmbus_devclass;
611
612DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
613MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
614MODULE_VERSION(vmbus, 1);
615
616#ifndef EARLY_AP_STARTUP
617/*
618 * NOTE:
619 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
620 * initialized.
621 */
622SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
623#endif
624