vmbus.c revision 295308
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 295308 2016-02-05 07:20:31Z sephe $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/proc.h>
42#include <sys/sysctl.h>
43#include <sys/syslog.h>
44#include <sys/systm.h>
45#include <sys/rtprio.h>
46#include <sys/interrupt.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49#include <sys/mutex.h>
50#include <sys/smp.h>
51
52#include <machine/resource.h>
53#include <sys/rman.h>
54
55#include <machine/stdarg.h>
56#include <machine/intr_machdep.h>
57#include <machine/md_var.h>
58#include <machine/segments.h>
59#include <sys/pcpu.h>
60#include <x86/apicvar.h>
61
62#include "hv_vmbus_priv.h"
63
64#include <contrib/dev/acpica/include/acpi.h>
65#include "acpi_if.h"
66
67static device_t vmbus_devp;
68static int vmbus_inited;
69static hv_setup_args setup_args; /* only CPU 0 supported at this time */
70
71static char *vmbus_ids[] = { "VMBUS", NULL };
72
73/**
74 * @brief Software interrupt thread routine to handle channel messages from
75 * the hypervisor.
76 */
77static void
78vmbus_msg_swintr(void *arg)
79{
80	int 			cpu;
81	void*			page_addr;
82	hv_vmbus_channel_msg_header	 *hdr;
83	hv_vmbus_channel_msg_table_entry *entry;
84	hv_vmbus_channel_msg_type msg_type;
85	hv_vmbus_message*	msg;
86
87	cpu = (int)(long)arg;
88	KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
89	    "cpu out of range!"));
90
91	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
92	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
93
94	for (;;) {
95		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
96			break; /* no message */
97
98		hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
99		msg_type = hdr->message_type;
100
101		if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
102			printf("VMBUS: unknown message type = %d\n", msg_type);
103			goto handled;
104		}
105
106		entry = &g_channel_message_table[msg_type];
107
108		if (entry->messageHandler)
109			entry->messageHandler(hdr);
110handled:
111	    msg->header.message_type = HV_MESSAGE_TYPE_NONE;
112
113	    /*
114	     * Make sure the write to message_type (ie set to
115	     * HV_MESSAGE_TYPE_NONE) happens before we read the
116	     * message_pending and EOMing. Otherwise, the EOMing will
117	     * not deliver any more messages
118	     * since there is no empty slot
119	     */
120	    wmb();
121
122	    if (msg->header.message_flags.u.message_pending) {
123			/*
124			 * This will cause message queue rescan to possibly
125			 * deliver another msg from the hypervisor
126			 */
127			wrmsr(HV_X64_MSR_EOM, 0);
128	    }
129	}
130}
131
132/**
133 * @brief Interrupt filter routine for VMBUS.
134 *
135 * The purpose of this routine is to determine the type of VMBUS protocol
136 * message to process - an event or a channel message.
137 */
138static inline int
139hv_vmbus_isr(struct trapframe *frame)
140{
141	int				cpu;
142	hv_vmbus_message*		msg;
143	hv_vmbus_synic_event_flags*	event;
144	void*				page_addr;
145
146	cpu = PCPU_GET(cpuid);
147
148	/*
149	 * The Windows team has advised that we check for events
150	 * before checking for messages. This is the way they do it
151	 * in Windows when running as a guest in Hyper-V
152	 */
153
154	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
155	event = (hv_vmbus_synic_event_flags*)
156		    page_addr + HV_VMBUS_MESSAGE_SINT;
157
158	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
159	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
160		/* Since we are a child, we only need to check bit 0 */
161		if (synch_test_and_clear_bit(0, &event->flags32[0])) {
162			hv_vmbus_on_events(cpu);
163		}
164	} else {
165		/*
166		 * On host with Win8 or above, we can directly look at
167		 * the event page. If bit n is set, we have an interrupt
168		 * on the channel with id n.
169		 * Directly schedule the event software interrupt on
170		 * current cpu.
171		 */
172		hv_vmbus_on_events(cpu);
173	}
174
175	/* Check if there are actual msgs to be process */
176	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
177	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
178
179	/* we call eventtimer process the message */
180	if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
181		msg->header.message_type = HV_MESSAGE_TYPE_NONE;
182
183		/*
184		 * Make sure the write to message_type (ie set to
185		 * HV_MESSAGE_TYPE_NONE) happens before we read the
186		 * message_pending and EOMing. Otherwise, the EOMing will
187		 * not deliver any more messages
188		 * since there is no empty slot
189		 */
190		wmb();
191
192		if (msg->header.message_flags.u.message_pending) {
193			/*
194			 * This will cause message queue rescan to possibly
195			 * deliver another msg from the hypervisor
196			 */
197			wrmsr(HV_X64_MSR_EOM, 0);
198		}
199		hv_et_intr(frame);
200		return (FILTER_HANDLED);
201	}
202
203	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
204		swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
205	}
206
207	return (FILTER_HANDLED);
208}
209
210u_long *hv_vmbus_intr_cpu[MAXCPU];
211
212void
213hv_vector_handler(struct trapframe *trap_frame)
214{
215	int cpu;
216
217	/*
218	 * Disable preemption.
219	 */
220	critical_enter();
221
222	/*
223	 * Do a little interrupt counting.
224	 */
225	cpu = PCPU_GET(cpuid);
226	(*hv_vmbus_intr_cpu[cpu])++;
227
228	hv_vmbus_isr(trap_frame);
229
230	/*
231	 * Enable preemption.
232	 */
233	critical_exit();
234}
235
236static int
237vmbus_read_ivar(
238	device_t	dev,
239	device_t	child,
240	int		index,
241	uintptr_t*	result)
242{
243	struct hv_device *child_dev_ctx = device_get_ivars(child);
244
245	switch (index) {
246
247	case HV_VMBUS_IVAR_TYPE:
248		*result = (uintptr_t) &child_dev_ctx->class_id;
249		return (0);
250	case HV_VMBUS_IVAR_INSTANCE:
251		*result = (uintptr_t) &child_dev_ctx->device_id;
252		return (0);
253	case HV_VMBUS_IVAR_DEVCTX:
254		*result = (uintptr_t) child_dev_ctx;
255		return (0);
256	case HV_VMBUS_IVAR_NODE:
257		*result = (uintptr_t) child_dev_ctx->device;
258		return (0);
259	}
260	return (ENOENT);
261}
262
263static int
264vmbus_write_ivar(
265	device_t	dev,
266	device_t	child,
267	int		index,
268	uintptr_t	value)
269{
270	switch (index) {
271
272	case HV_VMBUS_IVAR_TYPE:
273	case HV_VMBUS_IVAR_INSTANCE:
274	case HV_VMBUS_IVAR_DEVCTX:
275	case HV_VMBUS_IVAR_NODE:
276		/* read-only */
277		return (EINVAL);
278	}
279	return (ENOENT);
280}
281
282struct hv_device*
283hv_vmbus_child_device_create(
284	hv_guid		type,
285	hv_guid		instance,
286	hv_vmbus_channel*	channel)
287{
288	hv_device* child_dev;
289
290	/*
291	 * Allocate the new child device
292	 */
293	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
294			M_WAITOK |  M_ZERO);
295
296	child_dev->channel = channel;
297	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
298	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
299
300	return (child_dev);
301}
302
303static void
304print_dev_guid(struct hv_device *dev)
305{
306	int i;
307	unsigned char guid_name[100];
308	for (i = 0; i < 32; i += 2)
309		sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
310	if(bootverbose)
311		printf("VMBUS: Class ID: %s\n", guid_name);
312}
313
314int
315hv_vmbus_child_device_register(struct hv_device *child_dev)
316{
317	device_t child;
318	int ret = 0;
319
320	print_dev_guid(child_dev);
321
322
323	child = device_add_child(vmbus_devp, NULL, -1);
324	child_dev->device = child;
325	device_set_ivars(child, child_dev);
326
327	mtx_lock(&Giant);
328	ret = device_probe_and_attach(child);
329	mtx_unlock(&Giant);
330
331	return (0);
332}
333
334int
335hv_vmbus_child_device_unregister(struct hv_device *child_dev)
336{
337	int ret = 0;
338	/*
339	 * XXXKYS: Ensure that this is the opposite of
340	 * device_add_child()
341	 */
342	mtx_lock(&Giant);
343	ret = device_delete_child(vmbus_devp, child_dev->device);
344	mtx_unlock(&Giant);
345	return(ret);
346}
347
348static int
349vmbus_probe(device_t dev) {
350	if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL ||
351	    device_get_unit(dev) != 0)
352		return (ENXIO);
353
354	device_set_desc(dev, "Vmbus Devices");
355
356	return (BUS_PROBE_DEFAULT);
357}
358
359#ifdef HYPERV
360extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
361
362/**
363 * @brief Find a free IDT slot and setup the interrupt handler.
364 */
365static int
366vmbus_vector_alloc(void)
367{
368	int vector;
369	uintptr_t func;
370	struct gate_descriptor *ip;
371
372	/*
373	 * Search backwards form the highest IDT vector available for use
374	 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
375	 * handler at that vector and use it to interrupt vcpus.
376	 */
377	vector = APIC_SPURIOUS_INT;
378	while (--vector >= APIC_IPI_INTS) {
379		ip = &idt[vector];
380		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
381		if (func == (uintptr_t)&IDTVEC(rsvd)) {
382#ifdef __i386__
383			setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
384			    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
385#else
386			setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
387			    SEL_KPL, 0);
388#endif
389
390			return (vector);
391		}
392	}
393	return (0);
394}
395
396/**
397 * @brief Restore the IDT slot to rsvd.
398 */
399static void
400vmbus_vector_free(int vector)
401{
402        uintptr_t func;
403        struct gate_descriptor *ip;
404
405	if (vector == 0)
406		return;
407
408        KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
409            ("invalid vector %d", vector));
410
411        ip = &idt[vector];
412        func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
413        KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
414            ("invalid vector %d", vector));
415
416        setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
417}
418
419#else /* HYPERV */
420
/*
 * Stub used when HYPERV is not defined: no IDT slot is ever claimed,
 * which callers see as "no vector available" (0).
 */
static int
vmbus_vector_alloc(void)
{
	return (0);
}
426
/*
 * Stub used when HYPERV is not defined: nothing was allocated, so
 * there is nothing to release.
 */
static void
vmbus_vector_free(int vector)
{
	(void)vector;
}
431
432#endif /* HYPERV */
433
434/**
435 * @brief Main vmbus driver initialization routine.
436 *
437 * Here, we
438 * - initialize the vmbus driver context
439 * - setup various driver entry points
440 * - invoke the vmbus hv main init routine
441 * - get the irq resource
442 * - invoke the vmbus to add the vmbus root device
443 * - setup the vmbus root device
444 * - retrieve the channel offers
445 */
446static int
447vmbus_bus_init(void)
448{
449	int i, j, n, ret;
450	char buf[MAXCOMLEN + 1];
451	cpuset_t cpu_mask;
452
453	if (vmbus_inited)
454		return (0);
455
456	vmbus_inited = 1;
457
458	ret = hv_vmbus_init();
459
460	if (ret) {
461		if(bootverbose)
462			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
463		return (ret);
464	}
465
466	/*
467	 * Find a free IDT slot for vmbus callback.
468	 */
469	hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
470
471	if (hv_vmbus_g_context.hv_cb_vector == 0) {
472		if(bootverbose)
473			printf("Error VMBUS: Cannot find free IDT slot for "
474			    "vmbus callback!\n");
475		goto cleanup;
476	}
477
478	if(bootverbose)
479		printf("VMBUS: vmbus callback vector %d\n",
480		    hv_vmbus_g_context.hv_cb_vector);
481
482	/*
483	 * Notify the hypervisor of our vector.
484	 */
485	setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
486
487	CPU_FOREACH(j) {
488		hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
489		hv_vmbus_g_context.msg_swintr[j] = NULL;
490
491		snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
492		intrcnt_add(buf, &hv_vmbus_intr_cpu[j]);
493
494		for (i = 0; i < 2; i++)
495			setup_args.page_buffers[2 * j + i] = NULL;
496	}
497
498	/*
499	 * Per cpu setup.
500	 */
501	CPU_FOREACH(j) {
502		/*
503		 * Setup taskqueue to handle events
504		 */
505		hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK,
506			taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]);
507		if (hv_vmbus_g_context.hv_event_queue[j] == NULL) {
508			if (bootverbose)
509				printf("VMBUS: failed to setup taskqueue\n");
510			goto cleanup1;
511		}
512		CPU_SETOF(j, &cpu_mask);
513		taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, &cpu_mask,
514			"hvevent%d", j);
515
516		/*
517		 * Setup software interrupt thread and handler for msg handling.
518		 */
519		ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
520		    "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
521		    &hv_vmbus_g_context.msg_swintr[j]);
522		if (ret) {
523			if(bootverbose)
524				printf("VMBUS: failed to setup msg swi for "
525				    "cpu %d\n", j);
526			goto cleanup1;
527		}
528
529		/*
530		 * Bind the swi thread to the cpu.
531		 */
532		ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
533		    j);
534		if (ret) {
535			if(bootverbose)
536				printf("VMBUS: failed to bind msg swi thread "
537				    "to cpu %d\n", j);
538			goto cleanup1;
539		}
540
541		/*
542		 * Prepare the per cpu msg and event pages to be called on each cpu.
543		 */
544		for(i = 0; i < 2; i++) {
545			setup_args.page_buffers[2 * j + i] =
546				malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
547		}
548	}
549
550	if (bootverbose)
551		printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
552		    smp_started);
553
554	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
555
556	/*
557	 * Connect to VMBus in the root partition
558	 */
559	ret = hv_vmbus_connect();
560
561	if (ret != 0)
562		goto cleanup1;
563
564	hv_vmbus_request_channel_offers();
565	return (ret);
566
567	cleanup1:
568	/*
569	 * Free pages alloc'ed
570	 */
571	for (n = 0; n < 2 * MAXCPU; n++)
572		if (setup_args.page_buffers[n] != NULL)
573			free(setup_args.page_buffers[n], M_DEVBUF);
574
575	/*
576	 * remove swi and vmbus callback vector;
577	 */
578	CPU_FOREACH(j) {
579		if (hv_vmbus_g_context.hv_event_queue[j] != NULL)
580			taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]);
581		if (hv_vmbus_g_context.msg_swintr[j] != NULL)
582			swi_remove(hv_vmbus_g_context.msg_swintr[j]);
583		hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
584	}
585
586	vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
587
588	cleanup:
589	hv_vmbus_cleanup();
590
591	return (ret);
592}
593
594static int
595vmbus_attach(device_t dev)
596{
597	if(bootverbose)
598		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
599	vmbus_devp = dev;
600
601	/*
602	 * If the system has already booted and thread
603	 * scheduling is possible indicated by the global
604	 * cold set to zero, we just call the driver
605	 * initialization directly.
606	 */
607	if (!cold)
608		vmbus_bus_init();
609
610	return (0);
611}
612
613static void
614vmbus_init(void)
615{
616	if (vm_guest != VM_GUEST_HV)
617		return;
618
619	/*
620	 * If the system has already booted and thread
621	 * scheduling is possible, as indicated by the
622	 * global cold set to zero, we just call the driver
623	 * initialization directly.
624	 */
625	if (!cold)
626		vmbus_bus_init();
627}
628
629static void
630vmbus_bus_exit(void)
631{
632	int i;
633
634	hv_vmbus_release_unattached_channels();
635	hv_vmbus_disconnect();
636
637	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
638
639	for(i = 0; i < 2 * MAXCPU; i++) {
640		if (setup_args.page_buffers[i] != 0)
641			free(setup_args.page_buffers[i], M_DEVBUF);
642	}
643
644	hv_vmbus_cleanup();
645
646	/* remove swi */
647	CPU_FOREACH(i) {
648		if (hv_vmbus_g_context.hv_event_queue[i] != NULL)
649			taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
650		if (hv_vmbus_g_context.msg_swintr[i] != NULL)
651			swi_remove(hv_vmbus_g_context.msg_swintr[i]);
652		hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
653	}
654
655	vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
656
657	return;
658}
659
/**
 * @brief Thin wrapper that shuts the vmbus down via vmbus_bus_exit().
 */
static void
vmbus_exit(void)
{

	vmbus_bus_exit();
}
665
666static int
667vmbus_detach(device_t dev)
668{
669	vmbus_exit();
670	return (0);
671}
672
673static void
674vmbus_mod_load(void)
675{
676	if(bootverbose)
677		printf("VMBUS: load\n");
678}
679
680static void
681vmbus_mod_unload(void)
682{
683	if(bootverbose)
684		printf("VMBUS: unload\n");
685}
686
687static int
688vmbus_modevent(module_t mod, int what, void *arg)
689{
690	switch (what) {
691
692	case MOD_LOAD:
693		vmbus_mod_load();
694		break;
695	case MOD_UNLOAD:
696		vmbus_mod_unload();
697		break;
698	}
699
700	return (0);
701}
702
703static device_method_t vmbus_methods[] = {
704	/** Device interface */
705	DEVMETHOD(device_probe, vmbus_probe),
706	DEVMETHOD(device_attach, vmbus_attach),
707	DEVMETHOD(device_detach, vmbus_detach),
708	DEVMETHOD(device_shutdown, bus_generic_shutdown),
709	DEVMETHOD(device_suspend, bus_generic_suspend),
710	DEVMETHOD(device_resume, bus_generic_resume),
711
712	/** Bus interface */
713	DEVMETHOD(bus_add_child, bus_generic_add_child),
714	DEVMETHOD(bus_print_child, bus_generic_print_child),
715	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
716	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
717
718	{ 0, 0 } };
719
720static char driver_name[] = "vmbus";
721static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
722
723
724devclass_t vmbus_devclass;
725
726DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
727MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
728MODULE_VERSION(vmbus, 1);
729
730/* We want to be started after SMP is initialized */
731SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);
732
733