hv_vmbus_drv_freebsd.c revision 283280
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 283280 2015-05-22 09:03:55Z whu $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/sysctl.h>
42#include <sys/syslog.h>
43#include <sys/systm.h>
44#include <sys/rtprio.h>
45#include <sys/interrupt.h>
46#include <sys/sx.h>
47#include <sys/taskqueue.h>
48#include <sys/mutex.h>
49#include <sys/smp.h>
50
51#include <machine/resource.h>
52#include <sys/rman.h>
53
54#include <machine/stdarg.h>
55#include <machine/intr_machdep.h>
56#include <machine/md_var.h>
57#include <machine/segments.h>
58#include <sys/pcpu.h>
59#include <machine/apicvar.h>
60
61#include "hv_vmbus_priv.h"
62
63
64#define VMBUS_IRQ	0x5
65
66static device_t vmbus_devp;
67static int vmbus_inited;
68static hv_setup_args setup_args; /* only CPU 0 supported at this time */
69
70/**
71 * @brief Software interrupt thread routine to handle channel messages from
72 * the hypervisor.
73 */
74static void
75vmbus_msg_swintr(void *arg)
76{
77	int 			cpu;
78	void*			page_addr;
79	hv_vmbus_message*	msg;
80	hv_vmbus_message*	copied;
81
82	cpu = (int)(long)arg;
83	KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
84	    "cpu out of range!"));
85
86	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
87	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
88
89	for (;;) {
90		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
91			break; /* no message */
92		} else {
93			copied = malloc(sizeof(hv_vmbus_message),
94					M_DEVBUF, M_NOWAIT);
95			KASSERT(copied != NULL,
96				("Error VMBUS: malloc failed to allocate"
97					" hv_vmbus_message!"));
98			if (copied == NULL)
99				continue;
100			memcpy(copied, msg, sizeof(hv_vmbus_message));
101			hv_queue_work_item(hv_vmbus_g_connection.work_queue,
102			hv_vmbus_on_channel_message, copied);
103	    }
104
105	    msg->header.message_type = HV_MESSAGE_TYPE_NONE;
106
107	    /*
108	     * Make sure the write to message_type (ie set to
109	     * HV_MESSAGE_TYPE_NONE) happens before we read the
110	     * message_pending and EOMing. Otherwise, the EOMing will
111	     * not deliver any more messages
112	     * since there is no empty slot
113	     */
114	    wmb();
115
116	    if (msg->header.message_flags.u.message_pending) {
117			/*
118			 * This will cause message queue rescan to possibly
119			 * deliver another msg from the hypervisor
120			 */
121			wrmsr(HV_X64_MSR_EOM, 0);
122	    }
123	}
124}
125
126/**
127 * @brief Interrupt filter routine for VMBUS.
128 *
129 * The purpose of this routine is to determine the type of VMBUS protocol
130 * message to process - an event or a channel message.
131 */
132static inline int
133hv_vmbus_isr(void *unused)
134{
135	int				cpu;
136	hv_vmbus_message*		msg;
137	hv_vmbus_synic_event_flags*	event;
138	void*				page_addr;
139
140	cpu = PCPU_GET(cpuid);
141
142	/*
143	 * The Windows team has advised that we check for events
144	 * before checking for messages. This is the way they do it
145	 * in Windows when running as a guest in Hyper-V
146	 */
147
148	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
149	event = (hv_vmbus_synic_event_flags*)
150		    page_addr + HV_VMBUS_MESSAGE_SINT;
151
152	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
153	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
154		/* Since we are a child, we only need to check bit 0 */
155		if (synch_test_and_clear_bit(0, &event->flags32[0])) {
156			swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
157		}
158	} else {
159		/*
160		 * On host with Win8 or above, we can directly look at
161		 * the event page. If bit n is set, we have an interrupt
162		 * on the channel with id n.
163		 * Directly schedule the event software interrupt on
164		 * current cpu.
165		 */
166		swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
167	}
168
169	/* Check if there are actual msgs to be process */
170	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
171	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
172
173	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
174		swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
175	}
176
177	return FILTER_HANDLED;
178}
179
180#ifdef HV_DEBUG_INTR
181uint32_t hv_intr_count = 0;
182#endif
183uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
184uint32_t hv_vmbus_intr_cpu[MAXCPU];
185
186void
187hv_vector_handler(struct trapframe *trap_frame)
188{
189#ifdef HV_DEBUG_INTR
190	int cpu;
191#endif
192
193	/*
194	 * Disable preemption.
195	 */
196	critical_enter();
197
198#ifdef HV_DEBUG_INTR
199	/*
200	 * Do a little interrupt counting.
201	 */
202	cpu = PCPU_GET(cpuid);
203	hv_vmbus_intr_cpu[cpu]++;
204	hv_intr_count++;
205#endif
206
207	hv_vmbus_isr(NULL);
208
209	/*
210	 * Enable preemption.
211	 */
212	critical_exit();
213}
214
215static int
216vmbus_read_ivar(
217	device_t	dev,
218	device_t	child,
219	int		index,
220	uintptr_t*	result)
221{
222	struct hv_device *child_dev_ctx = device_get_ivars(child);
223
224	switch (index) {
225
226	case HV_VMBUS_IVAR_TYPE:
227		*result = (uintptr_t) &child_dev_ctx->class_id;
228		return (0);
229	case HV_VMBUS_IVAR_INSTANCE:
230		*result = (uintptr_t) &child_dev_ctx->device_id;
231		return (0);
232	case HV_VMBUS_IVAR_DEVCTX:
233		*result = (uintptr_t) child_dev_ctx;
234		return (0);
235	case HV_VMBUS_IVAR_NODE:
236		*result = (uintptr_t) child_dev_ctx->device;
237		return (0);
238	}
239	return (ENOENT);
240}
241
242static int
243vmbus_write_ivar(
244	device_t	dev,
245	device_t	child,
246	int		index,
247	uintptr_t	value)
248{
249	switch (index) {
250
251	case HV_VMBUS_IVAR_TYPE:
252	case HV_VMBUS_IVAR_INSTANCE:
253	case HV_VMBUS_IVAR_DEVCTX:
254	case HV_VMBUS_IVAR_NODE:
255		/* read-only */
256		return (EINVAL);
257	}
258	return (ENOENT);
259}
260
261struct hv_device*
262hv_vmbus_child_device_create(
263	hv_guid		type,
264	hv_guid		instance,
265	hv_vmbus_channel*	channel)
266{
267	hv_device* child_dev;
268
269	/*
270	 * Allocate the new child device
271	 */
272	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
273			M_NOWAIT |  M_ZERO);
274	KASSERT(child_dev != NULL,
275	    ("Error VMBUS: malloc failed to allocate hv_device!"));
276
277	if (child_dev == NULL)
278		return (NULL);
279
280	child_dev->channel = channel;
281	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
282	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
283
284	return (child_dev);
285}
286
287static void
288print_dev_guid(struct hv_device *dev)
289{
290	int i;
291	unsigned char guid_name[100];
292	for (i = 0; i < 32; i += 2)
293		sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
294	if(bootverbose)
295		printf("VMBUS: Class ID: %s\n", guid_name);
296}
297
298int
299hv_vmbus_child_device_register(struct hv_device *child_dev)
300{
301	device_t child;
302	int ret = 0;
303
304	print_dev_guid(child_dev);
305
306
307	child = device_add_child(vmbus_devp, NULL, -1);
308	child_dev->device = child;
309	device_set_ivars(child, child_dev);
310
311	mtx_lock(&Giant);
312	ret = device_probe_and_attach(child);
313	mtx_unlock(&Giant);
314
315	return (0);
316}
317
318int
319hv_vmbus_child_device_unregister(struct hv_device *child_dev)
320{
321	int ret = 0;
322	/*
323	 * XXXKYS: Ensure that this is the opposite of
324	 * device_add_child()
325	 */
326	mtx_lock(&Giant);
327	ret = device_delete_child(vmbus_devp, child_dev->device);
328	mtx_unlock(&Giant);
329	return(ret);
330}
331
332static void
333vmbus_identify(driver_t *driver, device_t parent)
334{
335	if (!hv_vmbus_query_hypervisor_presence())
336		return;
337
338	vm_guest = VM_GUEST_HV;
339
340	BUS_ADD_CHILD(parent, 0, "vmbus", 0);
341}
342
343static int
344vmbus_probe(device_t dev) {
345	if(bootverbose)
346		device_printf(dev, "VMBUS: probe\n");
347
348	device_set_desc(dev, "Vmbus Devices");
349
350	return (BUS_PROBE_NOWILDCARD);
351}
352
353#ifdef HYPERV
354extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
355
356/**
357 * @brief Find a free IDT slot and setup the interrupt handler.
358 */
359static int
360vmbus_vector_alloc(void)
361{
362	int vector;
363	uintptr_t func;
364	struct gate_descriptor *ip;
365
366	/*
367	 * Search backwards form the highest IDT vector available for use
368	 * as vmbus channel callback vector. We install 'hv_vmbus_callback'
369	 * handler at that vector and use it to interrupt vcpus.
370	 */
371	vector = APIC_SPURIOUS_INT;
372	while (--vector >= APIC_IPI_INTS) {
373		ip = &idt[vector];
374		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
375		if (func == (uintptr_t)&IDTVEC(rsvd)) {
376#ifdef __i386__
377			setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
378			    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
379#else
380			setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
381			    SEL_KPL, 0);
382#endif
383
384			return (vector);
385		}
386	}
387	return (0);
388}
389
390/**
391 * @brief Restore the IDT slot to rsvd.
392 */
393static void
394vmbus_vector_free(int vector)
395{
396        uintptr_t func;
397        struct gate_descriptor *ip;
398
399	if (vector == 0)
400		return;
401
402        KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
403            ("invalid vector %d", vector));
404
405        ip = &idt[vector];
406        func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
407        KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
408            ("invalid vector %d", vector));
409
410        setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
411}
412
413#else /* HYPERV */
414
/*
 * Build without HYPERV: there is no callback handler to install, so no
 * vector is ever handed out.
 */
static int
vmbus_vector_alloc(void)
{
	const int no_vector = 0;

	return (no_vector);
}
420
/* Build without HYPERV: nothing was installed, so nothing to undo. */
static void
vmbus_vector_free(int vector)
{
	(void)vector;
}
425
426#endif /* HYPERV */
427
428/**
429 * @brief Main vmbus driver initialization routine.
430 *
431 * Here, we
432 * - initialize the vmbus driver context
433 * - setup various driver entry points
434 * - invoke the vmbus hv main init routine
435 * - get the irq resource
436 * - invoke the vmbus to add the vmbus root device
437 * - setup the vmbus root device
438 * - retrieve the channel offers
439 */
440static int
441vmbus_bus_init(void)
442{
443	int i, j, n, ret;
444
445	if (vmbus_inited)
446		return (0);
447
448	vmbus_inited = 1;
449
450	ret = hv_vmbus_init();
451
452	if (ret) {
453		if(bootverbose)
454			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
455		return (ret);
456	}
457
458	/*
459	 * Find a free IDT slot for vmbus callback.
460	 */
461	hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
462
463	if (hv_vmbus_g_context.hv_cb_vector == 0) {
464		if(bootverbose)
465			printf("Error VMBUS: Cannot find free IDT slot for "
466			    "vmbus callback!\n");
467		goto cleanup;
468	}
469
470	if(bootverbose)
471		printf("VMBUS: vmbus callback vector %d\n",
472		    hv_vmbus_g_context.hv_cb_vector);
473
474	/*
475	 * Notify the hypervisor of our vector.
476	 */
477	setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
478
479	CPU_FOREACH(j) {
480		hv_vmbus_intr_cpu[j] = 0;
481		hv_vmbus_swintr_event_cpu[j] = 0;
482		hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
483		hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
484		hv_vmbus_g_context.event_swintr[j] = NULL;
485		hv_vmbus_g_context.msg_swintr[j] = NULL;
486
487		for (i = 0; i < 2; i++)
488			setup_args.page_buffers[2 * j + i] = NULL;
489	}
490
491	/*
492	 * Per cpu setup.
493	 */
494	CPU_FOREACH(j) {
495		/*
496		 * Setup software interrupt thread and handler for msg handling.
497		 */
498		ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
499		    "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
500		    &hv_vmbus_g_context.msg_swintr[j]);
501		if (ret) {
502			if(bootverbose)
503				printf("VMBUS: failed to setup msg swi for "
504				    "cpu %d\n", j);
505			goto cleanup1;
506		}
507
508		/*
509		 * Bind the swi thread to the cpu.
510		 */
511		ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
512		    j);
513	 	if (ret) {
514			if(bootverbose)
515				printf("VMBUS: failed to bind msg swi thread "
516				    "to cpu %d\n", j);
517			goto cleanup1;
518		}
519
520		/*
521		 * Setup software interrupt thread and handler for
522		 * event handling.
523		 */
524		ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
525		    "hv_event", hv_vmbus_on_events, (void *)(long)j,
526		    SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
527		if (ret) {
528			if(bootverbose)
529				printf("VMBUS: failed to setup event swi for "
530				    "cpu %d\n", j);
531			goto cleanup1;
532		}
533
534		/*
535		 * Prepare the per cpu msg and event pages to be called on each cpu.
536		 */
537		for(i = 0; i < 2; i++) {
538			setup_args.page_buffers[2 * j + i] =
539				malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
540			if (setup_args.page_buffers[2 * j + i] == NULL) {
541				KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
542					("Error VMBUS: malloc failed!"));
543				goto cleanup1;
544			}
545		}
546	}
547
548	if (bootverbose)
549		printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
550		    smp_started);
551
552	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
553
554	/*
555	 * Connect to VMBus in the root partition
556	 */
557	ret = hv_vmbus_connect();
558
559	if (ret != 0)
560		goto cleanup1;
561
562	hv_vmbus_request_channel_offers();
563	return (ret);
564
565	cleanup1:
566	/*
567	 * Free pages alloc'ed
568	 */
569	for (n = 0; n < 2 * MAXCPU; n++)
570		if (setup_args.page_buffers[n] != NULL)
571			free(setup_args.page_buffers[n], M_DEVBUF);
572
573	/*
574	 * remove swi and vmbus callback vector;
575	 */
576	CPU_FOREACH(j) {
577		if (hv_vmbus_g_context.msg_swintr[j] != NULL)
578			swi_remove(hv_vmbus_g_context.msg_swintr[j]);
579		if (hv_vmbus_g_context.event_swintr[j] != NULL)
580			swi_remove(hv_vmbus_g_context.event_swintr[j]);
581		hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
582		hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
583	}
584
585	vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
586
587	cleanup:
588	hv_vmbus_cleanup();
589
590	return (ret);
591}
592
593static int
594vmbus_attach(device_t dev)
595{
596	if(bootverbose)
597		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
598	vmbus_devp = dev;
599
600	/*
601	 * If the system has already booted and thread
602	 * scheduling is possible indicated by the global
603	 * cold set to zero, we just call the driver
604	 * initialization directly.
605	 */
606	if (!cold)
607		vmbus_bus_init();
608
609	return (0);
610}
611
612static void
613vmbus_init(void)
614{
615	if (vm_guest != VM_GUEST_HV)
616		return;
617
618	/*
619	 * If the system has already booted and thread
620	 * scheduling is possible, as indicated by the
621	 * global cold set to zero, we just call the driver
622	 * initialization directly.
623	 */
624	if (!cold)
625		vmbus_bus_init();
626}
627
628static void
629vmbus_bus_exit(void)
630{
631	int i;
632
633	hv_vmbus_release_unattached_channels();
634	hv_vmbus_disconnect();
635
636	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
637
638	for(i = 0; i < 2 * MAXCPU; i++) {
639		if (setup_args.page_buffers[i] != 0)
640			free(setup_args.page_buffers[i], M_DEVBUF);
641	}
642
643	hv_vmbus_cleanup();
644
645	/* remove swi */
646	CPU_FOREACH(i) {
647		if (hv_vmbus_g_context.msg_swintr[i] != NULL)
648			swi_remove(hv_vmbus_g_context.msg_swintr[i]);
649		if (hv_vmbus_g_context.event_swintr[i] != NULL)
650			swi_remove(hv_vmbus_g_context.event_swintr[i]);
651		hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
652		hv_vmbus_g_context.hv_event_intr_event[i] = NULL;
653	}
654
655	vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
656
657	return;
658}
659
/* Thin wrapper: all of the teardown lives in vmbus_bus_exit(). */
static void
vmbus_exit(void)
{
	vmbus_bus_exit();
	return;
}
665
666static int
667vmbus_detach(device_t dev)
668{
669	vmbus_exit();
670	return (0);
671}
672
673static void
674vmbus_mod_load(void)
675{
676	if(bootverbose)
677		printf("VMBUS: load\n");
678}
679
680static void
681vmbus_mod_unload(void)
682{
683	if(bootverbose)
684		printf("VMBUS: unload\n");
685}
686
687static int
688vmbus_modevent(module_t mod, int what, void *arg)
689{
690	switch (what) {
691
692	case MOD_LOAD:
693		vmbus_mod_load();
694		break;
695	case MOD_UNLOAD:
696		vmbus_mod_unload();
697		break;
698	}
699
700	return (0);
701}
702
703static device_method_t vmbus_methods[] = {
704	/** Device interface */
705	DEVMETHOD(device_identify, vmbus_identify),
706	DEVMETHOD(device_probe, vmbus_probe),
707	DEVMETHOD(device_attach, vmbus_attach),
708	DEVMETHOD(device_detach, vmbus_detach),
709	DEVMETHOD(device_shutdown, bus_generic_shutdown),
710	DEVMETHOD(device_suspend, bus_generic_suspend),
711	DEVMETHOD(device_resume, bus_generic_resume),
712
713	/** Bus interface */
714	DEVMETHOD(bus_add_child, bus_generic_add_child),
715	DEVMETHOD(bus_print_child, bus_generic_print_child),
716	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
717	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
718
719	{ 0, 0 } };
720
721static char driver_name[] = "vmbus";
722static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
723
724
725devclass_t vmbus_devclass;
726
727DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
728MODULE_VERSION(vmbus,1);
729
730/* We want to be started after SMP is initialized */
731SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);
732
733