hv_vmbus_drv_freebsd.c revision 256281
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 256276 2013-10-10 16:25:53Z dim $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/sysctl.h>
42#include <sys/syslog.h>
43#include <sys/systm.h>
44#include <sys/rtprio.h>
45#include <sys/interrupt.h>
46#include <sys/sx.h>
47#include <sys/taskqueue.h>
48#include <sys/mutex.h>
49#include <sys/smp.h>
50
51#include <machine/resource.h>
52#include <sys/rman.h>
53
54#include <machine/stdarg.h>
55#include <machine/intr_machdep.h>
56#include <sys/pcpu.h>
57
58#include "hv_vmbus_priv.h"
59
60
61#define VMBUS_IRQ	0x5
62
63static struct intr_event *hv_msg_intr_event;
64static struct intr_event *hv_event_intr_event;
65static void *msg_swintr;
66static void *event_swintr;
67static device_t vmbus_devp;
68static void *vmbus_cookiep;
69static int vmbus_rid;
70struct resource *intr_res;
71static int vmbus_irq = VMBUS_IRQ;
72static int vmbus_inited;
73static hv_setup_args setup_args; /* only CPU 0 supported at this time */
74
75/**
76 * @brief Software interrupt thread routine to handle channel messages from
77 * the hypervisor.
78 */
79static void
80vmbus_msg_swintr(void *dummy)
81{
82	int 			cpu;
83	void*			page_addr;
84	hv_vmbus_message*	msg;
85	hv_vmbus_message*	copied;
86
87	cpu = PCPU_GET(cpuid);
88	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
89	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
90
91	for (;;) {
92		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
93			break; /* no message */
94		} else {
95			copied = malloc(sizeof(hv_vmbus_message),
96					M_DEVBUF, M_NOWAIT);
97			KASSERT(copied != NULL,
98				("Error VMBUS: malloc failed to allocate"
99					" hv_vmbus_message!"));
100			if (copied == NULL)
101				continue;
102			memcpy(copied, msg, sizeof(hv_vmbus_message));
103			hv_queue_work_item(hv_vmbus_g_connection.work_queue,
104			hv_vmbus_on_channel_message, copied);
105	    }
106
107	    msg->header.message_type = HV_MESSAGE_TYPE_NONE;
108
109	    /*
110	     * Make sure the write to message_type (ie set to
111	     * HV_MESSAGE_TYPE_NONE) happens before we read the
112	     * message_pending and EOMing. Otherwise, the EOMing will
113	     * not deliver any more messages
114	     * since there is no empty slot
115	     */
116	    wmb();
117
118	    if (msg->header.message_flags.u.message_pending) {
119			/*
120			 * This will cause message queue rescan to possibly
121			 * deliver another msg from the hypervisor
122			 */
123			wrmsr(HV_X64_MSR_EOM, 0);
124	    }
125	}
126}
127
128/**
129 * @brief Interrupt filter routine for VMBUS.
130 *
131 * The purpose of this routine is to determine the type of VMBUS protocol
132 * message to process - an event or a channel message.
133 * As this is an interrupt filter routine, the function runs in a very
134 * restricted envinronment.  From the manpage for bus_setup_intr(9)
135 *
136 *   In this restricted environment, care must be taken to account for all
137 *   races.  A careful analysis of races should be done as well.  It is gener-
138 *   ally cheaper to take an extra interrupt, for example, than to protect
139 *   variables with spinlocks.	Read, modify, write cycles of hardware regis-
140 *   ters need to be carefully analyzed if other threads are accessing the
141 *   same registers.
142 */
143static int
144hv_vmbus_isr(void *unused)
145{
146	int				cpu;
147	hv_vmbus_message*		msg;
148	hv_vmbus_synic_event_flags*	event;
149	void*				page_addr;
150
151	cpu = PCPU_GET(cpuid);
152	/* (Temporary limit) */
153	KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
154
155	/*
156	 * The Windows team has advised that we check for events
157	 * before checking for messages. This is the way they do it
158	 * in Windows when running as a guest in Hyper-V
159	 */
160
161	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
162	event = (hv_vmbus_synic_event_flags*)
163		    page_addr + HV_VMBUS_MESSAGE_SINT;
164
165	/* Since we are a child, we only need to check bit 0 */
166	if (synch_test_and_clear_bit(0, &event->flags32[0])) {
167		swi_sched(event_swintr, 0);
168	}
169
170	/* Check if there are actual msgs to be process */
171	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
172	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
173
174	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
175		swi_sched(msg_swintr, 0);
176	}
177
178	return FILTER_HANDLED;
179}
180
181static int
182vmbus_read_ivar(
183	device_t	dev,
184	device_t	child,
185	int		index,
186	uintptr_t*	result)
187{
188	struct hv_device *child_dev_ctx = device_get_ivars(child);
189
190	switch (index) {
191
192	case HV_VMBUS_IVAR_TYPE:
193		*result = (uintptr_t) &child_dev_ctx->class_id;
194		return (0);
195	case HV_VMBUS_IVAR_INSTANCE:
196		*result = (uintptr_t) &child_dev_ctx->device_id;
197		return (0);
198	case HV_VMBUS_IVAR_DEVCTX:
199		*result = (uintptr_t) child_dev_ctx;
200		return (0);
201	case HV_VMBUS_IVAR_NODE:
202		*result = (uintptr_t) child_dev_ctx->device;
203		return (0);
204	}
205	return (ENOENT);
206}
207
208static int
209vmbus_write_ivar(
210	device_t	dev,
211	device_t	child,
212	int		index,
213	uintptr_t	value)
214{
215	switch (index) {
216
217	case HV_VMBUS_IVAR_TYPE:
218	case HV_VMBUS_IVAR_INSTANCE:
219	case HV_VMBUS_IVAR_DEVCTX:
220	case HV_VMBUS_IVAR_NODE:
221		/* read-only */
222		return (EINVAL);
223	}
224	return (ENOENT);
225}
226
227struct hv_device*
228hv_vmbus_child_device_create(
229	hv_guid		type,
230	hv_guid		instance,
231	hv_vmbus_channel*	channel)
232{
233	hv_device* child_dev;
234
235	/*
236	 * Allocate the new child device
237	 */
238	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
239			M_NOWAIT |  M_ZERO);
240	KASSERT(child_dev != NULL,
241	    ("Error VMBUS: malloc failed to allocate hv_device!"));
242
243	if (child_dev == NULL)
244		return (NULL);
245
246	child_dev->channel = channel;
247	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
248	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
249
250	return (child_dev);
251}
252
253static void
254print_dev_guid(struct hv_device *dev)
255{
256	int i;
257	unsigned char guid_name[100];
258	for (i = 0; i < 32; i += 2)
259		sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
260	if(bootverbose)
261		printf("VMBUS: Class ID: %s\n", guid_name);
262}
263
264int
265hv_vmbus_child_device_register(struct hv_device *child_dev)
266{
267	device_t child;
268	int ret = 0;
269
270	print_dev_guid(child_dev);
271
272
273	child = device_add_child(vmbus_devp, NULL, -1);
274	child_dev->device = child;
275	device_set_ivars(child, child_dev);
276
277	mtx_lock(&Giant);
278	ret = device_probe_and_attach(child);
279	mtx_unlock(&Giant);
280
281	return (0);
282}
283
284int
285hv_vmbus_child_device_unregister(struct hv_device *child_dev)
286{
287	int ret = 0;
288	/*
289	 * XXXKYS: Ensure that this is the opposite of
290	 * device_add_child()
291	 */
292	mtx_lock(&Giant);
293	ret = device_delete_child(vmbus_devp, child_dev->device);
294	mtx_unlock(&Giant);
295	return(ret);
296}
297
298static void vmbus_identify(driver_t *driver, device_t parent) {
299	BUS_ADD_CHILD(parent, 0, "vmbus", 0);
300	if (device_find_child(parent, "vmbus", 0) == NULL) {
301		BUS_ADD_CHILD(parent, 0, "vmbus", 0);
302	}
303}
304
305static int
306vmbus_probe(device_t dev) {
307	if(bootverbose)
308		device_printf(dev, "VMBUS: probe\n");
309
310	if (!hv_vmbus_query_hypervisor_presence())
311		return (ENXIO);
312
313	device_set_desc(dev, "Vmbus Devices");
314
315	return (0);
316}
317
318/**
319 * @brief Main vmbus driver initialization routine.
320 *
321 * Here, we
322 * - initialize the vmbus driver context
323 * - setup various driver entry points
324 * - invoke the vmbus hv main init routine
325 * - get the irq resource
326 * - invoke the vmbus to add the vmbus root device
327 * - setup the vmbus root device
328 * - retrieve the channel offers
329 */
330static int
331vmbus_bus_init(void)
332{
333	struct ioapic_intsrc {
334		struct intsrc io_intsrc;
335		u_int io_irq;
336		u_int io_intpin:8;
337		u_int io_vector:8;
338		u_int io_cpu:8;
339		u_int io_activehi:1;
340		u_int io_edgetrigger:1;
341		u_int io_masked:1;
342		int io_bus:4;
343		uint32_t io_lowreg;
344	};
345	int i, ret;
346	unsigned int vector = 0;
347	struct intsrc *isrc;
348	struct ioapic_intsrc *intpin;
349
350	if (vmbus_inited)
351		return (0);
352
353	vmbus_inited = 1;
354
355	ret = hv_vmbus_init();
356
357	if (ret) {
358		if(bootverbose)
359			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
360		return (ret);
361	}
362
363	ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
364	    NULL, SWI_CLOCK, 0, &msg_swintr);
365
366	if (ret)
367	    goto cleanup;
368
369	/*
370	 * Message SW interrupt handler checks a per-CPU page and
371	 * thus the thread needs to be bound to CPU-0 - which is where
372	 * all interrupts are processed.
373	 */
374	ret = intr_event_bind(hv_msg_intr_event, 0);
375
376	if (ret)
377		goto cleanup1;
378
379	ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
380	    NULL, SWI_CLOCK, 0, &event_swintr);
381
382	if (ret)
383		goto cleanup1;
384
385	intr_res = bus_alloc_resource(vmbus_devp,
386	    SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
387
388	if (intr_res == NULL) {
389		ret = ENOMEM; /* XXXKYS: Need a better errno */
390		goto cleanup2;
391	}
392
393	/*
394	 * Setup interrupt filter handler
395	 */
396	ret = bus_setup_intr(vmbus_devp, intr_res,
397	    INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
398	    NULL, &vmbus_cookiep);
399
400	if (ret != 0)
401		goto cleanup3;
402
403	ret = bus_bind_intr(vmbus_devp, intr_res, 0);
404	if (ret != 0)
405		goto cleanup4;
406
407	isrc = intr_lookup_source(vmbus_irq);
408	if ((isrc == NULL) || (isrc->is_event == NULL)) {
409		ret = EINVAL;
410		goto cleanup4;
411	}
412
413	/* vector = isrc->is_event->ie_vector; */
414	intpin = (struct ioapic_intsrc *)isrc;
415	vector = intpin->io_vector;
416
417	if(bootverbose)
418		printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
419
420	/**
421	 * Notify the hypervisor of our irq.
422	 */
423	setup_args.vector = vector;
424	for(i = 0; i < 2; i++) {
425		setup_args.page_buffers[i] =
426				malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
427		if (setup_args.page_buffers[i] == NULL) {
428			KASSERT(setup_args.page_buffers[i] != NULL,
429					("Error VMBUS: malloc failed!"));
430			if (i > 0)
431				free(setup_args.page_buffers[0], M_DEVBUF);
432			goto cleanup4;
433		}
434	}
435
436	/* only CPU #0 supported at this time */
437	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
438
439	/*
440	 * Connect to VMBus in the root partition
441	 */
442	ret = hv_vmbus_connect();
443
444	if (ret != 0)
445	    goto cleanup4;
446
447	hv_vmbus_request_channel_offers();
448	return (ret);
449
450	cleanup4:
451
452	/*
453	 * remove swi, bus and intr resource
454	 */
455	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
456
457	cleanup3:
458	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
459
460	cleanup2:
461	swi_remove(event_swintr);
462
463	cleanup1:
464	swi_remove(msg_swintr);
465
466	cleanup:
467	hv_vmbus_cleanup();
468
469	return (ret);
470}
471
472static int
473vmbus_attach(device_t dev)
474{
475	if(bootverbose)
476		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
477	vmbus_devp = dev;
478
479	/*
480	 * If the system has already booted and thread
481	 * scheduling is possible indicated by the global
482	 * cold set to zero, we just call the driver
483	 * initialization directly.
484	 */
485	if (!cold)
486		vmbus_bus_init();
487
488	return (0);
489}
490
491static void
492vmbus_init(void)
493{
494	/*
495	 * If the system has already booted and thread
496	 * scheduling is possible indicated by the global
497	 * cold set to zero, we just call the driver
498	 * initialization directly.
499	 */
500	if (!cold)
501		vmbus_bus_init();
502}
503
504static void
505vmbus_bus_exit(void)
506{
507	int i;
508
509	hv_vmbus_release_unattached_channels();
510	hv_vmbus_disconnect();
511
512	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
513
514	for(i = 0; i < 2; i++) {
515		if (setup_args.page_buffers[i] != 0)
516			free(setup_args.page_buffers[i], M_DEVBUF);
517	}
518
519	hv_vmbus_cleanup();
520
521	/* remove swi, bus and intr resource */
522	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
523
524	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
525
526	swi_remove(msg_swintr);
527	swi_remove(event_swintr);
528
529	return;
530}
531
/**
 * @brief Driver exit entry point; delegates to vmbus_bus_exit().
 */
static void
vmbus_exit(void)
{
	vmbus_bus_exit();
	return;
}
537
538static int
539vmbus_detach(device_t dev)
540{
541	vmbus_exit();
542	return (0);
543}
544
545static void
546vmbus_mod_load(void)
547{
548	if(bootverbose)
549		printf("VMBUS: load\n");
550}
551
552static void
553vmbus_mod_unload(void)
554{
555	if(bootverbose)
556		printf("VMBUS: unload\n");
557}
558
559static int
560vmbus_modevent(module_t mod, int what, void *arg)
561{
562	switch (what) {
563
564	case MOD_LOAD:
565		vmbus_mod_load();
566		break;
567	case MOD_UNLOAD:
568		vmbus_mod_unload();
569		break;
570	}
571
572	return (0);
573}
574
575static device_method_t vmbus_methods[] = {
576	/** Device interface */
577	DEVMETHOD(device_identify, vmbus_identify),
578	DEVMETHOD(device_probe, vmbus_probe),
579	DEVMETHOD(device_attach, vmbus_attach),
580	DEVMETHOD(device_detach, vmbus_detach),
581	DEVMETHOD(device_shutdown, bus_generic_shutdown),
582	DEVMETHOD(device_suspend, bus_generic_suspend),
583	DEVMETHOD(device_resume, bus_generic_resume),
584
585	/** Bus interface */
586	DEVMETHOD(bus_add_child, bus_generic_add_child),
587	DEVMETHOD(bus_print_child, bus_generic_print_child),
588	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
589	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
590
591	{ 0, 0 } };
592
593static char driver_name[] = "vmbus";
594static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
595
596
597devclass_t vmbus_devclass;
598
599DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
600MODULE_VERSION(vmbus,1);
601
602/* TODO: We want to be earlier than SI_SUB_VFS */
603SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
604
605