hv_vmbus_drv_freebsd.c revision 250199
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/module.h>
39#include <sys/sysctl.h>
40#include <sys/syslog.h>
41#include <sys/systm.h>
42#include <sys/rtprio.h>
43#include <sys/interrupt.h>
44#include <sys/sx.h>
45#include <sys/taskqueue.h>
46#include <sys/mutex.h>
47#include <sys/smp.h>
48
49#include <machine/resource.h>
50#include <sys/rman.h>
51
52#include <machine/stdarg.h>
53#include <machine/intr_machdep.h>
54#include <sys/pcpu.h>
55
56#include "hv_vmbus_priv.h"
57
58
59#define VMBUS_IRQ	0x5
60
61static struct intr_event *hv_msg_intr_event;
62static struct intr_event *hv_event_intr_event;
63static void *msg_swintr;
64static void *event_swintr;
65static device_t vmbus_devp;
66static void *vmbus_cookiep;
67static int vmbus_rid;
68struct resource *intr_res;
69static int vmbus_irq = VMBUS_IRQ;
70static int vmbus_inited;
71
72/**
73 * @brief Software interrupt thread routine to handle channel messages from
74 * the hypervisor.
75 */
76static void
77vmbus_msg_swintr(void *dummy)
78{
79	int 			cpu;
80	void*			page_addr;
81	hv_vmbus_message*	msg;
82	hv_vmbus_message*	copied;
83
84	cpu = PCPU_GET(cpuid);
85	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
86	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
87
88	for (;;) {
89		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
90			break; /* no message */
91		} else {
92			copied = malloc(sizeof(hv_vmbus_message),
93					M_DEVBUF, M_NOWAIT);
94			KASSERT(copied != NULL,
95				("Error VMBUS: malloc failed to allocate"
96					" hv_vmbus_message!"));
97			if (copied == NULL)
98				continue;
99			memcpy(copied, msg, sizeof(hv_vmbus_message));
100			hv_queue_work_item(hv_vmbus_g_connection.work_queue,
101			hv_vmbus_on_channel_message, copied);
102	    }
103
104	    msg->header.message_type = HV_MESSAGE_TYPE_NONE;
105
106	    /*
107	     * Make sure the write to message_type (ie set to
108	     * HV_MESSAGE_TYPE_NONE) happens before we read the
109	     * message_pending and EOMing. Otherwise, the EOMing will
110	     * not deliver any more messages
111	     * since there is no empty slot
112	     */
113	    wmb();
114
115	    if (msg->header.message_flags.message_pending) {
116			/*
117			 * This will cause message queue rescan to possibly
118			 * deliver another msg from the hypervisor
119			 */
120			hv_vmbus_write_msr(HV_X64_MSR_EOM, 0);
121	    }
122	}
123}
124
125/**
126 * @brief Interrupt filter routine for VMBUS.
127 *
128 * The purpose of this routine is to determine the type of VMBUS protocol
129 * message to process - an event or a channel message.
130 * As this is an interrupt filter routine, the function runs in a very
131 * restricted envinronment.  From the manpage for bus_setup_intr(9)
132 *
133 *   In this restricted environment, care must be taken to account for all
134 *   races.  A careful analysis of races should be done as well.  It is gener-
135 *   ally cheaper to take an extra interrupt, for example, than to protect
136 *   variables with spinlocks.	Read, modify, write cycles of hardware regis-
137 *   ters need to be carefully analyzed if other threads are accessing the
138 *   same registers.
139 */
140static int
141hv_vmbus_isr(void *unused)
142{
143	int				cpu;
144	hv_vmbus_message*		msg;
145	hv_vmbus_synic_event_flags*	event;
146	void*				page_addr;
147
148	cpu = PCPU_GET(cpuid);
149	/* (Temporary limit) */
150	KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
151
152	/*
153	 * The Windows team has advised that we check for events
154	 * before checking for messages. This is the way they do it
155	 * in Windows when running as a guest in Hyper-V
156	 */
157
158	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
159	event = (hv_vmbus_synic_event_flags*)
160		    page_addr + HV_VMBUS_MESSAGE_SINT;
161
162	/* Since we are a child, we only need to check bit 0 */
163	if (synch_test_and_clear_bit(0, &event->flags32[0])) {
164		swi_sched(event_swintr, 0);
165	}
166
167	/* Check if there are actual msgs to be process */
168	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
169	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
170
171	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
172		swi_sched(msg_swintr, 0);
173	}
174
175	return FILTER_HANDLED;
176}
177
178static int
179vmbus_read_ivar(
180	device_t	dev,
181	device_t	child,
182	int		index,
183	uintptr_t*	result)
184{
185	struct hv_device *child_dev_ctx = device_get_ivars(child);
186
187	switch (index) {
188
189	case HV_VMBUS_IVAR_TYPE:
190		*result = (uintptr_t) &child_dev_ctx->class_id;
191		return (0);
192	case HV_VMBUS_IVAR_INSTANCE:
193		*result = (uintptr_t) &child_dev_ctx->device_id;
194		return (0);
195	case HV_VMBUS_IVAR_DEVCTX:
196		*result = (uintptr_t) child_dev_ctx;
197		return (0);
198	case HV_VMBUS_IVAR_NODE:
199		*result = (uintptr_t) child_dev_ctx->device;
200		return (0);
201	}
202	return (ENOENT);
203}
204
205static int
206vmbus_write_ivar(
207	device_t	dev,
208	device_t	child,
209	int		index,
210	uintptr_t	value)
211{
212	switch (index) {
213
214	case HV_VMBUS_IVAR_TYPE:
215	case HV_VMBUS_IVAR_INSTANCE:
216	case HV_VMBUS_IVAR_DEVCTX:
217	case HV_VMBUS_IVAR_NODE:
218		/* read-only */
219		return (EINVAL);
220	}
221	return (ENOENT);
222}
223
224struct hv_device*
225hv_vmbus_child_device_create(
226	hv_guid		type,
227	hv_guid		instance,
228	hv_vmbus_channel*	channel)
229{
230	hv_device* child_dev;
231
232	/*
233	 * Allocate the new child device
234	 */
235	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
236			M_NOWAIT |  M_ZERO);
237	KASSERT(child_dev != NULL,
238	    ("Error VMBUS: malloc failed to allocate hv_device!"));
239
240	if (child_dev == NULL)
241		return (NULL);
242
243	child_dev->channel = channel;
244	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
245	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
246
247	return (child_dev);
248}
249
250static void
251print_dev_guid(struct hv_device *dev)
252{
253	int i;
254	unsigned char guid_name[100];
255	for (i = 0; i < 32; i += 2)
256		sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
257	if(bootverbose)
258		printf("VMBUS: Class ID: %s\n", guid_name);
259}
260
261int
262hv_vmbus_child_device_register(struct hv_device *child_dev)
263{
264	device_t child;
265	int ret = 0;
266
267	print_dev_guid(child_dev);
268
269
270	child = device_add_child(vmbus_devp, NULL, -1);
271	child_dev->device = child;
272	device_set_ivars(child, child_dev);
273
274	mtx_lock(&Giant);
275	ret = device_probe_and_attach(child);
276	mtx_unlock(&Giant);
277
278	return (0);
279}
280
281int
282hv_vmbus_child_device_unregister(struct hv_device *child_dev)
283{
284	int ret = 0;
285	/*
286	 * XXXKYS: Ensure that this is the opposite of
287	 * device_add_child()
288	 */
289	mtx_lock(&Giant);
290	ret = device_delete_child(vmbus_devp, child_dev->device);
291	mtx_unlock(&Giant);
292	return(ret);
293}
294
295static void vmbus_identify(driver_t *driver, device_t parent) {
296	BUS_ADD_CHILD(parent, 0, "vmbus", 0);
297	if (device_find_child(parent, "vmbus", 0) == NULL) {
298		BUS_ADD_CHILD(parent, 0, "vmbus", 0);
299	}
300}
301
302static int
303vmbus_probe(device_t dev) {
304	if(bootverbose)
305		device_printf(dev, "VMBUS: probe\n");
306
307	if (!hv_vmbus_query_hypervisor_presence())
308		return (ENXIO);
309
310	device_set_desc(dev, "Vmbus Devices");
311
312	return (0);
313}
314
315/**
316 * @brief Main vmbus driver initialization routine.
317 *
318 * Here, we
319 * - initialize the vmbus driver context
320 * - setup various driver entry points
321 * - invoke the vmbus hv main init routine
322 * - get the irq resource
323 * - invoke the vmbus to add the vmbus root device
324 * - setup the vmbus root device
325 * - retrieve the channel offers
326 */
327static int
328vmbus_bus_init(void)
329{
330	struct ioapic_intsrc {
331		struct intsrc io_intsrc;
332		u_int io_irq;
333		u_int io_intpin:8;
334		u_int io_vector:8;
335		u_int io_cpu:8;
336		u_int io_activehi:1;
337		u_int io_edgetrigger:1;
338		u_int io_masked:1;
339		int io_bus:4;
340		uint32_t io_lowreg;
341	};
342
343	int ret;
344	unsigned int vector = 0;
345	struct intsrc *isrc;
346	struct ioapic_intsrc *intpin;
347
348	if (vmbus_inited)
349		return (0);
350
351	vmbus_inited = 1;
352
353	ret = hv_vmbus_init();
354
355	if (ret) {
356		if(bootverbose)
357		printf("Error VMBUS: Hypervisor Initialization Failed!\n");
358		return (ret);
359	}
360
361	ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
362	    NULL, SWI_CLOCK, 0, &msg_swintr);
363
364	if (ret)
365	    goto cleanup;
366
367	/*
368	 * Message SW interrupt handler checks a per-CPU page and
369	 * thus the thread needs to be bound to CPU-0 - which is where
370	 * all interrupts are processed.
371	 */
372	ret = intr_event_bind(hv_msg_intr_event, 0);
373
374	if (ret)
375		goto cleanup1;
376
377	ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
378	    NULL, SWI_CLOCK, 0, &event_swintr);
379
380	if (ret)
381		goto cleanup1;
382
383	intr_res = bus_alloc_resource(vmbus_devp,
384	    SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
385
386	if (intr_res == NULL) {
387		ret = ENOMEM; /* XXXKYS: Need a better errno */
388		goto cleanup2;
389	}
390
391	/*
392	 * Setup interrupt filter handler
393	 */
394	ret = bus_setup_intr(vmbus_devp, intr_res,
395	    INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
396	    NULL, &vmbus_cookiep);
397
398	if (ret != 0)
399		goto cleanup3;
400
401	ret = bus_bind_intr(vmbus_devp, intr_res, 0);
402	if (ret != 0)
403		goto cleanup4;
404
405	isrc = intr_lookup_source(vmbus_irq);
406	if ((isrc == NULL) || (isrc->is_event == NULL)) {
407		ret = EINVAL;
408		goto cleanup4;
409	}
410
411	/* vector = isrc->is_event->ie_vector; */
412	intpin = (struct ioapic_intsrc *)isrc;
413	vector = intpin->io_vector;
414
415	if(bootverbose)
416		printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
417
418	/**
419	 * Notify the hypervisor of our irq.
420	 */
421
422	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &vector);
423
424	/**
425	 * Connect to VMBus in the root partition
426	 */
427	ret = hv_vmbus_connect();
428
429	if (ret)
430	    goto cleanup4;
431
432	hv_vmbus_request_channel_offers();
433	return (ret);
434
435	cleanup4:
436
437	/*
438	 * remove swi, bus and intr resource
439	 */
440	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
441
442	cleanup3:
443
444	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
445
446	cleanup2:
447	swi_remove(event_swintr);
448
449	cleanup1:
450	swi_remove(msg_swintr);
451
452	cleanup:
453	hv_vmbus_cleanup();
454
455	return (ret);
456}
457
458static int
459vmbus_attach(device_t dev)
460{
461	if(bootverbose)
462		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
463	vmbus_devp = dev;
464
465	/*
466	 * If the system has already booted and thread
467	 * scheduling is possible indicated by the global
468	 * cold set to zero, we just call the driver
469	 * initialization directly.
470	 */
471	if (!cold)
472		vmbus_bus_init();
473
474	return (0);
475}
476
477static void
478vmbus_init(void)
479{
480	/*
481	 * If the system has already booted and thread
482	 * scheduling is possible indicated by the global
483	 * cold set to zero, we just call the driver
484	 * initialization directly.
485	 */
486	if (!cold)
487		vmbus_bus_init();
488}
489
490static void
491vmbus_bus_exit(void)
492{
493	hv_vmbus_release_unattached_channels();
494	hv_vmbus_disconnect();
495
496	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
497
498	hv_vmbus_cleanup();
499
500	/* remove swi, bus and intr resource */
501	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
502
503	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
504
505	swi_remove(msg_swintr);
506	swi_remove(event_swintr);
507
508	return;
509}
510
/**
 * @brief Driver-level exit entry point; simply tears the bus down.
 */
static void
vmbus_exit(void)
{
	vmbus_bus_exit();
	return;
}
516
517static int
518vmbus_detach(device_t dev)
519{
520	vmbus_exit();
521	return (0);
522}
523
524static void
525vmbus_mod_load(void)
526{
527	if(bootverbose)
528		printf("VMBUS: load\n");
529}
530
531static void
532vmbus_mod_unload(void)
533{
534	if(bootverbose)
535		printf("VMBUS: unload\n");
536}
537
538static int
539vmbus_modevent(module_t mod, int what, void *arg)
540{
541	switch (what) {
542
543	case MOD_LOAD:
544		vmbus_mod_load();
545		break;
546	case MOD_UNLOAD:
547		vmbus_mod_unload();
548		break;
549	}
550
551	return (0);
552}
553
554static device_method_t vmbus_methods[] = {
555	/** Device interface */
556	DEVMETHOD(device_identify, vmbus_identify),
557	DEVMETHOD(device_probe, vmbus_probe),
558	DEVMETHOD(device_attach, vmbus_attach),
559	DEVMETHOD(device_detach, vmbus_detach),
560	DEVMETHOD(device_shutdown, bus_generic_shutdown),
561	DEVMETHOD(device_suspend, bus_generic_suspend),
562	DEVMETHOD(device_resume, bus_generic_resume),
563
564	/** Bus interface */
565	DEVMETHOD(bus_add_child, bus_generic_add_child),
566	DEVMETHOD(bus_print_child, bus_generic_print_child),
567	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
568	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
569
570	{ 0, 0 } };
571
572static char driver_name[] = "vmbus";
573static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
574
575
576devclass_t vmbus_devclass;
577
578DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
579MODULE_VERSION(vmbus,1);
580
581/* TODO: We want to be earlier than SI_SUB_VFS */
582SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
583
584