hv_vmbus_drv_freebsd.c revision 255414
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/module.h>
39#include <sys/sysctl.h>
40#include <sys/syslog.h>
41#include <sys/systm.h>
42#include <sys/rtprio.h>
43#include <sys/interrupt.h>
44#include <sys/sx.h>
45#include <sys/taskqueue.h>
46#include <sys/mutex.h>
47#include <sys/smp.h>
48
49#include <machine/resource.h>
50#include <sys/rman.h>
51
52#include <machine/stdarg.h>
53#include <machine/intr_machdep.h>
54#include <sys/pcpu.h>
55
56#include "hv_vmbus_priv.h"
57
58
59#define VMBUS_IRQ	0x5
60
61static struct intr_event *hv_msg_intr_event;
62static struct intr_event *hv_event_intr_event;
63static void *msg_swintr;
64static void *event_swintr;
65static device_t vmbus_devp;
66static void *vmbus_cookiep;
67static int vmbus_rid;
68struct resource *intr_res;
69static int vmbus_irq = VMBUS_IRQ;
70static int vmbus_inited;
71static hv_setup_args setup_args; /* only CPU 0 supported at this time */
72
73/**
74 * @brief Software interrupt thread routine to handle channel messages from
75 * the hypervisor.
76 */
77static void
78vmbus_msg_swintr(void *dummy)
79{
80	int 			cpu;
81	void*			page_addr;
82	hv_vmbus_message*	msg;
83	hv_vmbus_message*	copied;
84
85	cpu = PCPU_GET(cpuid);
86	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
87	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
88
89	for (;;) {
90		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
91			break; /* no message */
92		} else {
93			copied = malloc(sizeof(hv_vmbus_message),
94					M_DEVBUF, M_NOWAIT);
95			KASSERT(copied != NULL,
96				("Error VMBUS: malloc failed to allocate"
97					" hv_vmbus_message!"));
98			if (copied == NULL)
99				continue;
100			memcpy(copied, msg, sizeof(hv_vmbus_message));
101			hv_queue_work_item(hv_vmbus_g_connection.work_queue,
102			hv_vmbus_on_channel_message, copied);
103	    }
104
105	    msg->header.message_type = HV_MESSAGE_TYPE_NONE;
106
107	    /*
108	     * Make sure the write to message_type (ie set to
109	     * HV_MESSAGE_TYPE_NONE) happens before we read the
110	     * message_pending and EOMing. Otherwise, the EOMing will
111	     * not deliver any more messages
112	     * since there is no empty slot
113	     */
114	    wmb();
115
116	    if (msg->header.message_flags.message_pending) {
117			/*
118			 * This will cause message queue rescan to possibly
119			 * deliver another msg from the hypervisor
120			 */
121			wrmsr(HV_X64_MSR_EOM, 0);
122	    }
123	}
124}
125
126/**
127 * @brief Interrupt filter routine for VMBUS.
128 *
129 * The purpose of this routine is to determine the type of VMBUS protocol
130 * message to process - an event or a channel message.
131 * As this is an interrupt filter routine, the function runs in a very
132 * restricted envinronment.  From the manpage for bus_setup_intr(9)
133 *
134 *   In this restricted environment, care must be taken to account for all
135 *   races.  A careful analysis of races should be done as well.  It is gener-
136 *   ally cheaper to take an extra interrupt, for example, than to protect
137 *   variables with spinlocks.	Read, modify, write cycles of hardware regis-
138 *   ters need to be carefully analyzed if other threads are accessing the
139 *   same registers.
140 */
141static int
142hv_vmbus_isr(void *unused)
143{
144	int				cpu;
145	hv_vmbus_message*		msg;
146	hv_vmbus_synic_event_flags*	event;
147	void*				page_addr;
148
149	cpu = PCPU_GET(cpuid);
150	/* (Temporary limit) */
151	KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
152
153	/*
154	 * The Windows team has advised that we check for events
155	 * before checking for messages. This is the way they do it
156	 * in Windows when running as a guest in Hyper-V
157	 */
158
159	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
160	event = (hv_vmbus_synic_event_flags*)
161		    page_addr + HV_VMBUS_MESSAGE_SINT;
162
163	/* Since we are a child, we only need to check bit 0 */
164	if (synch_test_and_clear_bit(0, &event->flags32[0])) {
165		swi_sched(event_swintr, 0);
166	}
167
168	/* Check if there are actual msgs to be process */
169	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
170	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
171
172	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
173		swi_sched(msg_swintr, 0);
174	}
175
176	return FILTER_HANDLED;
177}
178
179static int
180vmbus_read_ivar(
181	device_t	dev,
182	device_t	child,
183	int		index,
184	uintptr_t*	result)
185{
186	struct hv_device *child_dev_ctx = device_get_ivars(child);
187
188	switch (index) {
189
190	case HV_VMBUS_IVAR_TYPE:
191		*result = (uintptr_t) &child_dev_ctx->class_id;
192		return (0);
193	case HV_VMBUS_IVAR_INSTANCE:
194		*result = (uintptr_t) &child_dev_ctx->device_id;
195		return (0);
196	case HV_VMBUS_IVAR_DEVCTX:
197		*result = (uintptr_t) child_dev_ctx;
198		return (0);
199	case HV_VMBUS_IVAR_NODE:
200		*result = (uintptr_t) child_dev_ctx->device;
201		return (0);
202	}
203	return (ENOENT);
204}
205
206static int
207vmbus_write_ivar(
208	device_t	dev,
209	device_t	child,
210	int		index,
211	uintptr_t	value)
212{
213	switch (index) {
214
215	case HV_VMBUS_IVAR_TYPE:
216	case HV_VMBUS_IVAR_INSTANCE:
217	case HV_VMBUS_IVAR_DEVCTX:
218	case HV_VMBUS_IVAR_NODE:
219		/* read-only */
220		return (EINVAL);
221	}
222	return (ENOENT);
223}
224
225struct hv_device*
226hv_vmbus_child_device_create(
227	hv_guid		type,
228	hv_guid		instance,
229	hv_vmbus_channel*	channel)
230{
231	hv_device* child_dev;
232
233	/*
234	 * Allocate the new child device
235	 */
236	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
237			M_NOWAIT |  M_ZERO);
238	KASSERT(child_dev != NULL,
239	    ("Error VMBUS: malloc failed to allocate hv_device!"));
240
241	if (child_dev == NULL)
242		return (NULL);
243
244	child_dev->channel = channel;
245	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
246	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
247
248	return (child_dev);
249}
250
251static void
252print_dev_guid(struct hv_device *dev)
253{
254	int i;
255	unsigned char guid_name[100];
256	for (i = 0; i < 32; i += 2)
257		sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
258	if(bootverbose)
259		printf("VMBUS: Class ID: %s\n", guid_name);
260}
261
262int
263hv_vmbus_child_device_register(struct hv_device *child_dev)
264{
265	device_t child;
266	int ret = 0;
267
268	print_dev_guid(child_dev);
269
270
271	child = device_add_child(vmbus_devp, NULL, -1);
272	child_dev->device = child;
273	device_set_ivars(child, child_dev);
274
275	mtx_lock(&Giant);
276	ret = device_probe_and_attach(child);
277	mtx_unlock(&Giant);
278
279	return (0);
280}
281
282int
283hv_vmbus_child_device_unregister(struct hv_device *child_dev)
284{
285	int ret = 0;
286	/*
287	 * XXXKYS: Ensure that this is the opposite of
288	 * device_add_child()
289	 */
290	mtx_lock(&Giant);
291	ret = device_delete_child(vmbus_devp, child_dev->device);
292	mtx_unlock(&Giant);
293	return(ret);
294}
295
296static void vmbus_identify(driver_t *driver, device_t parent) {
297	BUS_ADD_CHILD(parent, 0, "vmbus", 0);
298	if (device_find_child(parent, "vmbus", 0) == NULL) {
299		BUS_ADD_CHILD(parent, 0, "vmbus", 0);
300	}
301}
302
303static int
304vmbus_probe(device_t dev) {
305	if(bootverbose)
306		device_printf(dev, "VMBUS: probe\n");
307
308	if (!hv_vmbus_query_hypervisor_presence())
309		return (ENXIO);
310
311	device_set_desc(dev, "Vmbus Devices");
312
313	return (0);
314}
315
316/**
317 * @brief Main vmbus driver initialization routine.
318 *
319 * Here, we
320 * - initialize the vmbus driver context
321 * - setup various driver entry points
322 * - invoke the vmbus hv main init routine
323 * - get the irq resource
324 * - invoke the vmbus to add the vmbus root device
325 * - setup the vmbus root device
326 * - retrieve the channel offers
327 */
328static int
329vmbus_bus_init(void)
330{
331	struct ioapic_intsrc {
332		struct intsrc io_intsrc;
333		u_int io_irq;
334		u_int io_intpin:8;
335		u_int io_vector:8;
336		u_int io_cpu:8;
337		u_int io_activehi:1;
338		u_int io_edgetrigger:1;
339		u_int io_masked:1;
340		int io_bus:4;
341		uint32_t io_lowreg;
342	};
343	int i, ret;
344	unsigned int vector = 0;
345	struct intsrc *isrc;
346	struct ioapic_intsrc *intpin;
347
348	if (vmbus_inited)
349		return (0);
350
351	vmbus_inited = 1;
352
353	ret = hv_vmbus_init();
354
355	if (ret) {
356		if(bootverbose)
357			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
358		return (ret);
359	}
360
361	ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
362	    NULL, SWI_CLOCK, 0, &msg_swintr);
363
364	if (ret)
365	    goto cleanup;
366
367	/*
368	 * Message SW interrupt handler checks a per-CPU page and
369	 * thus the thread needs to be bound to CPU-0 - which is where
370	 * all interrupts are processed.
371	 */
372	ret = intr_event_bind(hv_msg_intr_event, 0);
373
374	if (ret)
375		goto cleanup1;
376
377	ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
378	    NULL, SWI_CLOCK, 0, &event_swintr);
379
380	if (ret)
381		goto cleanup1;
382
383	intr_res = bus_alloc_resource(vmbus_devp,
384	    SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
385
386	if (intr_res == NULL) {
387		ret = ENOMEM; /* XXXKYS: Need a better errno */
388		goto cleanup2;
389	}
390
391	/*
392	 * Setup interrupt filter handler
393	 */
394	ret = bus_setup_intr(vmbus_devp, intr_res,
395	    INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
396	    NULL, &vmbus_cookiep);
397
398	if (ret != 0)
399		goto cleanup3;
400
401	ret = bus_bind_intr(vmbus_devp, intr_res, 0);
402	if (ret != 0)
403		goto cleanup4;
404
405	isrc = intr_lookup_source(vmbus_irq);
406	if ((isrc == NULL) || (isrc->is_event == NULL)) {
407		ret = EINVAL;
408		goto cleanup4;
409	}
410
411	/* vector = isrc->is_event->ie_vector; */
412	intpin = (struct ioapic_intsrc *)isrc;
413	vector = intpin->io_vector;
414
415	if(bootverbose)
416		printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
417
418	/**
419	 * Notify the hypervisor of our irq.
420	 */
421	setup_args.vector = vector;
422	for(i = 0; i < 2; i++) {
423		setup_args.page_buffers[i] =
424				malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
425		if (setup_args.page_buffers[i] == NULL) {
426			KASSERT(setup_args.page_buffers[i] != NULL,
427					("Error VMBUS: malloc failed!"));
428			if (i > 0)
429				free(setup_args.page_buffers[0], M_DEVBUF);
430			goto cleanup4;
431		}
432	}
433
434	/* only CPU #0 supported at this time */
435	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
436
437	/*
438	 * Connect to VMBus in the root partition
439	 */
440	ret = hv_vmbus_connect();
441
442	if (ret != 0)
443	    goto cleanup4;
444
445	hv_vmbus_request_channel_offers();
446	return (ret);
447
448	cleanup4:
449
450	/*
451	 * remove swi, bus and intr resource
452	 */
453	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
454
455	cleanup3:
456	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
457
458	cleanup2:
459	swi_remove(event_swintr);
460
461	cleanup1:
462	swi_remove(msg_swintr);
463
464	cleanup:
465	hv_vmbus_cleanup();
466
467	return (ret);
468}
469
470static int
471vmbus_attach(device_t dev)
472{
473	if(bootverbose)
474		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
475	vmbus_devp = dev;
476
477	/*
478	 * If the system has already booted and thread
479	 * scheduling is possible indicated by the global
480	 * cold set to zero, we just call the driver
481	 * initialization directly.
482	 */
483	if (!cold)
484		vmbus_bus_init();
485
486	return (0);
487}
488
489static void
490vmbus_init(void)
491{
492	/*
493	 * If the system has already booted and thread
494	 * scheduling is possible indicated by the global
495	 * cold set to zero, we just call the driver
496	 * initialization directly.
497	 */
498	if (!cold)
499		vmbus_bus_init();
500}
501
502static void
503vmbus_bus_exit(void)
504{
505	int i;
506
507	hv_vmbus_release_unattached_channels();
508	hv_vmbus_disconnect();
509
510	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
511
512	for(i = 0; i < 2; i++) {
513		if (setup_args.page_buffers[i] != 0)
514			free(setup_args.page_buffers[i], M_DEVBUF);
515	}
516
517	hv_vmbus_cleanup();
518
519	/* remove swi, bus and intr resource */
520	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
521
522	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
523
524	swi_remove(msg_swintr);
525	swi_remove(event_swintr);
526
527	return;
528}
529
/* Driver exit entry point; simply delegates to vmbus_bus_exit(). */
static void
vmbus_exit(void)
{
	vmbus_bus_exit();
	return;
}
535
536static int
537vmbus_detach(device_t dev)
538{
539	vmbus_exit();
540	return (0);
541}
542
543static void
544vmbus_mod_load(void)
545{
546	if(bootverbose)
547		printf("VMBUS: load\n");
548}
549
550static void
551vmbus_mod_unload(void)
552{
553	if(bootverbose)
554		printf("VMBUS: unload\n");
555}
556
557static int
558vmbus_modevent(module_t mod, int what, void *arg)
559{
560	switch (what) {
561
562	case MOD_LOAD:
563		vmbus_mod_load();
564		break;
565	case MOD_UNLOAD:
566		vmbus_mod_unload();
567		break;
568	}
569
570	return (0);
571}
572
573static device_method_t vmbus_methods[] = {
574	/** Device interface */
575	DEVMETHOD(device_identify, vmbus_identify),
576	DEVMETHOD(device_probe, vmbus_probe),
577	DEVMETHOD(device_attach, vmbus_attach),
578	DEVMETHOD(device_detach, vmbus_detach),
579	DEVMETHOD(device_shutdown, bus_generic_shutdown),
580	DEVMETHOD(device_suspend, bus_generic_suspend),
581	DEVMETHOD(device_resume, bus_generic_resume),
582
583	/** Bus interface */
584	DEVMETHOD(bus_add_child, bus_generic_add_child),
585	DEVMETHOD(bus_print_child, bus_generic_print_child),
586	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
587	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
588
589	{ 0, 0 } };
590
591static char driver_name[] = "vmbus";
592static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
593
594
595devclass_t vmbus_devclass;
596
597DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
598MODULE_VERSION(vmbus,1);
599
600/* TODO: We want to be earlier than SI_SUB_VFS */
601SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
602
603