hv_vmbus_drv_freebsd.c revision 256758
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 256758 2013-10-18 23:19:27Z gibbs $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/sysctl.h>
42#include <sys/syslog.h>
43#include <sys/systm.h>
44#include <sys/rtprio.h>
45#include <sys/interrupt.h>
46#include <sys/sx.h>
47#include <sys/taskqueue.h>
48#include <sys/mutex.h>
49#include <sys/smp.h>
50
51#include <machine/resource.h>
52#include <sys/rman.h>
53
54#include <machine/stdarg.h>
55#include <machine/intr_machdep.h>
56#include <sys/pcpu.h>
57
58#include "hv_vmbus_priv.h"
59
60
61#define VMBUS_IRQ	0x5
62
63static struct intr_event *hv_msg_intr_event;
64static struct intr_event *hv_event_intr_event;
65static void *msg_swintr;
66static void *event_swintr;
67static device_t vmbus_devp;
68static void *vmbus_cookiep;
69static int vmbus_rid;
70struct resource *intr_res;
71static int vmbus_irq = VMBUS_IRQ;
72static int vmbus_inited;
73static hv_setup_args setup_args; /* only CPU 0 supported at this time */
74
75/**
76 * @brief Software interrupt thread routine to handle channel messages from
77 * the hypervisor.
78 */
79static void
80vmbus_msg_swintr(void *dummy)
81{
82	int 			cpu;
83	void*			page_addr;
84	hv_vmbus_message*	msg;
85	hv_vmbus_message*	copied;
86
87	cpu = PCPU_GET(cpuid);
88	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
89	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
90
91	for (;;) {
92		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
93			break; /* no message */
94		} else {
95			copied = malloc(sizeof(hv_vmbus_message),
96					M_DEVBUF, M_NOWAIT);
97			KASSERT(copied != NULL,
98				("Error VMBUS: malloc failed to allocate"
99					" hv_vmbus_message!"));
100			if (copied == NULL)
101				continue;
102			memcpy(copied, msg, sizeof(hv_vmbus_message));
103			hv_queue_work_item(hv_vmbus_g_connection.work_queue,
104			hv_vmbus_on_channel_message, copied);
105	    }
106
107	    msg->header.message_type = HV_MESSAGE_TYPE_NONE;
108
109	    /*
110	     * Make sure the write to message_type (ie set to
111	     * HV_MESSAGE_TYPE_NONE) happens before we read the
112	     * message_pending and EOMing. Otherwise, the EOMing will
113	     * not deliver any more messages
114	     * since there is no empty slot
115	     */
116	    wmb();
117
118	    if (msg->header.message_flags.u.message_pending) {
119			/*
120			 * This will cause message queue rescan to possibly
121			 * deliver another msg from the hypervisor
122			 */
123			wrmsr(HV_X64_MSR_EOM, 0);
124	    }
125	}
126}
127
128/**
129 * @brief Interrupt filter routine for VMBUS.
130 *
131 * The purpose of this routine is to determine the type of VMBUS protocol
132 * message to process - an event or a channel message.
133 * As this is an interrupt filter routine, the function runs in a very
134 * restricted envinronment.  From the manpage for bus_setup_intr(9)
135 *
136 *   In this restricted environment, care must be taken to account for all
137 *   races.  A careful analysis of races should be done as well.  It is gener-
138 *   ally cheaper to take an extra interrupt, for example, than to protect
139 *   variables with spinlocks.	Read, modify, write cycles of hardware regis-
140 *   ters need to be carefully analyzed if other threads are accessing the
141 *   same registers.
142 */
143static int
144hv_vmbus_isr(void *unused)
145{
146	int				cpu;
147	hv_vmbus_message*		msg;
148	hv_vmbus_synic_event_flags*	event;
149	void*				page_addr;
150
151	cpu = PCPU_GET(cpuid);
152	/* (Temporary limit) */
153	KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
154
155	/*
156	 * The Windows team has advised that we check for events
157	 * before checking for messages. This is the way they do it
158	 * in Windows when running as a guest in Hyper-V
159	 */
160
161	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
162	event = (hv_vmbus_synic_event_flags*)
163		    page_addr + HV_VMBUS_MESSAGE_SINT;
164
165	/* Since we are a child, we only need to check bit 0 */
166	if (synch_test_and_clear_bit(0, &event->flags32[0])) {
167		swi_sched(event_swintr, 0);
168	}
169
170	/* Check if there are actual msgs to be process */
171	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
172	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
173
174	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
175		swi_sched(msg_swintr, 0);
176	}
177
178	return FILTER_HANDLED;
179}
180
181static int
182vmbus_read_ivar(
183	device_t	dev,
184	device_t	child,
185	int		index,
186	uintptr_t*	result)
187{
188	struct hv_device *child_dev_ctx = device_get_ivars(child);
189
190	switch (index) {
191
192	case HV_VMBUS_IVAR_TYPE:
193		*result = (uintptr_t) &child_dev_ctx->class_id;
194		return (0);
195	case HV_VMBUS_IVAR_INSTANCE:
196		*result = (uintptr_t) &child_dev_ctx->device_id;
197		return (0);
198	case HV_VMBUS_IVAR_DEVCTX:
199		*result = (uintptr_t) child_dev_ctx;
200		return (0);
201	case HV_VMBUS_IVAR_NODE:
202		*result = (uintptr_t) child_dev_ctx->device;
203		return (0);
204	}
205	return (ENOENT);
206}
207
208static int
209vmbus_write_ivar(
210	device_t	dev,
211	device_t	child,
212	int		index,
213	uintptr_t	value)
214{
215	switch (index) {
216
217	case HV_VMBUS_IVAR_TYPE:
218	case HV_VMBUS_IVAR_INSTANCE:
219	case HV_VMBUS_IVAR_DEVCTX:
220	case HV_VMBUS_IVAR_NODE:
221		/* read-only */
222		return (EINVAL);
223	}
224	return (ENOENT);
225}
226
227struct hv_device*
228hv_vmbus_child_device_create(
229	hv_guid		type,
230	hv_guid		instance,
231	hv_vmbus_channel*	channel)
232{
233	hv_device* child_dev;
234
235	/*
236	 * Allocate the new child device
237	 */
238	child_dev = malloc(sizeof(hv_device), M_DEVBUF,
239			M_NOWAIT |  M_ZERO);
240	KASSERT(child_dev != NULL,
241	    ("Error VMBUS: malloc failed to allocate hv_device!"));
242
243	if (child_dev == NULL)
244		return (NULL);
245
246	child_dev->channel = channel;
247	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
248	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
249
250	return (child_dev);
251}
252
253static void
254print_dev_guid(struct hv_device *dev)
255{
256	int i;
257	unsigned char guid_name[100];
258	for (i = 0; i < 32; i += 2)
259		sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
260	if(bootverbose)
261		printf("VMBUS: Class ID: %s\n", guid_name);
262}
263
264int
265hv_vmbus_child_device_register(struct hv_device *child_dev)
266{
267	device_t child;
268	int ret = 0;
269
270	print_dev_guid(child_dev);
271
272
273	child = device_add_child(vmbus_devp, NULL, -1);
274	child_dev->device = child;
275	device_set_ivars(child, child_dev);
276
277	mtx_lock(&Giant);
278	ret = device_probe_and_attach(child);
279	mtx_unlock(&Giant);
280
281	return (0);
282}
283
284int
285hv_vmbus_child_device_unregister(struct hv_device *child_dev)
286{
287	int ret = 0;
288	/*
289	 * XXXKYS: Ensure that this is the opposite of
290	 * device_add_child()
291	 */
292	mtx_lock(&Giant);
293	ret = device_delete_child(vmbus_devp, child_dev->device);
294	mtx_unlock(&Giant);
295	return(ret);
296}
297
298static void
299vmbus_identify(driver_t *driver, device_t parent)
300{
301	if (!hv_vmbus_query_hypervisor_presence())
302		return;
303
304	vm_guest = VM_GUEST_HV;
305
306	BUS_ADD_CHILD(parent, 0, "vmbus", 0);
307}
308
309static int
310vmbus_probe(device_t dev) {
311	if(bootverbose)
312		device_printf(dev, "VMBUS: probe\n");
313
314	device_set_desc(dev, "Vmbus Devices");
315
316	return (0);
317}
318
319/**
320 * @brief Main vmbus driver initialization routine.
321 *
322 * Here, we
323 * - initialize the vmbus driver context
324 * - setup various driver entry points
325 * - invoke the vmbus hv main init routine
326 * - get the irq resource
327 * - invoke the vmbus to add the vmbus root device
328 * - setup the vmbus root device
329 * - retrieve the channel offers
330 */
331static int
332vmbus_bus_init(void)
333{
334	struct ioapic_intsrc {
335		struct intsrc io_intsrc;
336		u_int io_irq;
337		u_int io_intpin:8;
338		u_int io_vector:8;
339		u_int io_cpu:8;
340		u_int io_activehi:1;
341		u_int io_edgetrigger:1;
342		u_int io_masked:1;
343		int io_bus:4;
344		uint32_t io_lowreg;
345	};
346	int i, ret;
347	unsigned int vector = 0;
348	struct intsrc *isrc;
349	struct ioapic_intsrc *intpin;
350
351	if (vmbus_inited)
352		return (0);
353
354	vmbus_inited = 1;
355
356	ret = hv_vmbus_init();
357
358	if (ret) {
359		if(bootverbose)
360			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
361		return (ret);
362	}
363
364	ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
365	    NULL, SWI_CLOCK, 0, &msg_swintr);
366
367	if (ret)
368	    goto cleanup;
369
370	/*
371	 * Message SW interrupt handler checks a per-CPU page and
372	 * thus the thread needs to be bound to CPU-0 - which is where
373	 * all interrupts are processed.
374	 */
375	ret = intr_event_bind(hv_msg_intr_event, 0);
376
377	if (ret)
378		goto cleanup1;
379
380	ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
381	    NULL, SWI_CLOCK, 0, &event_swintr);
382
383	if (ret)
384		goto cleanup1;
385
386	intr_res = bus_alloc_resource(vmbus_devp,
387	    SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
388
389	if (intr_res == NULL) {
390		ret = ENOMEM; /* XXXKYS: Need a better errno */
391		goto cleanup2;
392	}
393
394	/*
395	 * Setup interrupt filter handler
396	 */
397	ret = bus_setup_intr(vmbus_devp, intr_res,
398	    INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
399	    NULL, &vmbus_cookiep);
400
401	if (ret != 0)
402		goto cleanup3;
403
404	ret = bus_bind_intr(vmbus_devp, intr_res, 0);
405	if (ret != 0)
406		goto cleanup4;
407
408	isrc = intr_lookup_source(vmbus_irq);
409	if ((isrc == NULL) || (isrc->is_event == NULL)) {
410		ret = EINVAL;
411		goto cleanup4;
412	}
413
414	/* vector = isrc->is_event->ie_vector; */
415	intpin = (struct ioapic_intsrc *)isrc;
416	vector = intpin->io_vector;
417
418	if(bootverbose)
419		printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
420
421	/**
422	 * Notify the hypervisor of our irq.
423	 */
424	setup_args.vector = vector;
425	for(i = 0; i < 2; i++) {
426		setup_args.page_buffers[i] =
427				malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
428		if (setup_args.page_buffers[i] == NULL) {
429			KASSERT(setup_args.page_buffers[i] != NULL,
430					("Error VMBUS: malloc failed!"));
431			if (i > 0)
432				free(setup_args.page_buffers[0], M_DEVBUF);
433			goto cleanup4;
434		}
435	}
436
437	/* only CPU #0 supported at this time */
438	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
439
440	/*
441	 * Connect to VMBus in the root partition
442	 */
443	ret = hv_vmbus_connect();
444
445	if (ret != 0)
446	    goto cleanup4;
447
448	hv_vmbus_request_channel_offers();
449	return (ret);
450
451	cleanup4:
452
453	/*
454	 * remove swi, bus and intr resource
455	 */
456	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
457
458	cleanup3:
459	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
460
461	cleanup2:
462	swi_remove(event_swintr);
463
464	cleanup1:
465	swi_remove(msg_swintr);
466
467	cleanup:
468	hv_vmbus_cleanup();
469
470	return (ret);
471}
472
473static int
474vmbus_attach(device_t dev)
475{
476	if(bootverbose)
477		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
478	vmbus_devp = dev;
479
480	/*
481	 * If the system has already booted and thread
482	 * scheduling is possible indicated by the global
483	 * cold set to zero, we just call the driver
484	 * initialization directly.
485	 */
486	if (!cold)
487		vmbus_bus_init();
488
489	return (0);
490}
491
492static void
493vmbus_init(void)
494{
495	if (vm_guest != VM_GUEST_HV)
496		return;
497
498	/*
499	 * If the system has already booted and thread
500	 * scheduling is possible, as indicated by the
501	 * global cold set to zero, we just call the driver
502	 * initialization directly.
503	 */
504	if (!cold)
505		vmbus_bus_init();
506}
507
508static void
509vmbus_bus_exit(void)
510{
511	int i;
512
513	hv_vmbus_release_unattached_channels();
514	hv_vmbus_disconnect();
515
516	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
517
518	for(i = 0; i < 2; i++) {
519		if (setup_args.page_buffers[i] != 0)
520			free(setup_args.page_buffers[i], M_DEVBUF);
521	}
522
523	hv_vmbus_cleanup();
524
525	/* remove swi, bus and intr resource */
526	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
527
528	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
529
530	swi_remove(msg_swintr);
531	swi_remove(event_swintr);
532
533	return;
534}
535
/* Driver exit entry point; simply forwards to vmbus_bus_exit(). */
static void
vmbus_exit(void)
{

	vmbus_bus_exit();
	return;
}
541
542static int
543vmbus_detach(device_t dev)
544{
545	vmbus_exit();
546	return (0);
547}
548
549static void
550vmbus_mod_load(void)
551{
552	if(bootverbose)
553		printf("VMBUS: load\n");
554}
555
556static void
557vmbus_mod_unload(void)
558{
559	if(bootverbose)
560		printf("VMBUS: unload\n");
561}
562
563static int
564vmbus_modevent(module_t mod, int what, void *arg)
565{
566	switch (what) {
567
568	case MOD_LOAD:
569		vmbus_mod_load();
570		break;
571	case MOD_UNLOAD:
572		vmbus_mod_unload();
573		break;
574	}
575
576	return (0);
577}
578
579static device_method_t vmbus_methods[] = {
580	/** Device interface */
581	DEVMETHOD(device_identify, vmbus_identify),
582	DEVMETHOD(device_probe, vmbus_probe),
583	DEVMETHOD(device_attach, vmbus_attach),
584	DEVMETHOD(device_detach, vmbus_detach),
585	DEVMETHOD(device_shutdown, bus_generic_shutdown),
586	DEVMETHOD(device_suspend, bus_generic_suspend),
587	DEVMETHOD(device_resume, bus_generic_resume),
588
589	/** Bus interface */
590	DEVMETHOD(bus_add_child, bus_generic_add_child),
591	DEVMETHOD(bus_print_child, bus_generic_print_child),
592	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
593	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
594
595	{ 0, 0 } };
596
597static char driver_name[] = "vmbus";
598static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
599
600
601devclass_t vmbus_devclass;
602
603DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
604MODULE_VERSION(vmbus,1);
605
606/* TODO: We want to be earlier than SI_SUB_VFS */
607SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
608
609