/*-
 * Copyright (c) 2018 VMware, Inc.
 *
 * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
 */

/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/systm.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <machine/bus.h>

#include "vmci.h"
#include "vmci_doorbell.h"
#include "vmci_driver.h"
#include "vmci_kernel_defs.h"
#include "vmci_queue_pair.h"

static int	vmci_probe(device_t);
static int	vmci_attach(device_t);
static int	vmci_detach(device_t);
static int	vmci_shutdown(device_t);

static int	vmci_map_bars(struct vmci_softc *);
static void	vmci_unmap_bars(struct vmci_softc *);

static int	vmci_config_capabilities(struct vmci_softc *);

static int	vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
		    bus_size_t, struct vmci_dma_alloc *);
static void	vmci_dma_free_int(struct vmci_softc *,
		    struct vmci_dma_alloc *);

static int	vmci_config_interrupts(struct vmci_softc *);
static int	vmci_config_interrupt(struct vmci_softc *);
static int	vmci_check_intr_cnt(struct vmci_softc *);
static int	vmci_allocate_interrupt_resources(struct vmci_softc *);
static int	vmci_setup_interrupts(struct vmci_softc *);
static void	vmci_dismantle_interrupts(struct vmci_softc *);
static void	vmci_interrupt(void *);
static void	vmci_interrupt_bm(void *);
static void	dispatch_datagrams(void *, int);
static void	process_bitmap(void *, int);

static void	vmci_delayed_work_fn_cb(void *context, int data);

static device_method_t vmci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,		vmci_probe),
	DEVMETHOD(device_attach,	vmci_attach),
	DEVMETHOD(device_detach,	vmci_detach),
	DEVMETHOD(device_shutdown,	vmci_shutdown),

	DEVMETHOD_END
};

static driver_t vmci_driver = {
	"vmci", vmci_methods, sizeof(struct vmci_softc)
};

DRIVER_MODULE(vmci, pci, vmci_driver, 0, 0);
MODULE_VERSION(vmci, VMCI_VERSION);

const struct {
	uint16_t vendor;
	uint16_t device;
	const char *desc;
} vmci_ids[] = {
	{ VMCI_VMWARE_VENDOR_ID, VMCI_VMWARE_DEVICE_ID,
	    "VMware Virtual Machine Communication Interface" },
};
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, vmci, vmci_ids,
    nitems(vmci_ids));

MODULE_DEPEND(vmci, pci, 1, 1, 1);

static struct vmci_softc *vmci_sc;

#define LGPFX	"vmci: "

/*
 * Allocate a buffer for incoming datagrams globally to avoid repeated
 * allocation in the interrupt handler's atomic context.
 */
static uint8_t *data_buffer = NULL;
static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;

struct vmci_delayed_work_info {
	vmci_work_fn	*work_fn;
	void		*data;
	vmci_list_item(vmci_delayed_work_info) entry;
};

/*
 *------------------------------------------------------------------------------
 *
 * vmci_probe --
 *
 *     Probe to see if the VMCI device is present.
 *
 * Results:
 *     BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_probe(device_t dev)
{

	if (pci_get_vendor(dev) == vmci_ids[0].vendor &&
	    pci_get_device(dev) == vmci_ids[0].device) {
		device_set_desc(dev, vmci_ids[0].desc);

		return (BUS_PROBE_DEFAULT);
	}

	return (ENXIO);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_attach --
 *
 *     Attach VMCI device to the system after vmci_probe() has been called and
 *     the device has been detected.
 *
 * Results:
 *     0 if success, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_attach(device_t dev)
{
	struct vmci_softc *sc;
	int error, i;

	sc = device_get_softc(dev);
	sc->vmci_dev = dev;
	vmci_sc = sc;

	data_buffer = NULL;
	sc->vmci_num_intr = 0;
	for (i = 0; i < VMCI_MAX_INTRS; i++) {
		sc->vmci_intrs[i].vmci_irq = NULL;
		sc->vmci_intrs[i].vmci_handler = NULL;
	}

	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);

	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);

	pci_enable_busmaster(dev);

	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
	    NULL, MTX_DEF);

	error = vmci_map_bars(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
		goto fail;
	}

	error = vmci_config_capabilities(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
		goto fail;
	}

	vmci_list_init(&sc->vmci_delayed_work_infos);

	vmci_components_init();
	vmci_util_init();
	error = vmci_qp_guest_endpoints_init();
	if (error) {
		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
		goto fail;
	}

	error = vmci_config_interrupts(sc);
	if (error)
		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");

fail:
	if (error) {
		vmci_detach(dev);
		return (ENXIO);
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_detach --
 *
 *     Detach the VMCI device.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_detach(device_t dev)
{
	struct vmci_softc *sc;

	sc = device_get_softc(dev);

	vmci_qp_guest_endpoints_exit();
	vmci_util_exit();

	vmci_dismantle_interrupts(sc);

	vmci_components_cleanup();

	if (mtx_initialized(&sc->vmci_spinlock)) {
		taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
		mtx_destroy(&sc->vmci_delayed_work_lock);
	}

	if (sc->vmci_res0 != NULL)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);

	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
		vmci_dma_free(&sc->vmci_notifications_bitmap);

	vmci_unmap_bars(sc);

	if (mtx_initialized(&sc->vmci_spinlock))
		mtx_destroy(&sc->vmci_spinlock);

	pci_disable_busmaster(dev);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_shutdown --
 *
 *     This function is called during system shutdown. We don't do anything.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_shutdown(device_t dev)
{

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_map_bars --
 *
 *     Maps the PCI I/O and MMIO BARs.
 *
 * Results:
 *     0 on success, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_map_bars(struct vmci_softc *sc)
{
	int rid;

	/* Map the PCI I/O BAR: BAR0 */
	rid = PCIR_BAR(0);
	sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
	    &rid, RF_ACTIVE);
	if (sc->vmci_res0 == NULL) {
		VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
		return (ENXIO);
	}

	sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
	sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
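	/*
	 * Also record the raw port base: the datagram paths pass
	 * vmci_ioaddr-relative port numbers to x86 string I/O (see the
	 * "rep outsb" in vmci_send_datagram()) rather than going through
	 * the bus-space handle.
	 */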
	sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);

	/* Map the PCI MMIO BAR: BAR1 */
	rid = PCIR_BAR(1);
	sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->vmci_res1 == NULL) {
		VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
		return (ENXIO);
	}

	sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
	sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_unmap_bars --
 *
 *     Unmaps the VMCI PCI I/O and MMIO BARs.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_unmap_bars(struct vmci_softc *sc)
{
	int rid;

	if (sc->vmci_res0 != NULL) {
		rid = PCIR_BAR(0);
		bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
		    sc->vmci_res0);
		sc->vmci_res0 = NULL;
	}

	if (sc->vmci_res1 != NULL) {
		rid = PCIR_BAR(1);
		bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
		    sc->vmci_res1);
		sc->vmci_res1 = NULL;
	}
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_capabilities --
 *
 *     Check the VMCI device capabilities and configure the device accordingly.
 *
 * Results:
 *     0 if success, ENODEV otherwise.
 *
 * Side effects:
 *     Device capabilities are enabled.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_capabilities(struct vmci_softc *sc)
{
	unsigned long bitmap_PPN;
	int error;

	/*
	 * Verify that the VMCI device supports the capabilities that we
	 * need. Datagrams are necessary and notifications will be used
	 * if the device supports it.
	 */
	sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CAPS_ADDR);

	if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
		VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
		    "datagrams.\n");
		return (ENODEV);
	}

	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
		sc->capabilities = VMCI_CAPS_DATAGRAM;
		error = vmci_dma_malloc(PAGE_SIZE, 1,
		    &sc->vmci_notifications_bitmap);
		if (error)
			VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
			    "notification bitmap.\n");
		else {
			memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
			    PAGE_SIZE);
			sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
		}
	} else
		sc->capabilities = VMCI_CAPS_DATAGRAM;

	/* Let the host know which capabilities we intend to use. */
	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CAPS_ADDR, sc->capabilities);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
		bitmap_PPN =
		    sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
		vmci_register_notification_bitmap(bitmap_PPN);
	}

	/* Check host capabilities. */
	if (!vmci_check_host_capabilities())
		return (ENODEV);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dmamap_cb --
 *
 *     Callback to receive mapping information resulting from the load of a
 *     bus_dmamap_t via bus_dmamap_load()
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *baddr = arg;

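	/*
	 * The tags built by vmci_dma_malloc_int() specify nsegments = 1,
	 * so only the first (and only) segment's address is recorded.
	 */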
	if (error == 0)
		*baddr = segs->ds_addr;
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_malloc_int --
 *
 *     Internal function that allocates DMA memory.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     EINPROGRESS if mapping is deferred.
 *     EINVAL if the request was invalid.
 *
 * Side effects:
 *     DMA memory is allocated.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
    struct vmci_dma_alloc *dma)
{
	int error;

	bzero(dma, sizeof(struct vmci_dma_alloc));

	error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
	    align, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsize */
	    1,			/* nsegments */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &dma->dma_tag);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
		goto fail;
	}

	dma->dma_size = size;

fail:
	if (error)
		vmci_dma_free(dma);

	return (error);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_malloc --
 *
 *     This function is a wrapper around vmci_dma_malloc_int for callers
 *     outside of this module. Since we only support a single VMCI device, this
 *     wrapper provides access to the device softc structure.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     EINPROGRESS if mapping is deferred.
 *     EINVAL if the request was invalid.
 *
 * Side effects:
 *     DMA memory is allocated.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
{

	return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_free_int --
 *
 *     Internal function that frees DMA memory.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees DMA memory.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
{

	if (dma->dma_tag != NULL) {
		if (dma->dma_paddr != 0) {
			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		}

		if (dma->dma_vaddr != NULL)
			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
			    dma->dma_map);

		bus_dma_tag_destroy(dma->dma_tag);
	}
	bzero(dma, sizeof(struct vmci_dma_alloc));
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_free --
 *
 *     This function is a wrapper around vmci_dma_free_int for callers outside
 *     of this module. Since we only support a single VMCI device, this wrapper
 *     provides access to the device softc structure.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees DMA memory.
 *
 *------------------------------------------------------------------------------
 */

void
vmci_dma_free(struct vmci_dma_alloc *dma)
{

	vmci_dma_free_int(vmci_sc, dma);
}
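
/*
 * Illustrative sketch (not part of the driver): a caller needing DMA-able
 * memory pairs the two wrappers above, much like the notification-bitmap
 * allocation in vmci_config_capabilities():
 *
 *	struct vmci_dma_alloc dma;
 *
 *	if (vmci_dma_malloc(PAGE_SIZE, 1, &dma) == 0) {
 *		... use dma.dma_vaddr and dma.dma_paddr ...
 *		vmci_dma_free(&dma);
 *	}
 */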

/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupts --
 *
 *     Configures and enables interrupts. Try to configure MSI-X. If this
 *     fails, try to configure MSI. If even this fails, try legacy interrupts.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     ENODEV if the device doesn't support interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Interrupts get enabled if successful.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupts(struct vmci_softc *sc)
{
	int error;

	data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
	if (data_buffer == NULL)
		return (ENOMEM);

	sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
	error = vmci_config_interrupt(sc);
	if (error) {
		sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
		error = vmci_config_interrupt(sc);
	}
	if (error) {
		sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
		error = vmci_config_interrupt(sc);
	}
	if (error)
		return (error);

	/* Enable specific interrupt bits. */
	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
	else
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);

	/* Enable interrupts. */
	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupt --
 *
 *     Check the number of interrupts supported, allocate resources and setup
 *     interrupts.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     ENODEV if the device doesn't support interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Resources get allocated and interrupts get setup (but not enabled) if
 *     successful.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupt(struct vmci_softc *sc)
{
	int error;

	error = vmci_check_intr_cnt(sc);
	if (error)
		return (error);

	error = vmci_allocate_interrupt_resources(sc);
	if (error)
		return (error);

	error = vmci_setup_interrupts(sc);
	if (error)
		return (error);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_check_intr_cnt --
 *
 *     Check the number of interrupts supported by the device and ask PCI bus
 *     to allocate appropriate number of interrupts.
 *
 * Results:
 *     0 if success.
 *     ENODEV if the device doesn't support any interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Resources get allocated on success.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_check_intr_cnt(struct vmci_softc *sc)
{

	if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
		sc->vmci_num_intr = 1;
		return (0);
	}

	/*
	 * Make sure that the device supports the required number of MSI/MSI-X
	 * messages. We try for 2 MSI-X messages but 1 is good too. We need at
	 * least 1 MSI message.
	 */
	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
	    pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);

	if (!sc->vmci_num_intr) {
		VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
		    " messages");
		return (ENODEV);
	}

	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
	    VMCI_MAX_INTRS : 1;
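	/*
	 * Note that pci_alloc_msix()/pci_alloc_msi() update vmci_num_intr
	 * in place, so on success it may hold fewer vectors than the
	 * number requested above.
	 */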
	if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
		if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
			return (ENXIO);
	} else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
		if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
			return (ENXIO);
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_allocate_interrupt_resources --
 *
 *     Allocate resources necessary for interrupts.
 *
 * Results:
 *     0 if success, ENXIO otherwise.
 *
 * Side effects:
 *     Resources get allocated on success.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_allocate_interrupt_resources(struct vmci_softc *sc)
{
	struct resource *irq;
	int flags, i, rid;

	flags = RF_ACTIVE;
	flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
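	/*
	 * By FreeBSD convention, SYS_RES_IRQ rid 0 is the legacy INTx
	 * line, while MSI/MSI-X vectors are numbered starting at rid 1.
	 */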
	rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;

	for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
		irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
		    flags);
		if (irq == NULL)
			return (ENXIO);
		sc->vmci_intrs[i].vmci_irq = irq;
		sc->vmci_intrs[i].vmci_rid = rid;
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_setup_interrupts --
 *
 *     Sets up the interrupts.
 *
 * Results:
 *     0 if success, appropriate error code from bus_setup_intr otherwise.
 *
 * Side effects:
 *     Interrupt handler gets attached.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_setup_interrupts(struct vmci_softc *sc)
{
	struct vmci_interrupt *intr;
	int error, flags;

	flags = INTR_TYPE_NET | INTR_MPSAFE;
	if (sc->vmci_num_intr > 1)
		flags |= INTR_EXCL;

	intr = &sc->vmci_intrs[0];
	error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
	    vmci_interrupt, NULL, &intr->vmci_handler);
	if (error)
		return (error);

	if (sc->vmci_num_intr == 2) {
		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
		    intr->vmci_handler, "dg");
		intr = &sc->vmci_intrs[1];
		error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
		    NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
		if (error)
			return (error);
		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
		    intr->vmci_handler, "bm");
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_interrupt --
 *
 *     Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 *     interrupt (vector VMCI_INTR_DATAGRAM).
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_interrupt(void *arg)
{

	if (vmci_sc->vmci_num_intr == 2)
		taskqueue_enqueue(taskqueue_swi,
		    &vmci_sc->vmci_interrupt_dq_task);
	else {
		unsigned int icr;

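		/*
		 * Reading ICR acknowledges the interrupt and reports its
		 * cause; 0 means the interrupt was not ours and all-ones
		 * suggests the device is no longer present.
		 */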
		icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
		if (icr == 0 || icr == 0xffffffff)
			return;
		if (icr & VMCI_ICR_DATAGRAM) {
			taskqueue_enqueue(taskqueue_swi,
			    &vmci_sc->vmci_interrupt_dq_task);
			icr &= ~VMCI_ICR_DATAGRAM;
		}
		if (icr & VMCI_ICR_NOTIFICATION) {
			taskqueue_enqueue(taskqueue_swi,
			    &vmci_sc->vmci_interrupt_bm_task);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}
		if (icr != 0)
			VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
			    "cause");
	}
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_interrupt_bm --
 *
 *     Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 *     which is for the notification bitmap. Will only get called if we are
 *     using MSI-X with exclusive vectors.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_interrupt_bm(void *arg)
{

	ASSERT(vmci_sc->vmci_num_intr == 2);
	taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
}

/*
 *------------------------------------------------------------------------------
 *
 * dispatch_datagrams --
 *
 *     Reads and dispatches incoming datagrams.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Reads data from the device.
 *
 *------------------------------------------------------------------------------
 */

static void
dispatch_datagrams(void *context, int data)
{

	if (data_buffer == NULL) {
		VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
		    "present");
		return;
	}

	vmci_read_datagrams_from_port((vmci_io_handle) 0,
	    vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
	    data_buffer, data_buffer_size);
}

/*
 *------------------------------------------------------------------------------
 *
 * process_bitmap --
 *
 *     Scans the notification bitmap for raised flags, clears them and handles
 *     the notifications.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
process_bitmap(void *context, int data)
{

	if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL) {
		VMCI_LOG_INFO(LGPFX"process_bitmap(): no bitmap present");
		return;
	}

	vmci_scan_notification_bitmap(
	    vmci_sc->vmci_notifications_bitmap.dma_vaddr);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_dismantle_interrupts --
 *
 *     Releases resources, detaches the interrupt handler and drains the task
 *     queue.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     No more interrupts.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dismantle_interrupts(struct vmci_softc *sc)
{
	struct vmci_interrupt *intr;
	int i;

	for (i = 0; i < sc->vmci_num_intr; i++) {
		intr = &sc->vmci_intrs[i];
		if (intr->vmci_handler != NULL) {
			bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
			    intr->vmci_handler);
			intr->vmci_handler = NULL;
		}
		if (intr->vmci_irq != NULL) {
			bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
			    intr->vmci_rid, intr->vmci_irq);
			intr->vmci_irq = NULL;
			intr->vmci_rid = -1;
		}
	}

	if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
	    (sc->vmci_num_intr))
		pci_release_msi(sc->vmci_dev);

	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);

	if (data_buffer != NULL)
		free(data_buffer, M_DEVBUF);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_delayed_work_fn_cb --
 *
 *     Callback function that executes the queued up delayed work functions.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_delayed_work_fn_cb(void *context, int data)
{
	vmci_list(vmci_delayed_work_info) temp_list;

	vmci_list_init(&temp_list);

	/*
	 * Swap vmci_delayed_work_infos list with the empty temp_list while
	 * holding a lock. vmci_delayed_work_infos would then be an empty list
	 * and temp_list would contain the elements from the original
	 * vmci_delayed_work_infos. Finally, iterate through temp_list
	 * executing the delayed callbacks.
	 */

	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
	vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
	    vmci_delayed_work_info, entry);
	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);

	while (!vmci_list_empty(&temp_list)) {
		struct vmci_delayed_work_info *delayed_work_info =
		    vmci_list_first(&temp_list);

		delayed_work_info->work_fn(delayed_work_info->data);

		vmci_list_remove(delayed_work_info, entry);
		vmci_free_kernel_mem(delayed_work_info,
		    sizeof(*delayed_work_info));
	}
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_schedule_delayed_work_fn --
 *
 *     Schedule the specified callback.
 *
 * Results:
 *     0 if success, error code otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
{
	struct vmci_delayed_work_info *delayed_work_info;

	delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
	    VMCI_MEMORY_ATOMIC);

	if (!delayed_work_info)
		return (VMCI_ERROR_NO_MEM);

	delayed_work_info->work_fn = work_fn;
	delayed_work_info->data = data;
	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
	vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
	    delayed_work_info, entry);
	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);

	taskqueue_enqueue(taskqueue_thread,
	    &vmci_sc->vmci_delayed_work_task);

	return (VMCI_SUCCESS);
}
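
/*
 * Illustrative sketch (not part of the driver): a component that must defer
 * work out of an interrupt path queues it as follows, where "my_work_fn" is
 * a hypothetical vmci_work_fn that vmci_delayed_work_fn_cb() above later
 * invokes from the taskqueue_thread context:
 *
 *	static void
 *	my_work_fn(void *data)
 *	{
 *		... sleepable work ...
 *	}
 *
 *	(void)vmci_schedule_delayed_work_fn(my_work_fn, data);
 */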

/*
 *------------------------------------------------------------------------------
 *
 * vmci_send_datagram --
 *
 *     VM to hypervisor call mechanism.
 *
 * Results:
 *     The result of the hypercall.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_send_datagram(struct vmci_datagram *dg)
{
	int result;

	if (dg == NULL)
		return (VMCI_ERROR_INVALID_ARGS);

	/*
	 * Need to acquire spinlock on the device because
	 * the datagram data may be spread over multiple pages and the monitor
	 * may interleave device user rpc calls from multiple VCPUs. Acquiring
	 * the spinlock precludes that possibility. Disabling interrupts to
	 * avoid incoming datagrams during a "rep out" and possibly landing up
	 * in this function.
	 */
	mtx_lock_spin(&vmci_sc->vmci_spinlock);

	/*
	 * Send the datagram and retrieve the return value from the result
	 * register.
	 */
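	/*
	 * Constraint note: "d" loads the port into DX, "c" the byte count
	 * into CX and "S" the datagram address into SI, which are exactly
	 * the registers "rep outsb" consumes.
	 */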
	__asm__ __volatile__(
	    "cld\n\t"
	    "rep outsb\n\t"
	    : /* No output. */
	    : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
	    "c"(VMCI_DG_SIZE(dg)), "S"(dg)
	    );

	/*
	 * XXX: Should read result high port as well when updating handlers to
	 * return 64bit.
	 */

	result = bus_space_read_4(vmci_sc->vmci_iot0,
	    vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
	mtx_unlock_spin(&vmci_sc->vmci_spinlock);

	return (result);
}