// SPDX-License-Identifier: GPL-2.0-only /* * VMware VMCI Driver * * Copyright (C) 2012 VMware, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmci_datagram.h" #include "vmci_doorbell.h" #include "vmci_context.h" #include "vmci_driver.h" #include "vmci_event.h" #define PCI_DEVICE_ID_VMWARE_VMCI 0x0740 #define VMCI_UTIL_NUM_RESOURCES 1 /* * Datagram buffers for DMA send/receive must accommodate at least * a maximum sized datagram and the header. */ #define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE) static bool vmci_disable_msi; module_param_named(disable_msi, vmci_disable_msi, bool, 0); MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); static bool vmci_disable_msix; module_param_named(disable_msix, vmci_disable_msix, bool, 0); MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); static u32 ctx_update_sub_id = VMCI_INVALID_ID; static u32 vm_context_id = VMCI_INVALID_ID; struct vmci_guest_device { struct device *dev; /* PCI device we are attached to */ void __iomem *iobase; void __iomem *mmio_base; bool exclusive_vectors; struct wait_queue_head inout_wq; void *data_buffer; dma_addr_t data_buffer_base; void *tx_buffer; dma_addr_t tx_buffer_base; void *notification_bitmap; dma_addr_t notification_base; }; static bool use_ppn64; bool vmci_use_ppn64(void) { return use_ppn64; } /* vmci_dev singleton device and supporting data*/ struct pci_dev *vmci_pdev; static struct vmci_guest_device *vmci_dev_g; static DEFINE_SPINLOCK(vmci_dev_spinlock); static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0); bool vmci_guest_code_active(void) { return atomic_read(&vmci_num_guest_devices) != 0; } u32 vmci_get_vm_context_id(void) { if (vm_context_id == VMCI_INVALID_ID) { struct vmci_datagram get_cid_msg; get_cid_msg.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_GET_CONTEXT_ID); 
get_cid_msg.src = VMCI_ANON_SRC_HANDLE; get_cid_msg.payload_size = 0; vm_context_id = vmci_send_datagram(&get_cid_msg); } return vm_context_id; } static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg) { if (dev->mmio_base != NULL) return readl(dev->mmio_base + reg); return ioread32(dev->iobase + reg); } static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) { if (dev->mmio_base != NULL) writel(val, dev->mmio_base + reg); else iowrite32(val, dev->iobase + reg); } static void vmci_read_data(struct vmci_guest_device *vmci_dev, void *dest, size_t size) { if (vmci_dev->mmio_base == NULL) ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, dest, size); else { /* * For DMA datagrams, the data_buffer will contain the header on the * first page, followed by the incoming datagram(s) on the following * pages. The header uses an S/G element immediately following the * header on the first page to point to the data area. */ struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer; struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1); size_t buffer_offset = dest - vmci_dev->data_buffer; buffer_header->opcode = 1; buffer_header->size = 1; buffer_header->busy = 0; sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset; sg_array[0].size = size; vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base), VMCI_DATA_IN_LOW_ADDR); wait_event(vmci_dev->inout_wq, buffer_header->busy == 1); } } static int vmci_write_data(struct vmci_guest_device *dev, struct vmci_datagram *dg) { int result; if (dev->mmio_base != NULL) { struct vmci_data_in_out_header *buffer_header = dev->tx_buffer; u8 *dg_out_buffer = (u8 *)(buffer_header + 1); if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) return VMCI_ERROR_INVALID_ARGS; /* * Initialize send buffer with outgoing datagram * and set up header for inline data. Device will * not access buffer asynchronously - only after * the write to VMCI_DATA_OUT_LOW_ADDR. 
*/ memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg)); buffer_header->opcode = 0; buffer_header->size = VMCI_DG_SIZE(dg); buffer_header->busy = 1; vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base), VMCI_DATA_OUT_LOW_ADDR); /* Caller holds a spinlock, so cannot block. */ spin_until_cond(buffer_header->busy == 0); result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); if (result == VMCI_SUCCESS) result = (int)buffer_header->result; } else { iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR, dg, VMCI_DG_SIZE(dg)); result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); } return result; } /* * VM to hypervisor call mechanism. We use the standard VMware naming * convention since shared code is calling this function as well. */ int vmci_send_datagram(struct vmci_datagram *dg) { unsigned long flags; int result; /* Check args. */ if (dg == NULL) return VMCI_ERROR_INVALID_ARGS; /* * Need to acquire spinlock on the device because the datagram * data may be spread over multiple pages and the monitor may * interleave device user rpc calls from multiple * VCPUs. Acquiring the spinlock precludes that * possibility. Disabling interrupts to avoid incoming * datagrams during a "rep out" and possibly landing up in * this function. */ spin_lock_irqsave(&vmci_dev_spinlock, flags); if (vmci_dev_g) { vmci_write_data(vmci_dev_g, dg); result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); } else { result = VMCI_ERROR_UNAVAILABLE; } spin_unlock_irqrestore(&vmci_dev_spinlock, flags); return result; } EXPORT_SYMBOL_GPL(vmci_send_datagram); /* * Gets called with the new context id if updated or resumed. * Context id. 
*/ static void vmci_guest_cid_update(u32 sub_id, const struct vmci_event_data *event_data, void *client_data) { const struct vmci_event_payld_ctx *ev_payload = vmci_event_data_const_payload(event_data); if (sub_id != ctx_update_sub_id) { pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id); return; } if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) { pr_devel("Invalid event data\n"); return; } pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n", vm_context_id, ev_payload->context_id, event_data->event); vm_context_id = ev_payload->context_id; } /* * Verify that the host supports the hypercalls we need. If it does not, * try to find fallback hypercalls and use those instead. Returns 0 if * required hypercalls (or fallback hypercalls) are supported by the host, * an error code otherwise. */ static int vmci_check_host_caps(struct pci_dev *pdev) { bool result; struct vmci_resource_query_msg *msg; u32 msg_size = sizeof(struct vmci_resource_query_hdr) + VMCI_UTIL_NUM_RESOURCES * sizeof(u32); struct vmci_datagram *check_msg; check_msg = kzalloc(msg_size, GFP_KERNEL); if (!check_msg) { dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__); return -ENOMEM; } check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_RESOURCES_QUERY); check_msg->src = VMCI_ANON_SRC_HANDLE; check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE; msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg); msg->num_resources = VMCI_UTIL_NUM_RESOURCES; msg->resources[0] = VMCI_GET_CONTEXT_ID; /* Checks that hyper calls are supported */ result = vmci_send_datagram(check_msg) == 0x01; kfree(check_msg); dev_dbg(&pdev->dev, "%s: Host capability check: %s\n", __func__, result ? "PASSED" : "FAILED"); /* We need the vector. There are no fallbacks. */ return result ? 0 : -ENXIO; } /* * Reads datagrams from the device and dispatches them. 
For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
	/* Start of the area datagrams are read into. */
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	/* Datagram currently being examined inside that area. */
	struct vmci_datagram *dg;
	/* Largest read window this function will ever use. */
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	/* Read window currently in use (may grow to dg_in_buffer_size). */
	size_t current_dg_in_buffer_size;
	/* Bytes from dg to the end of the current read window. */
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer doesn't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				/* From here on, use the full-size window. */
				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
						remaining_bytes,
					       current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					 dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);

		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				 dg_in_size);

			/* Bytes of the dropped datagram not yet read in. */
			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			/*
			 * Keep reading full windows until the tail of the
			 * dropped datagram lies inside the window just read.
			 */
			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
			      (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				    current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 *
 * Does nothing beyond a debug message when the device has no bitmap.
 */
static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 *
 * Returns IRQ_NONE when the shared-vector path reads an ICR of 0 or all
 * ones, IRQ_HANDLED otherwise.
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply call
	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		vmci_dispatch_dgs(dev);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		/* Handle each known cause and clear its bit from icr. */
		if (icr & VMCI_ICR_DATAGRAM) {
			vmci_dispatch_dgs(dev);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			vmci_process_bitmap(dev);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		/* Whatever bits are left were not recognised above. */
		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap.  Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	vmci_process_bitmap(dev);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* Release anyone sleeping on inout_wq (see vmci_read_data()). */
	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}

/*
 * Releases the datagram buffers of @vmci_dev. With MMIO both buffers are
 * DMA-coherent allocations of VMCI_DMA_DG_BUFFER_SIZE and either may still
 * be NULL; without MMIO only data_buffer exists and is freed with vfree().
 */
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
*/ static int vmci_guest_probe_device(struct pci_dev *pdev, const struct pci_device_id *id) { struct vmci_guest_device *vmci_dev; void __iomem *iobase = NULL; void __iomem *mmio_base = NULL; unsigned int num_irq_vectors; unsigned int capabilities; unsigned int caps_in_use; unsigned long cmd; int vmci_err; int error; dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n"); error = pcim_enable_device(pdev); if (error) { dev_err(&pdev->dev, "Failed to enable VMCI device: %d\n", error); return error; } /* * The VMCI device with mmio access to registers requests 256KB * for BAR1. If present, driver will use new VMCI device * functionality for register access and datagram send/recv. */ if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) { dev_info(&pdev->dev, "MMIO register access is available\n"); mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET, VMCI_MMIO_ACCESS_SIZE); /* If the map fails, we fall back to IOIO access. */ if (!mmio_base) dev_warn(&pdev->dev, "Failed to map MMIO register access\n"); } if (!mmio_base) { if (IS_ENABLED(CONFIG_ARM64)) { dev_err(&pdev->dev, "MMIO base is invalid\n"); return -ENXIO; } error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); if (error) { dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); return error; } iobase = pcim_iomap_table(pdev)[0]; } vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); if (!vmci_dev) { dev_err(&pdev->dev, "Can't allocate memory for VMCI device\n"); return -ENOMEM; } vmci_dev->dev = &pdev->dev; vmci_dev->exclusive_vectors = false; vmci_dev->iobase = iobase; vmci_dev->mmio_base = mmio_base; init_waitqueue_head(&vmci_dev->inout_wq); if (mmio_base != NULL) { vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, &vmci_dev->tx_buffer_base, GFP_KERNEL); if (!vmci_dev->tx_buffer) { dev_err(&pdev->dev, "Can't allocate memory for datagram tx buffer\n"); return -ENOMEM; } vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, 
VMCI_DMA_DG_BUFFER_SIZE, &vmci_dev->data_buffer_base, GFP_KERNEL); } else { vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); } if (!vmci_dev->data_buffer) { dev_err(&pdev->dev, "Can't allocate memory for datagram buffer\n"); error = -ENOMEM; goto err_free_data_buffers; } pci_set_master(pdev); /* To enable queue_pair functionality. */ /* * Verify that the VMCI Device supports the capabilities that * we need. If the device is missing capabilities that we would * like to use, check for fallback capabilities and use those * instead (so we can run a new VM on old hosts). Fail the load if * a required capability is missing and there is no fallback. * * Right now, we need datagrams. There are no fallbacks. */ capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR); if (!(capabilities & VMCI_CAPS_DATAGRAM)) { dev_err(&pdev->dev, "Device does not support datagrams\n"); error = -ENXIO; goto err_free_data_buffers; } caps_in_use = VMCI_CAPS_DATAGRAM; /* * Use 64-bit PPNs if the device supports. * * There is no check for the return value of dma_set_mask_and_coherent * since this driver can handle the default mask values if * dma_set_mask_and_coherent fails. */ if (capabilities & VMCI_CAPS_PPN64) { dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); use_ppn64 = true; caps_in_use |= VMCI_CAPS_PPN64; } else { dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); use_ppn64 = false; } /* * If the hardware supports notifications, we will use that as * well. 
*/ if (capabilities & VMCI_CAPS_NOTIFICATIONS) { vmci_dev->notification_bitmap = dma_alloc_coherent( &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base, GFP_KERNEL); if (!vmci_dev->notification_bitmap) dev_warn(&pdev->dev, "Unable to allocate notification bitmap\n"); else caps_in_use |= VMCI_CAPS_NOTIFICATIONS; } if (mmio_base != NULL) { if (capabilities & VMCI_CAPS_DMA_DATAGRAM) { caps_in_use |= VMCI_CAPS_DMA_DATAGRAM; } else { dev_err(&pdev->dev, "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n"); error = -ENXIO; goto err_free_notification_bitmap; } } dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use); /* Let the host know which capabilities we intend to use. */ vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR); if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { /* Let the device know the size for pages passed down. */ vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT); /* Configure the high order parts of the data in/out buffers. */ vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base), VMCI_DATA_IN_HIGH_ADDR); vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base), VMCI_DATA_OUT_HIGH_ADDR); } /* Set up global device so that we can start sending datagrams */ spin_lock_irq(&vmci_dev_spinlock); vmci_dev_g = vmci_dev; vmci_pdev = pdev; spin_unlock_irq(&vmci_dev_spinlock); /* * Register notification bitmap with device if that capability is * used. */ if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) { unsigned long bitmap_ppn = vmci_dev->notification_base >> PAGE_SHIFT; if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { dev_warn(&pdev->dev, "VMCI device unable to register notification bitmap with PPN 0x%lx\n", bitmap_ppn); error = -ENXIO; goto err_remove_vmci_dev_g; } } /* Check host capabilities. */ error = vmci_check_host_caps(pdev); if (error) goto err_remove_vmci_dev_g; /* Enable device. */ /* * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can * update the internal context id when needed. 
*/ vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, vmci_guest_cid_update, NULL, &ctx_update_sub_id); if (vmci_err < VMCI_SUCCESS) dev_warn(&pdev->dev, "Failed to subscribe to event (type=%d): %d\n", VMCI_EVENT_CTX_ID_UPDATE, vmci_err); /* * Enable interrupts. Try MSI-X first, then MSI, and then fallback on * legacy interrupts. */ if (vmci_dev->mmio_base != NULL) num_irq_vectors = VMCI_MAX_INTRS; else num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION; error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors, PCI_IRQ_MSIX); if (error < 0) { error = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); if (error < 0) goto err_unsubscribe_event; } else { vmci_dev->exclusive_vectors = true; } /* * Request IRQ for legacy or MSI interrupts, or for first * MSI-X vector. */ error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL, vmci_interrupt, IRQF_SHARED, KBUILD_MODNAME, vmci_dev); if (error) { dev_err(&pdev->dev, "Irq %u in use: %d\n", pci_irq_vector(pdev, 0), error); goto err_disable_msi; } /* * For MSI-X with exclusive vectors we need to request an * interrupt for each vector so that we get a separate * interrupt handler routine. This allows us to distinguish * between the vectors. */ if (vmci_dev->exclusive_vectors) { error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL, vmci_interrupt_bm, 0, KBUILD_MODNAME, vmci_dev); if (error) { dev_err(&pdev->dev, "Failed to allocate irq %u: %d\n", pci_irq_vector(pdev, 1), error); goto err_free_irq; } if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { error = request_threaded_irq(pci_irq_vector(pdev, 2), NULL, vmci_interrupt_dma_datagram, 0, KBUILD_MODNAME, vmci_dev); if (error) { dev_err(&pdev->dev, "Failed to allocate irq %u: %d\n", pci_irq_vector(pdev, 2), error); goto err_free_bm_irq; } } } dev_dbg(&pdev->dev, "Registered device\n"); atomic_inc(&vmci_num_guest_devices); /* Enable specific interrupt bits. 
*/ cmd = VMCI_IMR_DATAGRAM; if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) cmd |= VMCI_IMR_NOTIFICATION; if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) cmd |= VMCI_IMR_DMA_DATAGRAM; vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR); /* Enable interrupts. */ vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR); pci_set_drvdata(pdev, vmci_dev); vmci_call_vsock_callback(false); return 0; err_free_bm_irq: if (vmci_dev->exclusive_vectors) free_irq(pci_irq_vector(pdev, 1), vmci_dev); err_free_irq: free_irq(pci_irq_vector(pdev, 0), vmci_dev); err_disable_msi: pci_free_irq_vectors(pdev); err_unsubscribe_event: vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); if (vmci_err < VMCI_SUCCESS) dev_warn(&pdev->dev, "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); err_remove_vmci_dev_g: spin_lock_irq(&vmci_dev_spinlock); vmci_pdev = NULL; vmci_dev_g = NULL; spin_unlock_irq(&vmci_dev_spinlock); err_free_notification_bitmap: if (vmci_dev->notification_bitmap) { vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); dma_free_coherent(&pdev->dev, PAGE_SIZE, vmci_dev->notification_bitmap, vmci_dev->notification_base); } err_free_data_buffers: vmci_free_dg_buffers(vmci_dev); /* The rest are managed resources and will be freed by PCI core */ return error; } static void vmci_guest_remove_device(struct pci_dev *pdev) { struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev); int vmci_err; dev_dbg(&pdev->dev, "Removing device\n"); atomic_dec(&vmci_num_guest_devices); vmci_qp_guest_endpoints_exit(); vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); if (vmci_err < VMCI_SUCCESS) dev_warn(&pdev->dev, "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); spin_lock_irq(&vmci_dev_spinlock); vmci_dev_g = NULL; vmci_pdev = NULL; spin_unlock_irq(&vmci_dev_spinlock); dev_dbg(&pdev->dev, "Resetting vmci device\n"); 
vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); /* * Free IRQ and then disable MSI/MSI-X as appropriate. For * MSI-X, we might have multiple vectors, each with their own * IRQ, which we must free too. */ if (vmci_dev->exclusive_vectors) { free_irq(pci_irq_vector(pdev, 1), vmci_dev); if (vmci_dev->mmio_base != NULL) free_irq(pci_irq_vector(pdev, 2), vmci_dev); } free_irq(pci_irq_vector(pdev, 0), vmci_dev); pci_free_irq_vectors(pdev); if (vmci_dev->notification_bitmap) { /* * The device reset above cleared the bitmap state of the * device, so we can safely free it here. */ dma_free_coherent(&pdev->dev, PAGE_SIZE, vmci_dev->notification_bitmap, vmci_dev->notification_base); } vmci_free_dg_buffers(vmci_dev); if (vmci_dev->mmio_base != NULL) pci_iounmap(pdev, vmci_dev->mmio_base); /* The rest are managed resources and will be freed by PCI core */ } static const struct pci_device_id vmci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), }, { 0 }, }; MODULE_DEVICE_TABLE(pci, vmci_ids); static struct pci_driver vmci_guest_driver = { .name = KBUILD_MODNAME, .id_table = vmci_ids, .probe = vmci_guest_probe_device, .remove = vmci_guest_remove_device, }; int __init vmci_guest_init(void) { return pci_register_driver(&vmci_guest_driver); } void __exit vmci_guest_exit(void) { pci_unregister_driver(&vmci_guest_driver); }