/*	$NetBSD: kvmgt.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $	*/

/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kvmgt.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $");

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>
#include <linux/debugfs.h>

#include <linux/nospec.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

#define EDID_BLOB_OFFSET (PAGE_SIZE/2)

#define OPREGION_SIGNATURE "IntelGraphicsMem"

struct vfio_region;
struct intel_vgpu_regops {
	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
			size_t count, loff_t *ppos, bool iswrite);
	void (*release)(struct intel_vgpu *vgpu,
			struct vfio_region *region);
};

struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
	const struct intel_vgpu_regops	*ops;
	void				*data;
};

struct vfio_edid_region {
	struct vfio_region_gfx_edid vfio_edid_regs;
	void *edid_blob;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
	struct dentry *debugfs_cache_entries;
};

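/*
 * A gvt_dma describes one cached guest-page-to-DMA mapping.  Each entry
 * is indexed twice, by gfn and by dma_addr, so it sits in both of the
 * vgpu's rb-trees at once; the kref counts how many users still hold
 * the mapping.
 */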
struct gvt_dma {
	struct intel_vgpu *vgpu;
	struct rb_node gfn_node;
	struct rb_node dma_addr_node;
	gfn_t gfn;
	dma_addr_t dma_addr;
	unsigned long size;
	struct kref ref;
};

static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size)
{
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;

	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;

		ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1);
		WARN_ON(ret != 1);
	}
}

/* Pin a normal or compound guest page for dma. */
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size, struct page **page)
{
	unsigned long base_pfn = 0;
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
	/*
	 * We pin the pages one-by-one to avoid allocating a big array
	 * on the stack to hold pfns.
	 */
	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;
		unsigned long pfn;

		ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1,
				     IOMMU_READ | IOMMU_WRITE, &pfn);
		if (ret != 1) {
			gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
				     cur_gfn, ret);
			goto err;
		}

		if (!pfn_valid(pfn)) {
			gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
			npage++;
			ret = -EFAULT;
			goto err;
		}

		if (npage == 0)
			base_pfn = pfn;
		else if (base_pfn + npage != pfn) {
			gvt_vgpu_err("The pages are not contiguous\n");
			ret = -EINVAL;
			npage++;
			goto err;
		}
	}

	*page = pfn_to_page(base_pfn);
	return 0;
err:
	gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
	return ret;
}

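/*
 * Pin the guest page(s) backing gfn and set up a DMA mapping for them.
 * On success, *dma_addr holds the bus address; on failure the pages are
 * unpinned again before returning.
 */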
static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t *dma_addr, unsigned long size)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	struct page *page = NULL;
	int ret;

	ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
	if (ret)
		return ret;

	/* Setup DMA mapping. */
	*dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr)) {
		gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
			     page_to_pfn(page), ret);
		gvt_unpin_guest_page(vgpu, gfn, size);
		return -ENOMEM;
	}

	return 0;
}

static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t dma_addr, unsigned long size)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;

	dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
	gvt_unpin_guest_page(vgpu, gfn, size);
}

static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
		dma_addr_t dma_addr)
{
	struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			node = node->rb_left;
		else if (dma_addr > itr->dma_addr)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.gfn_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

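/*
 * Insert a new mapping into the per-vgpu cache.  The entry is linked
 * into both rb-trees (keyed by gfn and by dma_addr) so later lookups
 * can start from either key.  The caller holds vdev.cache_lock.
 */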
static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		dma_addr_t dma_addr, unsigned long size)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->vgpu = vgpu;
	new->gfn = gfn;
	new->dma_addr = dma_addr;
	new->size = size;
	kref_init(&new->ref);

	/* gfn_cache maps gfn to struct gvt_dma. */
	link = &vgpu->vdev.gfn_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->gfn_node, parent, link);
	rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache);

	/* dma_addr_cache maps dma addr to struct gvt_dma. */
	parent = NULL;
	link = &vgpu->vdev.dma_addr_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->dma_addr_node, parent, link);
	rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);

	vgpu->vdev.nr_cache_entries++;
	return 0;
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
		struct gvt_dma *entry)
{
	rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache);
	rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache);
	kfree(entry);
	vgpu->vdev.nr_cache_entries--;
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;

	for (;;) {
		mutex_lock(&vgpu->vdev.cache_lock);
		node = rb_first(&vgpu->vdev.gfn_cache);
		if (!node) {
			mutex_unlock(&vgpu->vdev.cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, gfn_node);
		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->vdev.cache_lock);
	}
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.gfn_cache = RB_ROOT;
	vgpu->vdev.dma_addr_cache = RB_ROOT;
	vgpu->vdev.nr_cache_entries = 0;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	void *base = vgpu->vdev.region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos >= vgpu->vdev.region[i].size || iswrite) {
		gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
		return -EINVAL;
	}
	count = min(count, (size_t)(vgpu->vdev.region[i].size - pos));
	memcpy(buf, base + pos, count);

	return count;
}

static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
		struct vfio_region *region)
{
}

static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
	.rw = intel_vgpu_reg_rw_opregion,
	.release = intel_vgpu_reg_release_opregion,
};

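/*
 * The VFIO EDID region is laid out in two halves: the control registers
 * (struct vfio_region_gfx_edid) start at offset 0, and the EDID blob
 * itself starts at EDID_BLOB_OFFSET.  handle_edid_regs() and
 * handle_edid_blob() below each serve one half.
 */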
static int handle_edid_regs(struct intel_vgpu *vgpu,
			struct vfio_edid_region *region, char *buf,
			size_t count, u16 offset, bool is_write)
{
	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
	unsigned int data;

	if (offset + count > sizeof(*regs))
		return -EINVAL;

	if (count != 4)
		return -EINVAL;

	if (is_write) {
		data = *((unsigned int *)buf);
		switch (offset) {
		case offsetof(struct vfio_region_gfx_edid, link_state):
			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
				if (!drm_edid_block_valid(
					(u8 *)region->edid_blob,
					0,
					true,
					NULL)) {
					gvt_vgpu_err("invalid EDID blob\n");
					return -EINVAL;
				}
				intel_gvt_ops->emulate_hotplug(vgpu, true);
			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
				intel_gvt_ops->emulate_hotplug(vgpu, false);
			else {
				gvt_vgpu_err("invalid EDID link state %d\n",
					regs->link_state);
				return -EINVAL;
			}
			regs->link_state = data;
			break;
		case offsetof(struct vfio_region_gfx_edid, edid_size):
			if (data > regs->edid_max_size) {
				gvt_vgpu_err("EDID size is bigger than %d!\n",
					regs->edid_max_size);
				return -EINVAL;
			}
			regs->edid_size = data;
			break;
		default:
			/* read-only regs */
			gvt_vgpu_err("write read-only EDID region at offset %d\n",
				offset);
			return -EPERM;
		}
	} else {
		memcpy(buf, (char *)regs + offset, count);
	}

	return count;
}

static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
			size_t count, u16 offset, bool is_write)
{
	if (offset + count > region->vfio_edid_regs.edid_size)
		return -EINVAL;

	if (is_write)
		memcpy(region->edid_blob + offset, buf, count);
	else
		memcpy(buf, region->edid_blob + offset, count);

	return count;
}

static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	struct vfio_edid_region *region =
		(struct vfio_edid_region *)vgpu->vdev.region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos < region->vfio_edid_regs.edid_offset) {
		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
	} else {
		pos -= EDID_BLOB_OFFSET;
		ret = handle_edid_blob(region, buf, count, pos, iswrite);
	}

	if (ret < 0)
		gvt_vgpu_err("failed to access EDID region\n");

	return ret;
}

static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
		struct vfio_region *region)
{
	kfree(region->data);
}

static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
	.rw = intel_vgpu_reg_rw_edid,
	.release = intel_vgpu_reg_release_edid,
};

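/*
 * Append a device-specific region (counted after the fixed VFIO PCI
 * regions) to the vgpu's region array, growing the array by one.
 */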
static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
		unsigned int type, unsigned int subtype,
		const struct intel_vgpu_regops *ops,
		size_t size, u32 flags, void *data)
{
	struct vfio_region *region;

	region = krealloc(vgpu->vdev.region,
			(vgpu->vdev.num_regions + 1) * sizeof(*region),
			GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vgpu->vdev.region = region;
	vgpu->vdev.region[vgpu->vdev.num_regions].type = type;
	vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype;
	vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops;
	vgpu->vdev.region[vgpu->vdev.num_regions].size = size;
	vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags;
	vgpu->vdev.region[vgpu->vdev.num_regions].data = data;
	vgpu->vdev.num_regions++;
	return 0;
}

static int kvmgt_get_vfio_device(void *p_vgpu)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;

	vgpu->vdev.vfio_device = vfio_device_get_from_dev(
		mdev_dev(vgpu->vdev.mdev));
	if (!vgpu->vdev.vfio_device) {
		gvt_vgpu_err("failed to get vfio device\n");
		return -ENODEV;
	}
	return 0;
}

static int kvmgt_set_opregion(void *p_vgpu)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	void *base;
	int ret;

	/* Each vgpu has its own opregion, although VFIO would create another
	 * one later.  This one is used to expose the opregion to VFIO, while
	 * the one VFIO creates later is the one the guest actually uses.
	 */
	base = vgpu_opregion(vgpu)->va;
	if (!base)
		return -ENOMEM;

	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
		memunmap(base);
		return -EINVAL;
	}

	ret = intel_vgpu_register_reg(vgpu,
			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
			VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
			&intel_vgpu_regops_opregion, OPREGION_SIZE,
			VFIO_REGION_INFO_FLAG_READ, base);

	return ret;
}

static int kvmgt_set_edid(void *p_vgpu, int port_num)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
	struct vfio_edid_region *base;
	int ret;

	base = kzalloc(sizeof(*base), GFP_KERNEL);
	if (!base)
		return -ENOMEM;

	/* TODO: Add multi-port and EDID extension block support */
	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
	base->vfio_edid_regs.edid_max_size = EDID_SIZE;
	base->vfio_edid_regs.edid_size = EDID_SIZE;
	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
	base->edid_blob = port->edid->edid_block;

	ret = intel_vgpu_register_reg(vgpu,
			VFIO_REGION_TYPE_GFX,
			VFIO_REGION_SUBTYPE_GFX_EDID,
			&intel_vgpu_regops_edid, EDID_SIZE,
			VFIO_REGION_INFO_FLAG_READ |
			VFIO_REGION_INFO_FLAG_WRITE |
			VFIO_REGION_INFO_FLAG_CAPS, base);

	return ret;
}

static void kvmgt_put_vfio_device(void *vgpu)
{
	if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device))
		return;

	vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device);
}

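/*
 * mdev "create" callback: look up the requested vGPU type from the
 * sysfs kobject name and instantiate a vgpu of that type.
 */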
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}

static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		struct gvt_dma *entry;
		unsigned long iov_pfn, end_iov_pfn;

		iov_pfn = unmap->iova >> PAGE_SHIFT;
		end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;

		mutex_lock(&vgpu->vdev.cache_lock);
		for (; iov_pfn < end_iov_pfn; iov_pfn++) {
			entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
			if (!entry)
				continue;

			gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
					   entry->size);
			__gvt_cache_remove_entry(vgpu, entry);
		}
		mutex_unlock(&vgpu->vdev.cache_lock);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}

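/*
 * mdev "open" callback: register the IOMMU and group notifiers, pin
 * the module, and initialize the KVM guest state for this vgpu.
 */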
static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	/* Take a module reference as mdev core doesn't take
	 * a reference for vendor driver.
	 */
	if (!try_module_get(THIS_MODULE)) {
		ret = -ENODEV;
		goto undo_group;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}

static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
{
	struct eventfd_ctx *trigger;

	trigger = vgpu->vdev.msi_trigger;
	if (trigger) {
		eventfd_ctx_put(trigger);
		vgpu->vdev.msi_trigger = NULL;
	}
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_release(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	/* dereference module reference taken at open */
	module_put(THIS_MODULE);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	intel_vgpu_release_msi_eventfd_ctx(vgpu);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					       vdev.release_work);

	__intel_vgpu_release(vgpu);
}

static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
	u32 start_lo, start_hi;
	u32 mem_type;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ bar + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
			     void *buf, unsigned int count, bool is_write)
{
	u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
	int ret;

	if (is_write)
		ret = intel_gvt_ops->emulate_mmio_write(vgpu,
					bar_start + off, buf, count);
	else
		ret = intel_gvt_ops->emulate_mmio_read(vgpu,
					bar_start + off, buf, count);
	return ret;
}

static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
{
	return off >= vgpu_aperture_offset(vgpu) &&
	       off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
}

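/*
 * Read or write the vgpu's slice of the GGTT aperture directly through
 * a temporary write-combining mapping, bounds-checked against the
 * vgpu's aperture window.
 */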
static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
		void *buf, unsigned long count, bool is_write)
{
	void __iomem *aperture_va;

	if (!intel_vgpu_in_aperture(vgpu, off) ||
	    !intel_vgpu_in_aperture(vgpu, off + count)) {
		gvt_vgpu_err("Invalid aperture offset %llu\n", off);
		return -EINVAL;
	}

	aperture_va = io_mapping_map_wc(&vgpu->gvt->dev_priv->ggtt.iomap,
					ALIGN_DOWN(off, PAGE_SIZE),
					count + offset_in_page(off));
	if (!aperture_va)
		return -EIO;

	if (is_write)
		memcpy_toio(aperture_va + offset_in_page(off), buf, count);
	else
		memcpy_fromio(buf, aperture_va + offset_in_page(off), count);

	io_mapping_unmap(aperture_va);

	return 0;
}

static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
		ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
		break;
	default:
		if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions)
			return -EINVAL;

		index -= VFIO_PCI_NUM_REGIONS;
		return vgpu->vdev.region[index].ops->rw(vgpu, buf, count,
				ppos, is_write);
	}

	return ret == 0 ? count : ret;
}

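/*
 * Return true if the access at *ppos falls within the GGTT entry range
 * of the emulated BAR, where 8-byte accesses are allowed.
 */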
static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	struct intel_gvt *gvt = vgpu->gvt;
	int offset;

	/* Only allow MMIO GGTT entry access */
	if (index != PCI_BASE_ADDRESS_0)
		return false;

	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);

	return (offset >= gvt->device_info.gtt_start_offset &&
		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
			true : false;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes read */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(mdev, ppos)) {
			u64 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes write */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(mdev, ppos)) {
			u64 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}

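/*
 * mdev "mmap" callback: only the BAR2 (aperture) region may be mapped;
 * the requested range is validated against the vgpu's aperture window
 * and then remapped to the corresponding host aperture pages.
 */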
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff, req_start;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vma->vm_pgoff &
		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (!intel_vgpu_in_aperture(vgpu, req_start))
		return -EINVAL;
	if (req_start + req_size >
	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
		return -EINVAL;

	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, u32 flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, u32 flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		u32 flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		u32 flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	} else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
		intel_vgpu_release_msi_eventfd_ctx(vgpu);

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, u32 flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}

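/*
 * mdev "ioctl" callback implementing the VFIO device ioctls
 * (device/region/IRQ info, IRQ setup, reset, and the dma-buf based
 * display plane queries).
 */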
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS +
				vgpu->vdev.num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned int i;
		int ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			sparse = kzalloc(struct_size(sparse, areas, nr_areas),
					 GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->header.version = 1;
			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;
				info.index =
					array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vgpu->vdev.num_regions);

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
							&cap_type.header,
							sizeof(cap_type));
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					&sparse->header,
					struct_size(sparse, areas,
						    sparse->nr_areas));
				if (ret) {
					kfree(sparse);
					return ret;
				}
				break;
			default:
				kfree(sparse);
				return -EINVAL;
			}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					kfree(sparse);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		kfree(sparse);
		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
		struct vfio_device_gfx_plane_info dmabuf;
		int ret = 0;

		minsz = offsetofend(struct vfio_device_gfx_plane_info,
				    dmabuf_id);
		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
			return -EFAULT;
		if (dmabuf.argsz < minsz)
			return -EINVAL;

		ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf);
		if (ret != 0)
			return ret;

		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
								-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
		__u32 dmabuf_id;
		__s32 dmabuf_fd;

		if (get_user(dmabuf_id, (__u32 __user *)arg))
			return -EFAULT;

		dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id);
		return dmabuf_fd;

	}

	return -ENOTTY;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

static struct mdev_parent_ops intel_vgpu_ops = {
	.mdev_attr_groups	= intel_vgpu_groups,
	.create			= intel_vgpu_create,
	.remove			= intel_vgpu_remove,

	.open			= intel_vgpu_open,
	.release		= intel_vgpu_release,

	.read			= intel_vgpu_read,
	.write			= intel_vgpu_write,
	.mmap			= intel_vgpu_mmap,
	.ioctl			= intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	struct attribute **kvm_type_attrs;
	struct attribute_group **kvm_vgpu_type_groups;

	intel_gvt_ops = ops;
	if (!intel_gvt_ops->get_gvt_attrs(&kvm_type_attrs,
			&kvm_vgpu_type_groups))
		return -EFAULT;
	intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups;

	return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev)
{
	mdev_unregister_device(dev);
}

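/*
 * Page-track helpers: ask KVM to write-protect (or un-protect) a guest
 * page so that guest writes to tracked pages trap into the
 * write-protect handler below.
 */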
static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
						     (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

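/*
 * Bind this mdev to the KVM instance supplied via the group notifier:
 * allocate the kvmgt_guest_info handle, take a reference on the kvm,
 * and register the page-track notifier.
 */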
static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	init_completion(&vgpu->vblank_done);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	info->debugfs_cache_entries = debugfs_create_ulong(
						"kvmgt_nr_cache_entries",
						0444, vgpu->debugfs,
						&vgpu->vdev.nr_cache_entries);
	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	debugfs_remove(info->debugfs_cache_entries);

	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(void *p_vgpu)
{
	int i;
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;

	if (!vgpu->vdev.region)
		return;

	for (i = 0; i < vgpu->vdev.num_regions; i++)
		if (vgpu->vdev.region[i].ops->release)
			vgpu->vdev.region[i].ops->release(vgpu,
					&vgpu->vdev.region[i]);
	vgpu->vdev.num_regions = 0;
	kfree(vgpu->vdev.region);
	vgpu->vdev.region = NULL;
}

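/*
 * Inject an MSI into the guest by signalling the eventfd that
 * userspace (typically QEMU) wired up via VFIO_DEVICE_SET_IRQS.
 */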
static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	/*
	 * When the guest powers off, msi_trigger is set to NULL, but the
	 * vgpu's config space and mmio registers are not restored to their
	 * defaults.  If this vgpu is reused by the next VM, its pipes may
	 * still be enabled, so once the vgpu is active it will receive
	 * vblank interrupt requests.  msi_trigger stays NULL until the
	 * guest enables MSI, so in that case return success without
	 * injecting an interrupt into the guest.
	 */
	if (vgpu->vdev.msi_trigger == NULL)
		return 0;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}

static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	struct kvmgt_guest_info *info;
	kvm_pfn_t pfn;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;

	pfn = gfn_to_pfn(info->kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		return INTEL_GVT_INVALID_ADDR;

	return pfn;
}

static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
		unsigned long size, dma_addr_t *dma_addr)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct gvt_dma *entry;
	int ret;

	if (!handle_valid(handle))
		return -EINVAL;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	mutex_lock(&info->vgpu->vdev.cache_lock);

	entry = __gvt_cache_find_gfn(info->vgpu, gfn);
	if (!entry) {
		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else if (entry->size != size) {
		/* the same gfn with different size: unmap and re-map */
		gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
		__gvt_cache_remove_entry(vgpu, entry);

		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else {
		kref_get(&entry->ref);
		*dma_addr = entry->dma_addr;
	}

	mutex_unlock(&info->vgpu->vdev.cache_lock);
	return 0;

err_unmap:
	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
err_unlock:
	mutex_unlock(&info->vgpu->vdev.cache_lock);
	return ret;
}

static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
{
	struct kvmgt_guest_info *info;
	struct gvt_dma *entry;
	int ret = 0;

	if (!handle_valid(handle))
		return -ENODEV;

	info = (struct kvmgt_guest_info *)handle;

	mutex_lock(&info->vgpu->vdev.cache_lock);
	entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
	if (entry)
		kref_get(&entry->ref);
	else
		ret = -ENOMEM;
	mutex_unlock(&info->vgpu->vdev.cache_lock);

	return ret;
}

static void __gvt_dma_release(struct kref *ref)
{
	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);

	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
			   entry->size);
	__gvt_cache_remove_entry(entry->vgpu, entry);
}

static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
{
	struct kvmgt_guest_info *info;
	struct gvt_dma *entry;

	if (!handle_valid(handle))
		return;

	info = (struct kvmgt_guest_info *)handle;

	mutex_lock(&info->vgpu->vdev.cache_lock);
	entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
	if (entry)
		kref_put(&entry->ref, __gvt_dma_release);
	mutex_unlock(&info->vgpu->vdev.cache_lock);
}

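/*
 * Copy to/from guest physical memory.  When called from a kernel
 * thread (current->mm == NULL), temporarily adopt the guest's mm so
 * that kvm_read_guest()/kvm_write_guest() can resolve the gpa.
 */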
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx, ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread) {
		if (!mmget_not_zero(kvm->mm))
			return -EFAULT;
		use_mm(kvm->mm);
	}

	idx = srcu_read_lock(&kvm->srcu);
	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kthread) {
		unuse_mm(kvm->mm);
		mmput(kvm->mm);
	}

	return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx;
	bool ret;

	if (!handle_valid(handle))
		return false;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	ret = kvm_is_visible_gfn(kvm, gfn);
	srcu_read_unlock(&kvm->srcu, idx);

	return ret;
}

static struct intel_gvt_mpt kvmgt_mpt = {
	.type = INTEL_GVT_HYPERVISOR_KVM,
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.enable_page_track = kvmgt_page_track_add,
	.disable_page_track = kvmgt_page_track_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
	.dma_map_guest_page = kvmgt_dma_map_guest_page,
	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
	.dma_pin_guest_page = kvmgt_dma_pin_guest_page,
	.set_opregion = kvmgt_set_opregion,
	.set_edid = kvmgt_set_edid,
	.get_vfio_device = kvmgt_get_vfio_device,
	.put_vfio_device = kvmgt_put_vfio_device,
	.is_valid_gfn = kvmgt_is_valid_gfn,
};

static int __init kvmgt_init(void)
{
	if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0)
		return -ENODEV;
	return 0;
}

static void __exit kvmgt_exit(void)
{
	intel_gvt_unregister_hypervisor();
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");