hv_vmbus_drv_freebsd.c revision 283280
1/*- 2 * Copyright (c) 2009-2012 Microsoft Corp. 3 * Copyright (c) 2012 NetApp Inc. 4 * Copyright (c) 2012 Citrix Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * VM Bus Driver Implementation 31 */ 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 283280 2015-05-22 09:03:55Z whu $"); 34 35#include <sys/param.h> 36#include <sys/bus.h> 37#include <sys/kernel.h> 38#include <sys/lock.h> 39#include <sys/malloc.h> 40#include <sys/module.h> 41#include <sys/sysctl.h> 42#include <sys/syslog.h> 43#include <sys/systm.h> 44#include <sys/rtprio.h> 45#include <sys/interrupt.h> 46#include <sys/sx.h> 47#include <sys/taskqueue.h> 48#include <sys/mutex.h> 49#include <sys/smp.h> 50 51#include <machine/resource.h> 52#include <sys/rman.h> 53 54#include <machine/stdarg.h> 55#include <machine/intr_machdep.h> 56#include <machine/md_var.h> 57#include <machine/segments.h> 58#include <sys/pcpu.h> 59#include <machine/apicvar.h> 60 61#include "hv_vmbus_priv.h" 62 63 64#define VMBUS_IRQ 0x5 65 66static device_t vmbus_devp; 67static int vmbus_inited; 68static hv_setup_args setup_args; /* only CPU 0 supported at this time */ 69 70/** 71 * @brief Software interrupt thread routine to handle channel messages from 72 * the hypervisor. 73 */ 74static void 75vmbus_msg_swintr(void *arg) 76{ 77 int cpu; 78 void* page_addr; 79 hv_vmbus_message* msg; 80 hv_vmbus_message* copied; 81 82 cpu = (int)(long)arg; 83 KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " 84 "cpu out of range!")); 85 86 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 87 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 88 89 for (;;) { 90 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { 91 break; /* no message */ 92 } else { 93 copied = malloc(sizeof(hv_vmbus_message), 94 M_DEVBUF, M_NOWAIT); 95 KASSERT(copied != NULL, 96 ("Error VMBUS: malloc failed to allocate" 97 " hv_vmbus_message!")); 98 if (copied == NULL) 99 continue; 100 memcpy(copied, msg, sizeof(hv_vmbus_message)); 101 hv_queue_work_item(hv_vmbus_g_connection.work_queue, 102 hv_vmbus_on_channel_message, copied); 103 } 104 105 msg->header.message_type = HV_MESSAGE_TYPE_NONE; 106 107 /* 108 * Make sure the write to message_type (ie set to 109 * HV_MESSAGE_TYPE_NONE) happens before we read the 110 * message_pending and EOMing. Otherwise, the EOMing will 111 * not deliver any more messages 112 * since there is no empty slot 113 */ 114 wmb(); 115 116 if (msg->header.message_flags.u.message_pending) { 117 /* 118 * This will cause message queue rescan to possibly 119 * deliver another msg from the hypervisor 120 */ 121 wrmsr(HV_X64_MSR_EOM, 0); 122 } 123 } 124} 125 126/** 127 * @brief Interrupt filter routine for VMBUS. 128 * 129 * The purpose of this routine is to determine the type of VMBUS protocol 130 * message to process - an event or a channel message. 131 */ 132static inline int 133hv_vmbus_isr(void *unused) 134{ 135 int cpu; 136 hv_vmbus_message* msg; 137 hv_vmbus_synic_event_flags* event; 138 void* page_addr; 139 140 cpu = PCPU_GET(cpuid); 141 142 /* 143 * The Windows team has advised that we check for events 144 * before checking for messages. This is the way they do it 145 * in Windows when running as a guest in Hyper-V 146 */ 147 148 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; 149 event = (hv_vmbus_synic_event_flags*) 150 page_addr + HV_VMBUS_MESSAGE_SINT; 151 152 if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || 153 (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { 154 /* Since we are a child, we only need to check bit 0 */ 155 if (synch_test_and_clear_bit(0, &event->flags32[0])) { 156 swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); 157 } 158 } else { 159 /* 160 * On host with Win8 or above, we can directly look at 161 * the event page. If bit n is set, we have an interrupt 162 * on the channel with id n. 163 * Directly schedule the event software interrupt on 164 * current cpu. 165 */ 166 swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); 167 } 168 169 /* Check if there are actual msgs to be process */ 170 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 171 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 172 173 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { 174 swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); 175 } 176 177 return FILTER_HANDLED; 178} 179 180#ifdef HV_DEBUG_INTR 181uint32_t hv_intr_count = 0; 182#endif 183uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; 184uint32_t hv_vmbus_intr_cpu[MAXCPU]; 185 186void 187hv_vector_handler(struct trapframe *trap_frame) 188{ 189#ifdef HV_DEBUG_INTR 190 int cpu; 191#endif 192 193 /* 194 * Disable preemption. 195 */ 196 critical_enter(); 197 198#ifdef HV_DEBUG_INTR 199 /* 200 * Do a little interrupt counting. 201 */ 202 cpu = PCPU_GET(cpuid); 203 hv_vmbus_intr_cpu[cpu]++; 204 hv_intr_count++; 205#endif 206 207 hv_vmbus_isr(NULL); 208 209 /* 210 * Enable preemption. 211 */ 212 critical_exit(); 213} 214 215static int 216vmbus_read_ivar( 217 device_t dev, 218 device_t child, 219 int index, 220 uintptr_t* result) 221{ 222 struct hv_device *child_dev_ctx = device_get_ivars(child); 223 224 switch (index) { 225 226 case HV_VMBUS_IVAR_TYPE: 227 *result = (uintptr_t) &child_dev_ctx->class_id; 228 return (0); 229 case HV_VMBUS_IVAR_INSTANCE: 230 *result = (uintptr_t) &child_dev_ctx->device_id; 231 return (0); 232 case HV_VMBUS_IVAR_DEVCTX: 233 *result = (uintptr_t) child_dev_ctx; 234 return (0); 235 case HV_VMBUS_IVAR_NODE: 236 *result = (uintptr_t) child_dev_ctx->device; 237 return (0); 238 } 239 return (ENOENT); 240} 241 242static int 243vmbus_write_ivar( 244 device_t dev, 245 device_t child, 246 int index, 247 uintptr_t value) 248{ 249 switch (index) { 250 251 case HV_VMBUS_IVAR_TYPE: 252 case HV_VMBUS_IVAR_INSTANCE: 253 case HV_VMBUS_IVAR_DEVCTX: 254 case HV_VMBUS_IVAR_NODE: 255 /* read-only */ 256 return (EINVAL); 257 } 258 return (ENOENT); 259} 260 261struct hv_device* 262hv_vmbus_child_device_create( 263 hv_guid type, 264 hv_guid instance, 265 hv_vmbus_channel* channel) 266{ 267 hv_device* child_dev; 268 269 /* 270 * Allocate the new child device 271 */ 272 child_dev = malloc(sizeof(hv_device), M_DEVBUF, 273 M_NOWAIT | M_ZERO); 274 KASSERT(child_dev != NULL, 275 ("Error VMBUS: malloc failed to allocate hv_device!")); 276 277 if (child_dev == NULL) 278 return (NULL); 279 280 child_dev->channel = channel; 281 memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); 282 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); 283 284 return (child_dev); 285} 286 287static void 288print_dev_guid(struct hv_device *dev) 289{ 290 int i; 291 unsigned char guid_name[100]; 292 for (i = 0; i < 32; i += 2) 293 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); 294 if(bootverbose) 295 printf("VMBUS: Class ID: %s\n", guid_name); 296} 297 298int 299hv_vmbus_child_device_register(struct hv_device *child_dev) 300{ 301 device_t child; 302 int ret = 0; 303 304 print_dev_guid(child_dev); 305 306 307 child = device_add_child(vmbus_devp, NULL, -1); 308 child_dev->device = child; 309 device_set_ivars(child, child_dev); 310 311 mtx_lock(&Giant); 312 ret = device_probe_and_attach(child); 313 mtx_unlock(&Giant); 314 315 return (0); 316} 317 318int 319hv_vmbus_child_device_unregister(struct hv_device *child_dev) 320{ 321 int ret = 0; 322 /* 323 * XXXKYS: Ensure that this is the opposite of 324 * device_add_child() 325 */ 326 mtx_lock(&Giant); 327 ret = device_delete_child(vmbus_devp, child_dev->device); 328 mtx_unlock(&Giant); 329 return(ret); 330} 331 332static void 333vmbus_identify(driver_t *driver, device_t parent) 334{ 335 if (!hv_vmbus_query_hypervisor_presence()) 336 return; 337 338 vm_guest = VM_GUEST_HV; 339 340 BUS_ADD_CHILD(parent, 0, "vmbus", 0); 341} 342 343static int 344vmbus_probe(device_t dev) { 345 if(bootverbose) 346 device_printf(dev, "VMBUS: probe\n"); 347 348 device_set_desc(dev, "Vmbus Devices"); 349 350 return (BUS_PROBE_NOWILDCARD); 351} 352 353#ifdef HYPERV 354extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback); 355 356/** 357 * @brief Find a free IDT slot and setup the interrupt handler. 358 */ 359static int 360vmbus_vector_alloc(void) 361{ 362 int vector; 363 uintptr_t func; 364 struct gate_descriptor *ip; 365 366 /* 367 * Search backwards form the highest IDT vector available for use 368 * as vmbus channel callback vector. We install 'hv_vmbus_callback' 369 * handler at that vector and use it to interrupt vcpus. 370 */ 371 vector = APIC_SPURIOUS_INT; 372 while (--vector >= APIC_IPI_INTS) { 373 ip = &idt[vector]; 374 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 375 if (func == (uintptr_t)&IDTVEC(rsvd)) { 376#ifdef __i386__ 377 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT, 378 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 379#else 380 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT, 381 SEL_KPL, 0); 382#endif 383 384 return (vector); 385 } 386 } 387 return (0); 388} 389 390/** 391 * @brief Restore the IDT slot to rsvd. 392 */ 393static void 394vmbus_vector_free(int vector) 395{ 396 uintptr_t func; 397 struct gate_descriptor *ip; 398 399 if (vector == 0) 400 return; 401 402 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, 403 ("invalid vector %d", vector)); 404 405 ip = &idt[vector]; 406 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 407 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback), 408 ("invalid vector %d", vector)); 409 410 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); 411} 412 413#else /* HYPERV */ 414 415static int 416vmbus_vector_alloc(void) 417{ 418 return(0); 419} 420 421static void 422vmbus_vector_free(int vector) 423{ 424} 425 426#endif /* HYPERV */ 427 428/** 429 * @brief Main vmbus driver initialization routine. 430 * 431 * Here, we 432 * - initialize the vmbus driver context 433 * - setup various driver entry points 434 * - invoke the vmbus hv main init routine 435 * - get the irq resource 436 * - invoke the vmbus to add the vmbus root device 437 * - setup the vmbus root device 438 * - retrieve the channel offers 439 */ 440static int 441vmbus_bus_init(void) 442{ 443 int i, j, n, ret; 444 445 if (vmbus_inited) 446 return (0); 447 448 vmbus_inited = 1; 449 450 ret = hv_vmbus_init(); 451 452 if (ret) { 453 if(bootverbose) 454 printf("Error VMBUS: Hypervisor Initialization Failed!\n"); 455 return (ret); 456 } 457 458 /* 459 * Find a free IDT slot for vmbus callback. 460 */ 461 hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc(); 462 463 if (hv_vmbus_g_context.hv_cb_vector == 0) { 464 if(bootverbose) 465 printf("Error VMBUS: Cannot find free IDT slot for " 466 "vmbus callback!\n"); 467 goto cleanup; 468 } 469 470 if(bootverbose) 471 printf("VMBUS: vmbus callback vector %d\n", 472 hv_vmbus_g_context.hv_cb_vector); 473 474 /* 475 * Notify the hypervisor of our vector. 476 */ 477 setup_args.vector = hv_vmbus_g_context.hv_cb_vector; 478 479 CPU_FOREACH(j) { 480 hv_vmbus_intr_cpu[j] = 0; 481 hv_vmbus_swintr_event_cpu[j] = 0; 482 hv_vmbus_g_context.hv_event_intr_event[j] = NULL; 483 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; 484 hv_vmbus_g_context.event_swintr[j] = NULL; 485 hv_vmbus_g_context.msg_swintr[j] = NULL; 486 487 for (i = 0; i < 2; i++) 488 setup_args.page_buffers[2 * j + i] = NULL; 489 } 490 491 /* 492 * Per cpu setup. 493 */ 494 CPU_FOREACH(j) { 495 /* 496 * Setup software interrupt thread and handler for msg handling. 497 */ 498 ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j], 499 "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0, 500 &hv_vmbus_g_context.msg_swintr[j]); 501 if (ret) { 502 if(bootverbose) 503 printf("VMBUS: failed to setup msg swi for " 504 "cpu %d\n", j); 505 goto cleanup1; 506 } 507 508 /* 509 * Bind the swi thread to the cpu. 510 */ 511 ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j], 512 j); 513 if (ret) { 514 if(bootverbose) 515 printf("VMBUS: failed to bind msg swi thread " 516 "to cpu %d\n", j); 517 goto cleanup1; 518 } 519 520 /* 521 * Setup software interrupt thread and handler for 522 * event handling. 523 */ 524 ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j], 525 "hv_event", hv_vmbus_on_events, (void *)(long)j, 526 SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]); 527 if (ret) { 528 if(bootverbose) 529 printf("VMBUS: failed to setup event swi for " 530 "cpu %d\n", j); 531 goto cleanup1; 532 } 533 534 /* 535 * Prepare the per cpu msg and event pages to be called on each cpu. 536 */ 537 for(i = 0; i < 2; i++) { 538 setup_args.page_buffers[2 * j + i] = 539 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); 540 if (setup_args.page_buffers[2 * j + i] == NULL) { 541 KASSERT(setup_args.page_buffers[2 * j + i] != NULL, 542 ("Error VMBUS: malloc failed!")); 543 goto cleanup1; 544 } 545 } 546 } 547 548 if (bootverbose) 549 printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", 550 smp_started); 551 552 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); 553 554 /* 555 * Connect to VMBus in the root partition 556 */ 557 ret = hv_vmbus_connect(); 558 559 if (ret != 0) 560 goto cleanup1; 561 562 hv_vmbus_request_channel_offers(); 563 return (ret); 564 565 cleanup1: 566 /* 567 * Free pages alloc'ed 568 */ 569 for (n = 0; n < 2 * MAXCPU; n++) 570 if (setup_args.page_buffers[n] != NULL) 571 free(setup_args.page_buffers[n], M_DEVBUF); 572 573 /* 574 * remove swi and vmbus callback vector; 575 */ 576 CPU_FOREACH(j) { 577 if (hv_vmbus_g_context.msg_swintr[j] != NULL) 578 swi_remove(hv_vmbus_g_context.msg_swintr[j]); 579 if (hv_vmbus_g_context.event_swintr[j] != NULL) 580 swi_remove(hv_vmbus_g_context.event_swintr[j]); 581 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; 582 hv_vmbus_g_context.hv_event_intr_event[j] = NULL; 583 } 584 585 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); 586 587 cleanup: 588 hv_vmbus_cleanup(); 589 590 return (ret); 591} 592 593static int 594vmbus_attach(device_t dev) 595{ 596 if(bootverbose) 597 device_printf(dev, "VMBUS: attach dev: %p\n", dev); 598 vmbus_devp = dev; 599 600 /* 601 * If the system has already booted and thread 602 * scheduling is possible indicated by the global 603 * cold set to zero, we just call the driver 604 * initialization directly. 605 */ 606 if (!cold) 607 vmbus_bus_init(); 608 609 return (0); 610} 611 612static void 613vmbus_init(void) 614{ 615 if (vm_guest != VM_GUEST_HV) 616 return; 617 618 /* 619 * If the system has already booted and thread 620 * scheduling is possible, as indicated by the 621 * global cold set to zero, we just call the driver 622 * initialization directly. 623 */ 624 if (!cold) 625 vmbus_bus_init(); 626} 627 628static void 629vmbus_bus_exit(void) 630{ 631 int i; 632 633 hv_vmbus_release_unattached_channels(); 634 hv_vmbus_disconnect(); 635 636 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); 637 638 for(i = 0; i < 2 * MAXCPU; i++) { 639 if (setup_args.page_buffers[i] != 0) 640 free(setup_args.page_buffers[i], M_DEVBUF); 641 } 642 643 hv_vmbus_cleanup(); 644 645 /* remove swi */ 646 CPU_FOREACH(i) { 647 if (hv_vmbus_g_context.msg_swintr[i] != NULL) 648 swi_remove(hv_vmbus_g_context.msg_swintr[i]); 649 if (hv_vmbus_g_context.event_swintr[i] != NULL) 650 swi_remove(hv_vmbus_g_context.event_swintr[i]); 651 hv_vmbus_g_context.hv_msg_intr_event[i] = NULL; 652 hv_vmbus_g_context.hv_event_intr_event[i] = NULL; 653 } 654 655 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); 656 657 return; 658} 659 660static void 661vmbus_exit(void) 662{ 663 vmbus_bus_exit(); 664} 665 666static int 667vmbus_detach(device_t dev) 668{ 669 vmbus_exit(); 670 return (0); 671} 672 673static void 674vmbus_mod_load(void) 675{ 676 if(bootverbose) 677 printf("VMBUS: load\n"); 678} 679 680static void 681vmbus_mod_unload(void) 682{ 683 if(bootverbose) 684 printf("VMBUS: unload\n"); 685} 686 687static int 688vmbus_modevent(module_t mod, int what, void *arg) 689{ 690 switch (what) { 691 692 case MOD_LOAD: 693 vmbus_mod_load(); 694 break; 695 case MOD_UNLOAD: 696 vmbus_mod_unload(); 697 break; 698 } 699 700 return (0); 701} 702 703static device_method_t vmbus_methods[] = { 704 /** Device interface */ 705 DEVMETHOD(device_identify, vmbus_identify), 706 DEVMETHOD(device_probe, vmbus_probe), 707 DEVMETHOD(device_attach, vmbus_attach), 708 DEVMETHOD(device_detach, vmbus_detach), 709 DEVMETHOD(device_shutdown, bus_generic_shutdown), 710 DEVMETHOD(device_suspend, bus_generic_suspend), 711 DEVMETHOD(device_resume, bus_generic_resume), 712 713 /** Bus interface */ 714 DEVMETHOD(bus_add_child, bus_generic_add_child), 715 DEVMETHOD(bus_print_child, bus_generic_print_child), 716 DEVMETHOD(bus_read_ivar, vmbus_read_ivar), 717 DEVMETHOD(bus_write_ivar, vmbus_write_ivar), 718 719 { 0, 0 } }; 720 721static char driver_name[] = "vmbus"; 722static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; 723 724 725devclass_t vmbus_devclass; 726 727DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); 728MODULE_VERSION(vmbus,1); 729 730/* We want to be started after SMP is initialized */ 731SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); 732 733