vmbus.c revision 295308
1/*- 2 * Copyright (c) 2009-2012 Microsoft Corp. 3 * Copyright (c) 2012 NetApp Inc. 4 * Copyright (c) 2012 Citrix Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * VM Bus Driver Implementation 31 */ 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 295308 2016-02-05 07:20:31Z sephe $"); 34 35#include <sys/param.h> 36#include <sys/bus.h> 37#include <sys/kernel.h> 38#include <sys/lock.h> 39#include <sys/malloc.h> 40#include <sys/module.h> 41#include <sys/proc.h> 42#include <sys/sysctl.h> 43#include <sys/syslog.h> 44#include <sys/systm.h> 45#include <sys/rtprio.h> 46#include <sys/interrupt.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49#include <sys/mutex.h> 50#include <sys/smp.h> 51 52#include <machine/resource.h> 53#include <sys/rman.h> 54 55#include <machine/stdarg.h> 56#include <machine/intr_machdep.h> 57#include <machine/md_var.h> 58#include <machine/segments.h> 59#include <sys/pcpu.h> 60#include <x86/apicvar.h> 61 62#include "hv_vmbus_priv.h" 63 64#include <contrib/dev/acpica/include/acpi.h> 65#include "acpi_if.h" 66 67static device_t vmbus_devp; 68static int vmbus_inited; 69static hv_setup_args setup_args; /* only CPU 0 supported at this time */ 70 71static char *vmbus_ids[] = { "VMBUS", NULL }; 72 73/** 74 * @brief Software interrupt thread routine to handle channel messages from 75 * the hypervisor. 76 */ 77static void 78vmbus_msg_swintr(void *arg) 79{ 80 int cpu; 81 void* page_addr; 82 hv_vmbus_channel_msg_header *hdr; 83 hv_vmbus_channel_msg_table_entry *entry; 84 hv_vmbus_channel_msg_type msg_type; 85 hv_vmbus_message* msg; 86 87 cpu = (int)(long)arg; 88 KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " 89 "cpu out of range!")); 90 91 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 92 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 93 94 for (;;) { 95 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) 96 break; /* no message */ 97 98 hdr = (hv_vmbus_channel_msg_header *)msg->u.payload; 99 msg_type = hdr->message_type; 100 101 if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { 102 printf("VMBUS: unknown message type = %d\n", msg_type); 103 goto handled; 104 } 105 106 entry = &g_channel_message_table[msg_type]; 107 108 if (entry->messageHandler) 109 entry->messageHandler(hdr); 110handled: 111 msg->header.message_type = HV_MESSAGE_TYPE_NONE; 112 113 /* 114 * Make sure the write to message_type (ie set to 115 * HV_MESSAGE_TYPE_NONE) happens before we read the 116 * message_pending and EOMing. Otherwise, the EOMing will 117 * not deliver any more messages 118 * since there is no empty slot 119 */ 120 wmb(); 121 122 if (msg->header.message_flags.u.message_pending) { 123 /* 124 * This will cause message queue rescan to possibly 125 * deliver another msg from the hypervisor 126 */ 127 wrmsr(HV_X64_MSR_EOM, 0); 128 } 129 } 130} 131 132/** 133 * @brief Interrupt filter routine for VMBUS. 134 * 135 * The purpose of this routine is to determine the type of VMBUS protocol 136 * message to process - an event or a channel message. 137 */ 138static inline int 139hv_vmbus_isr(struct trapframe *frame) 140{ 141 int cpu; 142 hv_vmbus_message* msg; 143 hv_vmbus_synic_event_flags* event; 144 void* page_addr; 145 146 cpu = PCPU_GET(cpuid); 147 148 /* 149 * The Windows team has advised that we check for events 150 * before checking for messages. This is the way they do it 151 * in Windows when running as a guest in Hyper-V 152 */ 153 154 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; 155 event = (hv_vmbus_synic_event_flags*) 156 page_addr + HV_VMBUS_MESSAGE_SINT; 157 158 if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || 159 (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { 160 /* Since we are a child, we only need to check bit 0 */ 161 if (synch_test_and_clear_bit(0, &event->flags32[0])) { 162 hv_vmbus_on_events(cpu); 163 } 164 } else { 165 /* 166 * On host with Win8 or above, we can directly look at 167 * the event page. If bit n is set, we have an interrupt 168 * on the channel with id n. 169 * Directly schedule the event software interrupt on 170 * current cpu. 171 */ 172 hv_vmbus_on_events(cpu); 173 } 174 175 /* Check if there are actual msgs to be process */ 176 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 177 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 178 179 /* we call eventtimer process the message */ 180 if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) { 181 msg->header.message_type = HV_MESSAGE_TYPE_NONE; 182 183 /* 184 * Make sure the write to message_type (ie set to 185 * HV_MESSAGE_TYPE_NONE) happens before we read the 186 * message_pending and EOMing. Otherwise, the EOMing will 187 * not deliver any more messages 188 * since there is no empty slot 189 */ 190 wmb(); 191 192 if (msg->header.message_flags.u.message_pending) { 193 /* 194 * This will cause message queue rescan to possibly 195 * deliver another msg from the hypervisor 196 */ 197 wrmsr(HV_X64_MSR_EOM, 0); 198 } 199 hv_et_intr(frame); 200 return (FILTER_HANDLED); 201 } 202 203 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { 204 swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); 205 } 206 207 return (FILTER_HANDLED); 208} 209 210u_long *hv_vmbus_intr_cpu[MAXCPU]; 211 212void 213hv_vector_handler(struct trapframe *trap_frame) 214{ 215 int cpu; 216 217 /* 218 * Disable preemption. 219 */ 220 critical_enter(); 221 222 /* 223 * Do a little interrupt counting. 224 */ 225 cpu = PCPU_GET(cpuid); 226 (*hv_vmbus_intr_cpu[cpu])++; 227 228 hv_vmbus_isr(trap_frame); 229 230 /* 231 * Enable preemption. 232 */ 233 critical_exit(); 234} 235 236static int 237vmbus_read_ivar( 238 device_t dev, 239 device_t child, 240 int index, 241 uintptr_t* result) 242{ 243 struct hv_device *child_dev_ctx = device_get_ivars(child); 244 245 switch (index) { 246 247 case HV_VMBUS_IVAR_TYPE: 248 *result = (uintptr_t) &child_dev_ctx->class_id; 249 return (0); 250 case HV_VMBUS_IVAR_INSTANCE: 251 *result = (uintptr_t) &child_dev_ctx->device_id; 252 return (0); 253 case HV_VMBUS_IVAR_DEVCTX: 254 *result = (uintptr_t) child_dev_ctx; 255 return (0); 256 case HV_VMBUS_IVAR_NODE: 257 *result = (uintptr_t) child_dev_ctx->device; 258 return (0); 259 } 260 return (ENOENT); 261} 262 263static int 264vmbus_write_ivar( 265 device_t dev, 266 device_t child, 267 int index, 268 uintptr_t value) 269{ 270 switch (index) { 271 272 case HV_VMBUS_IVAR_TYPE: 273 case HV_VMBUS_IVAR_INSTANCE: 274 case HV_VMBUS_IVAR_DEVCTX: 275 case HV_VMBUS_IVAR_NODE: 276 /* read-only */ 277 return (EINVAL); 278 } 279 return (ENOENT); 280} 281 282struct hv_device* 283hv_vmbus_child_device_create( 284 hv_guid type, 285 hv_guid instance, 286 hv_vmbus_channel* channel) 287{ 288 hv_device* child_dev; 289 290 /* 291 * Allocate the new child device 292 */ 293 child_dev = malloc(sizeof(hv_device), M_DEVBUF, 294 M_WAITOK | M_ZERO); 295 296 child_dev->channel = channel; 297 memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); 298 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); 299 300 return (child_dev); 301} 302 303static void 304print_dev_guid(struct hv_device *dev) 305{ 306 int i; 307 unsigned char guid_name[100]; 308 for (i = 0; i < 32; i += 2) 309 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); 310 if(bootverbose) 311 printf("VMBUS: Class ID: %s\n", guid_name); 312} 313 314int 315hv_vmbus_child_device_register(struct hv_device *child_dev) 316{ 317 device_t child; 318 int ret = 0; 319 320 print_dev_guid(child_dev); 321 322 323 child = device_add_child(vmbus_devp, NULL, -1); 324 child_dev->device = child; 325 device_set_ivars(child, child_dev); 326 327 mtx_lock(&Giant); 328 ret = device_probe_and_attach(child); 329 mtx_unlock(&Giant); 330 331 return (0); 332} 333 334int 335hv_vmbus_child_device_unregister(struct hv_device *child_dev) 336{ 337 int ret = 0; 338 /* 339 * XXXKYS: Ensure that this is the opposite of 340 * device_add_child() 341 */ 342 mtx_lock(&Giant); 343 ret = device_delete_child(vmbus_devp, child_dev->device); 344 mtx_unlock(&Giant); 345 return(ret); 346} 347 348static int 349vmbus_probe(device_t dev) { 350 if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL || 351 device_get_unit(dev) != 0) 352 return (ENXIO); 353 354 device_set_desc(dev, "Vmbus Devices"); 355 356 return (BUS_PROBE_DEFAULT); 357} 358 359#ifdef HYPERV 360extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback); 361 362/** 363 * @brief Find a free IDT slot and setup the interrupt handler. 364 */ 365static int 366vmbus_vector_alloc(void) 367{ 368 int vector; 369 uintptr_t func; 370 struct gate_descriptor *ip; 371 372 /* 373 * Search backwards form the highest IDT vector available for use 374 * as vmbus channel callback vector. We install 'hv_vmbus_callback' 375 * handler at that vector and use it to interrupt vcpus. 376 */ 377 vector = APIC_SPURIOUS_INT; 378 while (--vector >= APIC_IPI_INTS) { 379 ip = &idt[vector]; 380 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 381 if (func == (uintptr_t)&IDTVEC(rsvd)) { 382#ifdef __i386__ 383 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT, 384 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 385#else 386 setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT, 387 SEL_KPL, 0); 388#endif 389 390 return (vector); 391 } 392 } 393 return (0); 394} 395 396/** 397 * @brief Restore the IDT slot to rsvd. 398 */ 399static void 400vmbus_vector_free(int vector) 401{ 402 uintptr_t func; 403 struct gate_descriptor *ip; 404 405 if (vector == 0) 406 return; 407 408 KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, 409 ("invalid vector %d", vector)); 410 411 ip = &idt[vector]; 412 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 413 KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback), 414 ("invalid vector %d", vector)); 415 416 setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); 417} 418 419#else /* HYPERV */ 420 421static int 422vmbus_vector_alloc(void) 423{ 424 return(0); 425} 426 427static void 428vmbus_vector_free(int vector) 429{ 430} 431 432#endif /* HYPERV */ 433 434/** 435 * @brief Main vmbus driver initialization routine. 436 * 437 * Here, we 438 * - initialize the vmbus driver context 439 * - setup various driver entry points 440 * - invoke the vmbus hv main init routine 441 * - get the irq resource 442 * - invoke the vmbus to add the vmbus root device 443 * - setup the vmbus root device 444 * - retrieve the channel offers 445 */ 446static int 447vmbus_bus_init(void) 448{ 449 int i, j, n, ret; 450 char buf[MAXCOMLEN + 1]; 451 cpuset_t cpu_mask; 452 453 if (vmbus_inited) 454 return (0); 455 456 vmbus_inited = 1; 457 458 ret = hv_vmbus_init(); 459 460 if (ret) { 461 if(bootverbose) 462 printf("Error VMBUS: Hypervisor Initialization Failed!\n"); 463 return (ret); 464 } 465 466 /* 467 * Find a free IDT slot for vmbus callback. 468 */ 469 hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc(); 470 471 if (hv_vmbus_g_context.hv_cb_vector == 0) { 472 if(bootverbose) 473 printf("Error VMBUS: Cannot find free IDT slot for " 474 "vmbus callback!\n"); 475 goto cleanup; 476 } 477 478 if(bootverbose) 479 printf("VMBUS: vmbus callback vector %d\n", 480 hv_vmbus_g_context.hv_cb_vector); 481 482 /* 483 * Notify the hypervisor of our vector. 484 */ 485 setup_args.vector = hv_vmbus_g_context.hv_cb_vector; 486 487 CPU_FOREACH(j) { 488 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; 489 hv_vmbus_g_context.msg_swintr[j] = NULL; 490 491 snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); 492 intrcnt_add(buf, &hv_vmbus_intr_cpu[j]); 493 494 for (i = 0; i < 2; i++) 495 setup_args.page_buffers[2 * j + i] = NULL; 496 } 497 498 /* 499 * Per cpu setup. 500 */ 501 CPU_FOREACH(j) { 502 /* 503 * Setup taskqueue to handle events 504 */ 505 hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK, 506 taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]); 507 if (hv_vmbus_g_context.hv_event_queue[j] == NULL) { 508 if (bootverbose) 509 printf("VMBUS: failed to setup taskqueue\n"); 510 goto cleanup1; 511 } 512 CPU_SETOF(j, &cpu_mask); 513 taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, &cpu_mask, 514 "hvevent%d", j); 515 516 /* 517 * Setup software interrupt thread and handler for msg handling. 518 */ 519 ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j], 520 "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0, 521 &hv_vmbus_g_context.msg_swintr[j]); 522 if (ret) { 523 if(bootverbose) 524 printf("VMBUS: failed to setup msg swi for " 525 "cpu %d\n", j); 526 goto cleanup1; 527 } 528 529 /* 530 * Bind the swi thread to the cpu. 531 */ 532 ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j], 533 j); 534 if (ret) { 535 if(bootverbose) 536 printf("VMBUS: failed to bind msg swi thread " 537 "to cpu %d\n", j); 538 goto cleanup1; 539 } 540 541 /* 542 * Prepare the per cpu msg and event pages to be called on each cpu. 543 */ 544 for(i = 0; i < 2; i++) { 545 setup_args.page_buffers[2 * j + i] = 546 malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); 547 } 548 } 549 550 if (bootverbose) 551 printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", 552 smp_started); 553 554 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); 555 556 /* 557 * Connect to VMBus in the root partition 558 */ 559 ret = hv_vmbus_connect(); 560 561 if (ret != 0) 562 goto cleanup1; 563 564 hv_vmbus_request_channel_offers(); 565 return (ret); 566 567 cleanup1: 568 /* 569 * Free pages alloc'ed 570 */ 571 for (n = 0; n < 2 * MAXCPU; n++) 572 if (setup_args.page_buffers[n] != NULL) 573 free(setup_args.page_buffers[n], M_DEVBUF); 574 575 /* 576 * remove swi and vmbus callback vector; 577 */ 578 CPU_FOREACH(j) { 579 if (hv_vmbus_g_context.hv_event_queue[j] != NULL) 580 taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]); 581 if (hv_vmbus_g_context.msg_swintr[j] != NULL) 582 swi_remove(hv_vmbus_g_context.msg_swintr[j]); 583 hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; 584 } 585 586 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); 587 588 cleanup: 589 hv_vmbus_cleanup(); 590 591 return (ret); 592} 593 594static int 595vmbus_attach(device_t dev) 596{ 597 if(bootverbose) 598 device_printf(dev, "VMBUS: attach dev: %p\n", dev); 599 vmbus_devp = dev; 600 601 /* 602 * If the system has already booted and thread 603 * scheduling is possible indicated by the global 604 * cold set to zero, we just call the driver 605 * initialization directly. 606 */ 607 if (!cold) 608 vmbus_bus_init(); 609 610 return (0); 611} 612 613static void 614vmbus_init(void) 615{ 616 if (vm_guest != VM_GUEST_HV) 617 return; 618 619 /* 620 * If the system has already booted and thread 621 * scheduling is possible, as indicated by the 622 * global cold set to zero, we just call the driver 623 * initialization directly. 624 */ 625 if (!cold) 626 vmbus_bus_init(); 627} 628 629static void 630vmbus_bus_exit(void) 631{ 632 int i; 633 634 hv_vmbus_release_unattached_channels(); 635 hv_vmbus_disconnect(); 636 637 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); 638 639 for(i = 0; i < 2 * MAXCPU; i++) { 640 if (setup_args.page_buffers[i] != 0) 641 free(setup_args.page_buffers[i], M_DEVBUF); 642 } 643 644 hv_vmbus_cleanup(); 645 646 /* remove swi */ 647 CPU_FOREACH(i) { 648 if (hv_vmbus_g_context.hv_event_queue[i] != NULL) 649 taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]); 650 if (hv_vmbus_g_context.msg_swintr[i] != NULL) 651 swi_remove(hv_vmbus_g_context.msg_swintr[i]); 652 hv_vmbus_g_context.hv_msg_intr_event[i] = NULL; 653 } 654 655 vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); 656 657 return; 658} 659 660static void 661vmbus_exit(void) 662{ 663 vmbus_bus_exit(); 664} 665 666static int 667vmbus_detach(device_t dev) 668{ 669 vmbus_exit(); 670 return (0); 671} 672 673static void 674vmbus_mod_load(void) 675{ 676 if(bootverbose) 677 printf("VMBUS: load\n"); 678} 679 680static void 681vmbus_mod_unload(void) 682{ 683 if(bootverbose) 684 printf("VMBUS: unload\n"); 685} 686 687static int 688vmbus_modevent(module_t mod, int what, void *arg) 689{ 690 switch (what) { 691 692 case MOD_LOAD: 693 vmbus_mod_load(); 694 break; 695 case MOD_UNLOAD: 696 vmbus_mod_unload(); 697 break; 698 } 699 700 return (0); 701} 702 703static device_method_t vmbus_methods[] = { 704 /** Device interface */ 705 DEVMETHOD(device_probe, vmbus_probe), 706 DEVMETHOD(device_attach, vmbus_attach), 707 DEVMETHOD(device_detach, vmbus_detach), 708 DEVMETHOD(device_shutdown, bus_generic_shutdown), 709 DEVMETHOD(device_suspend, bus_generic_suspend), 710 DEVMETHOD(device_resume, bus_generic_resume), 711 712 /** Bus interface */ 713 DEVMETHOD(bus_add_child, bus_generic_add_child), 714 DEVMETHOD(bus_print_child, bus_generic_print_child), 715 DEVMETHOD(bus_read_ivar, vmbus_read_ivar), 716 DEVMETHOD(bus_write_ivar, vmbus_write_ivar), 717 718 { 0, 0 } }; 719 720static char driver_name[] = "vmbus"; 721static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; 722 723 724devclass_t vmbus_devclass; 725 726DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); 727MODULE_DEPEND(vmbus, acpi, 1, 1, 1); 728MODULE_VERSION(vmbus, 1); 729 730/* We want to be started after SMP is initialized */ 731SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); 732 733