hv_vmbus_drv_freebsd.c revision 256758
1/*- 2 * Copyright (c) 2009-2012 Microsoft Corp. 3 * Copyright (c) 2012 NetApp Inc. 4 * Copyright (c) 2012 Citrix Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * VM Bus Driver Implementation 31 */ 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 256758 2013-10-18 23:19:27Z gibbs $"); 34 35#include <sys/param.h> 36#include <sys/bus.h> 37#include <sys/kernel.h> 38#include <sys/lock.h> 39#include <sys/malloc.h> 40#include <sys/module.h> 41#include <sys/sysctl.h> 42#include <sys/syslog.h> 43#include <sys/systm.h> 44#include <sys/rtprio.h> 45#include <sys/interrupt.h> 46#include <sys/sx.h> 47#include <sys/taskqueue.h> 48#include <sys/mutex.h> 49#include <sys/smp.h> 50 51#include <machine/resource.h> 52#include <sys/rman.h> 53 54#include <machine/stdarg.h> 55#include <machine/intr_machdep.h> 56#include <sys/pcpu.h> 57 58#include "hv_vmbus_priv.h" 59 60 61#define VMBUS_IRQ 0x5 62 63static struct intr_event *hv_msg_intr_event; 64static struct intr_event *hv_event_intr_event; 65static void *msg_swintr; 66static void *event_swintr; 67static device_t vmbus_devp; 68static void *vmbus_cookiep; 69static int vmbus_rid; 70struct resource *intr_res; 71static int vmbus_irq = VMBUS_IRQ; 72static int vmbus_inited; 73static hv_setup_args setup_args; /* only CPU 0 supported at this time */ 74 75/** 76 * @brief Software interrupt thread routine to handle channel messages from 77 * the hypervisor. 78 */ 79static void 80vmbus_msg_swintr(void *dummy) 81{ 82 int cpu; 83 void* page_addr; 84 hv_vmbus_message* msg; 85 hv_vmbus_message* copied; 86 87 cpu = PCPU_GET(cpuid); 88 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 89 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 90 91 for (;;) { 92 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { 93 break; /* no message */ 94 } else { 95 copied = malloc(sizeof(hv_vmbus_message), 96 M_DEVBUF, M_NOWAIT); 97 KASSERT(copied != NULL, 98 ("Error VMBUS: malloc failed to allocate" 99 " hv_vmbus_message!")); 100 if (copied == NULL) 101 continue; 102 memcpy(copied, msg, sizeof(hv_vmbus_message)); 103 hv_queue_work_item(hv_vmbus_g_connection.work_queue, 104 hv_vmbus_on_channel_message, copied); 105 } 106 107 msg->header.message_type = HV_MESSAGE_TYPE_NONE; 108 109 /* 110 * Make sure the write to message_type (ie set to 111 * HV_MESSAGE_TYPE_NONE) happens before we read the 112 * message_pending and EOMing. Otherwise, the EOMing will 113 * not deliver any more messages 114 * since there is no empty slot 115 */ 116 wmb(); 117 118 if (msg->header.message_flags.u.message_pending) { 119 /* 120 * This will cause message queue rescan to possibly 121 * deliver another msg from the hypervisor 122 */ 123 wrmsr(HV_X64_MSR_EOM, 0); 124 } 125 } 126} 127 128/** 129 * @brief Interrupt filter routine for VMBUS. 130 * 131 * The purpose of this routine is to determine the type of VMBUS protocol 132 * message to process - an event or a channel message. 133 * As this is an interrupt filter routine, the function runs in a very 134 * restricted envinronment. From the manpage for bus_setup_intr(9) 135 * 136 * In this restricted environment, care must be taken to account for all 137 * races. A careful analysis of races should be done as well. It is gener- 138 * ally cheaper to take an extra interrupt, for example, than to protect 139 * variables with spinlocks. Read, modify, write cycles of hardware regis- 140 * ters need to be carefully analyzed if other threads are accessing the 141 * same registers. 142 */ 143static int 144hv_vmbus_isr(void *unused) 145{ 146 int cpu; 147 hv_vmbus_message* msg; 148 hv_vmbus_synic_event_flags* event; 149 void* page_addr; 150 151 cpu = PCPU_GET(cpuid); 152 /* (Temporary limit) */ 153 KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); 154 155 /* 156 * The Windows team has advised that we check for events 157 * before checking for messages. This is the way they do it 158 * in Windows when running as a guest in Hyper-V 159 */ 160 161 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; 162 event = (hv_vmbus_synic_event_flags*) 163 page_addr + HV_VMBUS_MESSAGE_SINT; 164 165 /* Since we are a child, we only need to check bit 0 */ 166 if (synch_test_and_clear_bit(0, &event->flags32[0])) { 167 swi_sched(event_swintr, 0); 168 } 169 170 /* Check if there are actual msgs to be process */ 171 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 172 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 173 174 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { 175 swi_sched(msg_swintr, 0); 176 } 177 178 return FILTER_HANDLED; 179} 180 181static int 182vmbus_read_ivar( 183 device_t dev, 184 device_t child, 185 int index, 186 uintptr_t* result) 187{ 188 struct hv_device *child_dev_ctx = device_get_ivars(child); 189 190 switch (index) { 191 192 case HV_VMBUS_IVAR_TYPE: 193 *result = (uintptr_t) &child_dev_ctx->class_id; 194 return (0); 195 case HV_VMBUS_IVAR_INSTANCE: 196 *result = (uintptr_t) &child_dev_ctx->device_id; 197 return (0); 198 case HV_VMBUS_IVAR_DEVCTX: 199 *result = (uintptr_t) child_dev_ctx; 200 return (0); 201 case HV_VMBUS_IVAR_NODE: 202 *result = (uintptr_t) child_dev_ctx->device; 203 return (0); 204 } 205 return (ENOENT); 206} 207 208static int 209vmbus_write_ivar( 210 device_t dev, 211 device_t child, 212 int index, 213 uintptr_t value) 214{ 215 switch (index) { 216 217 case HV_VMBUS_IVAR_TYPE: 218 case HV_VMBUS_IVAR_INSTANCE: 219 case HV_VMBUS_IVAR_DEVCTX: 220 case HV_VMBUS_IVAR_NODE: 221 /* read-only */ 222 return (EINVAL); 223 } 224 return (ENOENT); 225} 226 227struct hv_device* 228hv_vmbus_child_device_create( 229 hv_guid type, 230 hv_guid instance, 231 hv_vmbus_channel* channel) 232{ 233 hv_device* child_dev; 234 235 /* 236 * Allocate the new child device 237 */ 238 child_dev = malloc(sizeof(hv_device), M_DEVBUF, 239 M_NOWAIT | M_ZERO); 240 KASSERT(child_dev != NULL, 241 ("Error VMBUS: malloc failed to allocate hv_device!")); 242 243 if (child_dev == NULL) 244 return (NULL); 245 246 child_dev->channel = channel; 247 memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); 248 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); 249 250 return (child_dev); 251} 252 253static void 254print_dev_guid(struct hv_device *dev) 255{ 256 int i; 257 unsigned char guid_name[100]; 258 for (i = 0; i < 32; i += 2) 259 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); 260 if(bootverbose) 261 printf("VMBUS: Class ID: %s\n", guid_name); 262} 263 264int 265hv_vmbus_child_device_register(struct hv_device *child_dev) 266{ 267 device_t child; 268 int ret = 0; 269 270 print_dev_guid(child_dev); 271 272 273 child = device_add_child(vmbus_devp, NULL, -1); 274 child_dev->device = child; 275 device_set_ivars(child, child_dev); 276 277 mtx_lock(&Giant); 278 ret = device_probe_and_attach(child); 279 mtx_unlock(&Giant); 280 281 return (0); 282} 283 284int 285hv_vmbus_child_device_unregister(struct hv_device *child_dev) 286{ 287 int ret = 0; 288 /* 289 * XXXKYS: Ensure that this is the opposite of 290 * device_add_child() 291 */ 292 mtx_lock(&Giant); 293 ret = device_delete_child(vmbus_devp, child_dev->device); 294 mtx_unlock(&Giant); 295 return(ret); 296} 297 298static void 299vmbus_identify(driver_t *driver, device_t parent) 300{ 301 if (!hv_vmbus_query_hypervisor_presence()) 302 return; 303 304 vm_guest = VM_GUEST_HV; 305 306 BUS_ADD_CHILD(parent, 0, "vmbus", 0); 307} 308 309static int 310vmbus_probe(device_t dev) { 311 if(bootverbose) 312 device_printf(dev, "VMBUS: probe\n"); 313 314 device_set_desc(dev, "Vmbus Devices"); 315 316 return (0); 317} 318 319/** 320 * @brief Main vmbus driver initialization routine. 321 * 322 * Here, we 323 * - initialize the vmbus driver context 324 * - setup various driver entry points 325 * - invoke the vmbus hv main init routine 326 * - get the irq resource 327 * - invoke the vmbus to add the vmbus root device 328 * - setup the vmbus root device 329 * - retrieve the channel offers 330 */ 331static int 332vmbus_bus_init(void) 333{ 334 struct ioapic_intsrc { 335 struct intsrc io_intsrc; 336 u_int io_irq; 337 u_int io_intpin:8; 338 u_int io_vector:8; 339 u_int io_cpu:8; 340 u_int io_activehi:1; 341 u_int io_edgetrigger:1; 342 u_int io_masked:1; 343 int io_bus:4; 344 uint32_t io_lowreg; 345 }; 346 int i, ret; 347 unsigned int vector = 0; 348 struct intsrc *isrc; 349 struct ioapic_intsrc *intpin; 350 351 if (vmbus_inited) 352 return (0); 353 354 vmbus_inited = 1; 355 356 ret = hv_vmbus_init(); 357 358 if (ret) { 359 if(bootverbose) 360 printf("Error VMBUS: Hypervisor Initialization Failed!\n"); 361 return (ret); 362 } 363 364 ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, 365 NULL, SWI_CLOCK, 0, &msg_swintr); 366 367 if (ret) 368 goto cleanup; 369 370 /* 371 * Message SW interrupt handler checks a per-CPU page and 372 * thus the thread needs to be bound to CPU-0 - which is where 373 * all interrupts are processed. 374 */ 375 ret = intr_event_bind(hv_msg_intr_event, 0); 376 377 if (ret) 378 goto cleanup1; 379 380 ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, 381 NULL, SWI_CLOCK, 0, &event_swintr); 382 383 if (ret) 384 goto cleanup1; 385 386 intr_res = bus_alloc_resource(vmbus_devp, 387 SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); 388 389 if (intr_res == NULL) { 390 ret = ENOMEM; /* XXXKYS: Need a better errno */ 391 goto cleanup2; 392 } 393 394 /* 395 * Setup interrupt filter handler 396 */ 397 ret = bus_setup_intr(vmbus_devp, intr_res, 398 INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, 399 NULL, &vmbus_cookiep); 400 401 if (ret != 0) 402 goto cleanup3; 403 404 ret = bus_bind_intr(vmbus_devp, intr_res, 0); 405 if (ret != 0) 406 goto cleanup4; 407 408 isrc = intr_lookup_source(vmbus_irq); 409 if ((isrc == NULL) || (isrc->is_event == NULL)) { 410 ret = EINVAL; 411 goto cleanup4; 412 } 413 414 /* vector = isrc->is_event->ie_vector; */ 415 intpin = (struct ioapic_intsrc *)isrc; 416 vector = intpin->io_vector; 417 418 if(bootverbose) 419 printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); 420 421 /** 422 * Notify the hypervisor of our irq. 423 */ 424 setup_args.vector = vector; 425 for(i = 0; i < 2; i++) { 426 setup_args.page_buffers[i] = 427 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); 428 if (setup_args.page_buffers[i] == NULL) { 429 KASSERT(setup_args.page_buffers[i] != NULL, 430 ("Error VMBUS: malloc failed!")); 431 if (i > 0) 432 free(setup_args.page_buffers[0], M_DEVBUF); 433 goto cleanup4; 434 } 435 } 436 437 /* only CPU #0 supported at this time */ 438 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); 439 440 /* 441 * Connect to VMBus in the root partition 442 */ 443 ret = hv_vmbus_connect(); 444 445 if (ret != 0) 446 goto cleanup4; 447 448 hv_vmbus_request_channel_offers(); 449 return (ret); 450 451 cleanup4: 452 453 /* 454 * remove swi, bus and intr resource 455 */ 456 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); 457 458 cleanup3: 459 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); 460 461 cleanup2: 462 swi_remove(event_swintr); 463 464 cleanup1: 465 swi_remove(msg_swintr); 466 467 cleanup: 468 hv_vmbus_cleanup(); 469 470 return (ret); 471} 472 473static int 474vmbus_attach(device_t dev) 475{ 476 if(bootverbose) 477 device_printf(dev, "VMBUS: attach dev: %p\n", dev); 478 vmbus_devp = dev; 479 480 /* 481 * If the system has already booted and thread 482 * scheduling is possible indicated by the global 483 * cold set to zero, we just call the driver 484 * initialization directly. 485 */ 486 if (!cold) 487 vmbus_bus_init(); 488 489 return (0); 490} 491 492static void 493vmbus_init(void) 494{ 495 if (vm_guest != VM_GUEST_HV) 496 return; 497 498 /* 499 * If the system has already booted and thread 500 * scheduling is possible, as indicated by the 501 * global cold set to zero, we just call the driver 502 * initialization directly. 503 */ 504 if (!cold) 505 vmbus_bus_init(); 506} 507 508static void 509vmbus_bus_exit(void) 510{ 511 int i; 512 513 hv_vmbus_release_unattached_channels(); 514 hv_vmbus_disconnect(); 515 516 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); 517 518 for(i = 0; i < 2; i++) { 519 if (setup_args.page_buffers[i] != 0) 520 free(setup_args.page_buffers[i], M_DEVBUF); 521 } 522 523 hv_vmbus_cleanup(); 524 525 /* remove swi, bus and intr resource */ 526 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); 527 528 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); 529 530 swi_remove(msg_swintr); 531 swi_remove(event_swintr); 532 533 return; 534} 535 536static void 537vmbus_exit(void) 538{ 539 vmbus_bus_exit(); 540} 541 542static int 543vmbus_detach(device_t dev) 544{ 545 vmbus_exit(); 546 return (0); 547} 548 549static void 550vmbus_mod_load(void) 551{ 552 if(bootverbose) 553 printf("VMBUS: load\n"); 554} 555 556static void 557vmbus_mod_unload(void) 558{ 559 if(bootverbose) 560 printf("VMBUS: unload\n"); 561} 562 563static int 564vmbus_modevent(module_t mod, int what, void *arg) 565{ 566 switch (what) { 567 568 case MOD_LOAD: 569 vmbus_mod_load(); 570 break; 571 case MOD_UNLOAD: 572 vmbus_mod_unload(); 573 break; 574 } 575 576 return (0); 577} 578 579static device_method_t vmbus_methods[] = { 580 /** Device interface */ 581 DEVMETHOD(device_identify, vmbus_identify), 582 DEVMETHOD(device_probe, vmbus_probe), 583 DEVMETHOD(device_attach, vmbus_attach), 584 DEVMETHOD(device_detach, vmbus_detach), 585 DEVMETHOD(device_shutdown, bus_generic_shutdown), 586 DEVMETHOD(device_suspend, bus_generic_suspend), 587 DEVMETHOD(device_resume, bus_generic_resume), 588 589 /** Bus interface */ 590 DEVMETHOD(bus_add_child, bus_generic_add_child), 591 DEVMETHOD(bus_print_child, bus_generic_print_child), 592 DEVMETHOD(bus_read_ivar, vmbus_read_ivar), 593 DEVMETHOD(bus_write_ivar, vmbus_write_ivar), 594 595 { 0, 0 } }; 596 597static char driver_name[] = "vmbus"; 598static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; 599 600 601devclass_t vmbus_devclass; 602 603DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); 604MODULE_VERSION(vmbus,1); 605 606/* TODO: We want to be earlier than SI_SUB_VFS */ 607SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL); 608 609