hv_vmbus_drv_freebsd.c revision 256281
1/*- 2 * Copyright (c) 2009-2012 Microsoft Corp. 3 * Copyright (c) 2012 NetApp Inc. 4 * Copyright (c) 2012 Citrix Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * VM Bus Driver Implementation 31 */ 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c 256276 2013-10-10 16:25:53Z dim $"); 34 35#include <sys/param.h> 36#include <sys/bus.h> 37#include <sys/kernel.h> 38#include <sys/lock.h> 39#include <sys/malloc.h> 40#include <sys/module.h> 41#include <sys/sysctl.h> 42#include <sys/syslog.h> 43#include <sys/systm.h> 44#include <sys/rtprio.h> 45#include <sys/interrupt.h> 46#include <sys/sx.h> 47#include <sys/taskqueue.h> 48#include <sys/mutex.h> 49#include <sys/smp.h> 50 51#include <machine/resource.h> 52#include <sys/rman.h> 53 54#include <machine/stdarg.h> 55#include <machine/intr_machdep.h> 56#include <sys/pcpu.h> 57 58#include "hv_vmbus_priv.h" 59 60 61#define VMBUS_IRQ 0x5 62 63static struct intr_event *hv_msg_intr_event; 64static struct intr_event *hv_event_intr_event; 65static void *msg_swintr; 66static void *event_swintr; 67static device_t vmbus_devp; 68static void *vmbus_cookiep; 69static int vmbus_rid; 70struct resource *intr_res; 71static int vmbus_irq = VMBUS_IRQ; 72static int vmbus_inited; 73static hv_setup_args setup_args; /* only CPU 0 supported at this time */ 74 75/** 76 * @brief Software interrupt thread routine to handle channel messages from 77 * the hypervisor. 78 */ 79static void 80vmbus_msg_swintr(void *dummy) 81{ 82 int cpu; 83 void* page_addr; 84 hv_vmbus_message* msg; 85 hv_vmbus_message* copied; 86 87 cpu = PCPU_GET(cpuid); 88 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 89 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 90 91 for (;;) { 92 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { 93 break; /* no message */ 94 } else { 95 copied = malloc(sizeof(hv_vmbus_message), 96 M_DEVBUF, M_NOWAIT); 97 KASSERT(copied != NULL, 98 ("Error VMBUS: malloc failed to allocate" 99 " hv_vmbus_message!")); 100 if (copied == NULL) 101 continue; 102 memcpy(copied, msg, sizeof(hv_vmbus_message)); 103 hv_queue_work_item(hv_vmbus_g_connection.work_queue, 104 hv_vmbus_on_channel_message, copied); 105 } 106 107 msg->header.message_type = HV_MESSAGE_TYPE_NONE; 108 109 /* 110 * Make sure the write to message_type (ie set to 111 * HV_MESSAGE_TYPE_NONE) happens before we read the 112 * message_pending and EOMing. Otherwise, the EOMing will 113 * not deliver any more messages 114 * since there is no empty slot 115 */ 116 wmb(); 117 118 if (msg->header.message_flags.u.message_pending) { 119 /* 120 * This will cause message queue rescan to possibly 121 * deliver another msg from the hypervisor 122 */ 123 wrmsr(HV_X64_MSR_EOM, 0); 124 } 125 } 126} 127 128/** 129 * @brief Interrupt filter routine for VMBUS. 130 * 131 * The purpose of this routine is to determine the type of VMBUS protocol 132 * message to process - an event or a channel message. 133 * As this is an interrupt filter routine, the function runs in a very 134 * restricted envinronment. From the manpage for bus_setup_intr(9) 135 * 136 * In this restricted environment, care must be taken to account for all 137 * races. A careful analysis of races should be done as well. It is gener- 138 * ally cheaper to take an extra interrupt, for example, than to protect 139 * variables with spinlocks. Read, modify, write cycles of hardware regis- 140 * ters need to be carefully analyzed if other threads are accessing the 141 * same registers. 142 */ 143static int 144hv_vmbus_isr(void *unused) 145{ 146 int cpu; 147 hv_vmbus_message* msg; 148 hv_vmbus_synic_event_flags* event; 149 void* page_addr; 150 151 cpu = PCPU_GET(cpuid); 152 /* (Temporary limit) */ 153 KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); 154 155 /* 156 * The Windows team has advised that we check for events 157 * before checking for messages. This is the way they do it 158 * in Windows when running as a guest in Hyper-V 159 */ 160 161 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; 162 event = (hv_vmbus_synic_event_flags*) 163 page_addr + HV_VMBUS_MESSAGE_SINT; 164 165 /* Since we are a child, we only need to check bit 0 */ 166 if (synch_test_and_clear_bit(0, &event->flags32[0])) { 167 swi_sched(event_swintr, 0); 168 } 169 170 /* Check if there are actual msgs to be process */ 171 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 172 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 173 174 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { 175 swi_sched(msg_swintr, 0); 176 } 177 178 return FILTER_HANDLED; 179} 180 181static int 182vmbus_read_ivar( 183 device_t dev, 184 device_t child, 185 int index, 186 uintptr_t* result) 187{ 188 struct hv_device *child_dev_ctx = device_get_ivars(child); 189 190 switch (index) { 191 192 case HV_VMBUS_IVAR_TYPE: 193 *result = (uintptr_t) &child_dev_ctx->class_id; 194 return (0); 195 case HV_VMBUS_IVAR_INSTANCE: 196 *result = (uintptr_t) &child_dev_ctx->device_id; 197 return (0); 198 case HV_VMBUS_IVAR_DEVCTX: 199 *result = (uintptr_t) child_dev_ctx; 200 return (0); 201 case HV_VMBUS_IVAR_NODE: 202 *result = (uintptr_t) child_dev_ctx->device; 203 return (0); 204 } 205 return (ENOENT); 206} 207 208static int 209vmbus_write_ivar( 210 device_t dev, 211 device_t child, 212 int index, 213 uintptr_t value) 214{ 215 switch (index) { 216 217 case HV_VMBUS_IVAR_TYPE: 218 case HV_VMBUS_IVAR_INSTANCE: 219 case HV_VMBUS_IVAR_DEVCTX: 220 case HV_VMBUS_IVAR_NODE: 221 /* read-only */ 222 return (EINVAL); 223 } 224 return (ENOENT); 225} 226 227struct hv_device* 228hv_vmbus_child_device_create( 229 hv_guid type, 230 hv_guid instance, 231 hv_vmbus_channel* channel) 232{ 233 hv_device* child_dev; 234 235 /* 236 * Allocate the new child device 237 */ 238 child_dev = malloc(sizeof(hv_device), M_DEVBUF, 239 M_NOWAIT | M_ZERO); 240 KASSERT(child_dev != NULL, 241 ("Error VMBUS: malloc failed to allocate hv_device!")); 242 243 if (child_dev == NULL) 244 return (NULL); 245 246 child_dev->channel = channel; 247 memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); 248 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); 249 250 return (child_dev); 251} 252 253static void 254print_dev_guid(struct hv_device *dev) 255{ 256 int i; 257 unsigned char guid_name[100]; 258 for (i = 0; i < 32; i += 2) 259 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); 260 if(bootverbose) 261 printf("VMBUS: Class ID: %s\n", guid_name); 262} 263 264int 265hv_vmbus_child_device_register(struct hv_device *child_dev) 266{ 267 device_t child; 268 int ret = 0; 269 270 print_dev_guid(child_dev); 271 272 273 child = device_add_child(vmbus_devp, NULL, -1); 274 child_dev->device = child; 275 device_set_ivars(child, child_dev); 276 277 mtx_lock(&Giant); 278 ret = device_probe_and_attach(child); 279 mtx_unlock(&Giant); 280 281 return (0); 282} 283 284int 285hv_vmbus_child_device_unregister(struct hv_device *child_dev) 286{ 287 int ret = 0; 288 /* 289 * XXXKYS: Ensure that this is the opposite of 290 * device_add_child() 291 */ 292 mtx_lock(&Giant); 293 ret = device_delete_child(vmbus_devp, child_dev->device); 294 mtx_unlock(&Giant); 295 return(ret); 296} 297 298static void vmbus_identify(driver_t *driver, device_t parent) { 299 BUS_ADD_CHILD(parent, 0, "vmbus", 0); 300 if (device_find_child(parent, "vmbus", 0) == NULL) { 301 BUS_ADD_CHILD(parent, 0, "vmbus", 0); 302 } 303} 304 305static int 306vmbus_probe(device_t dev) { 307 if(bootverbose) 308 device_printf(dev, "VMBUS: probe\n"); 309 310 if (!hv_vmbus_query_hypervisor_presence()) 311 return (ENXIO); 312 313 device_set_desc(dev, "Vmbus Devices"); 314 315 return (0); 316} 317 318/** 319 * @brief Main vmbus driver initialization routine. 320 * 321 * Here, we 322 * - initialize the vmbus driver context 323 * - setup various driver entry points 324 * - invoke the vmbus hv main init routine 325 * - get the irq resource 326 * - invoke the vmbus to add the vmbus root device 327 * - setup the vmbus root device 328 * - retrieve the channel offers 329 */ 330static int 331vmbus_bus_init(void) 332{ 333 struct ioapic_intsrc { 334 struct intsrc io_intsrc; 335 u_int io_irq; 336 u_int io_intpin:8; 337 u_int io_vector:8; 338 u_int io_cpu:8; 339 u_int io_activehi:1; 340 u_int io_edgetrigger:1; 341 u_int io_masked:1; 342 int io_bus:4; 343 uint32_t io_lowreg; 344 }; 345 int i, ret; 346 unsigned int vector = 0; 347 struct intsrc *isrc; 348 struct ioapic_intsrc *intpin; 349 350 if (vmbus_inited) 351 return (0); 352 353 vmbus_inited = 1; 354 355 ret = hv_vmbus_init(); 356 357 if (ret) { 358 if(bootverbose) 359 printf("Error VMBUS: Hypervisor Initialization Failed!\n"); 360 return (ret); 361 } 362 363 ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, 364 NULL, SWI_CLOCK, 0, &msg_swintr); 365 366 if (ret) 367 goto cleanup; 368 369 /* 370 * Message SW interrupt handler checks a per-CPU page and 371 * thus the thread needs to be bound to CPU-0 - which is where 372 * all interrupts are processed. 373 */ 374 ret = intr_event_bind(hv_msg_intr_event, 0); 375 376 if (ret) 377 goto cleanup1; 378 379 ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, 380 NULL, SWI_CLOCK, 0, &event_swintr); 381 382 if (ret) 383 goto cleanup1; 384 385 intr_res = bus_alloc_resource(vmbus_devp, 386 SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); 387 388 if (intr_res == NULL) { 389 ret = ENOMEM; /* XXXKYS: Need a better errno */ 390 goto cleanup2; 391 } 392 393 /* 394 * Setup interrupt filter handler 395 */ 396 ret = bus_setup_intr(vmbus_devp, intr_res, 397 INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, 398 NULL, &vmbus_cookiep); 399 400 if (ret != 0) 401 goto cleanup3; 402 403 ret = bus_bind_intr(vmbus_devp, intr_res, 0); 404 if (ret != 0) 405 goto cleanup4; 406 407 isrc = intr_lookup_source(vmbus_irq); 408 if ((isrc == NULL) || (isrc->is_event == NULL)) { 409 ret = EINVAL; 410 goto cleanup4; 411 } 412 413 /* vector = isrc->is_event->ie_vector; */ 414 intpin = (struct ioapic_intsrc *)isrc; 415 vector = intpin->io_vector; 416 417 if(bootverbose) 418 printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); 419 420 /** 421 * Notify the hypervisor of our irq. 422 */ 423 setup_args.vector = vector; 424 for(i = 0; i < 2; i++) { 425 setup_args.page_buffers[i] = 426 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); 427 if (setup_args.page_buffers[i] == NULL) { 428 KASSERT(setup_args.page_buffers[i] != NULL, 429 ("Error VMBUS: malloc failed!")); 430 if (i > 0) 431 free(setup_args.page_buffers[0], M_DEVBUF); 432 goto cleanup4; 433 } 434 } 435 436 /* only CPU #0 supported at this time */ 437 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); 438 439 /* 440 * Connect to VMBus in the root partition 441 */ 442 ret = hv_vmbus_connect(); 443 444 if (ret != 0) 445 goto cleanup4; 446 447 hv_vmbus_request_channel_offers(); 448 return (ret); 449 450 cleanup4: 451 452 /* 453 * remove swi, bus and intr resource 454 */ 455 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); 456 457 cleanup3: 458 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); 459 460 cleanup2: 461 swi_remove(event_swintr); 462 463 cleanup1: 464 swi_remove(msg_swintr); 465 466 cleanup: 467 hv_vmbus_cleanup(); 468 469 return (ret); 470} 471 472static int 473vmbus_attach(device_t dev) 474{ 475 if(bootverbose) 476 device_printf(dev, "VMBUS: attach dev: %p\n", dev); 477 vmbus_devp = dev; 478 479 /* 480 * If the system has already booted and thread 481 * scheduling is possible indicated by the global 482 * cold set to zero, we just call the driver 483 * initialization directly. 484 */ 485 if (!cold) 486 vmbus_bus_init(); 487 488 return (0); 489} 490 491static void 492vmbus_init(void) 493{ 494 /* 495 * If the system has already booted and thread 496 * scheduling is possible indicated by the global 497 * cold set to zero, we just call the driver 498 * initialization directly. 499 */ 500 if (!cold) 501 vmbus_bus_init(); 502} 503 504static void 505vmbus_bus_exit(void) 506{ 507 int i; 508 509 hv_vmbus_release_unattached_channels(); 510 hv_vmbus_disconnect(); 511 512 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); 513 514 for(i = 0; i < 2; i++) { 515 if (setup_args.page_buffers[i] != 0) 516 free(setup_args.page_buffers[i], M_DEVBUF); 517 } 518 519 hv_vmbus_cleanup(); 520 521 /* remove swi, bus and intr resource */ 522 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); 523 524 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); 525 526 swi_remove(msg_swintr); 527 swi_remove(event_swintr); 528 529 return; 530} 531 532static void 533vmbus_exit(void) 534{ 535 vmbus_bus_exit(); 536} 537 538static int 539vmbus_detach(device_t dev) 540{ 541 vmbus_exit(); 542 return (0); 543} 544 545static void 546vmbus_mod_load(void) 547{ 548 if(bootverbose) 549 printf("VMBUS: load\n"); 550} 551 552static void 553vmbus_mod_unload(void) 554{ 555 if(bootverbose) 556 printf("VMBUS: unload\n"); 557} 558 559static int 560vmbus_modevent(module_t mod, int what, void *arg) 561{ 562 switch (what) { 563 564 case MOD_LOAD: 565 vmbus_mod_load(); 566 break; 567 case MOD_UNLOAD: 568 vmbus_mod_unload(); 569 break; 570 } 571 572 return (0); 573} 574 575static device_method_t vmbus_methods[] = { 576 /** Device interface */ 577 DEVMETHOD(device_identify, vmbus_identify), 578 DEVMETHOD(device_probe, vmbus_probe), 579 DEVMETHOD(device_attach, vmbus_attach), 580 DEVMETHOD(device_detach, vmbus_detach), 581 DEVMETHOD(device_shutdown, bus_generic_shutdown), 582 DEVMETHOD(device_suspend, bus_generic_suspend), 583 DEVMETHOD(device_resume, bus_generic_resume), 584 585 /** Bus interface */ 586 DEVMETHOD(bus_add_child, bus_generic_add_child), 587 DEVMETHOD(bus_print_child, bus_generic_print_child), 588 DEVMETHOD(bus_read_ivar, vmbus_read_ivar), 589 DEVMETHOD(bus_write_ivar, vmbus_write_ivar), 590 591 { 0, 0 } }; 592 593static char driver_name[] = "vmbus"; 594static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; 595 596 597devclass_t vmbus_devclass; 598 599DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); 600MODULE_VERSION(vmbus,1); 601 602/* TODO: We want to be earlier than SI_SUB_VFS */ 603SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL); 604 605