hv_vmbus_drv_freebsd.c revision 255414
1/*- 2 * Copyright (c) 2009-2012 Microsoft Corp. 3 * Copyright (c) 2012 NetApp Inc. 4 * Copyright (c) 2012 Citrix Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * VM Bus Driver Implementation 31 */ 32 33#include <sys/param.h> 34#include <sys/bus.h> 35#include <sys/kernel.h> 36#include <sys/lock.h> 37#include <sys/malloc.h> 38#include <sys/module.h> 39#include <sys/sysctl.h> 40#include <sys/syslog.h> 41#include <sys/systm.h> 42#include <sys/rtprio.h> 43#include <sys/interrupt.h> 44#include <sys/sx.h> 45#include <sys/taskqueue.h> 46#include <sys/mutex.h> 47#include <sys/smp.h> 48 49#include <machine/resource.h> 50#include <sys/rman.h> 51 52#include <machine/stdarg.h> 53#include <machine/intr_machdep.h> 54#include <sys/pcpu.h> 55 56#include "hv_vmbus_priv.h" 57 58 59#define VMBUS_IRQ 0x5 60 61static struct intr_event *hv_msg_intr_event; 62static struct intr_event *hv_event_intr_event; 63static void *msg_swintr; 64static void *event_swintr; 65static device_t vmbus_devp; 66static void *vmbus_cookiep; 67static int vmbus_rid; 68struct resource *intr_res; 69static int vmbus_irq = VMBUS_IRQ; 70static int vmbus_inited; 71static hv_setup_args setup_args; /* only CPU 0 supported at this time */ 72 73/** 74 * @brief Software interrupt thread routine to handle channel messages from 75 * the hypervisor. 76 */ 77static void 78vmbus_msg_swintr(void *dummy) 79{ 80 int cpu; 81 void* page_addr; 82 hv_vmbus_message* msg; 83 hv_vmbus_message* copied; 84 85 cpu = PCPU_GET(cpuid); 86 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 87 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 88 89 for (;;) { 90 if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { 91 break; /* no message */ 92 } else { 93 copied = malloc(sizeof(hv_vmbus_message), 94 M_DEVBUF, M_NOWAIT); 95 KASSERT(copied != NULL, 96 ("Error VMBUS: malloc failed to allocate" 97 " hv_vmbus_message!")); 98 if (copied == NULL) 99 continue; 100 memcpy(copied, msg, sizeof(hv_vmbus_message)); 101 hv_queue_work_item(hv_vmbus_g_connection.work_queue, 102 hv_vmbus_on_channel_message, copied); 103 } 104 105 msg->header.message_type = HV_MESSAGE_TYPE_NONE; 106 107 /* 108 * Make sure the write to message_type (ie set to 109 * HV_MESSAGE_TYPE_NONE) happens before we read the 110 * message_pending and EOMing. Otherwise, the EOMing will 111 * not deliver any more messages 112 * since there is no empty slot 113 */ 114 wmb(); 115 116 if (msg->header.message_flags.message_pending) { 117 /* 118 * This will cause message queue rescan to possibly 119 * deliver another msg from the hypervisor 120 */ 121 wrmsr(HV_X64_MSR_EOM, 0); 122 } 123 } 124} 125 126/** 127 * @brief Interrupt filter routine for VMBUS. 128 * 129 * The purpose of this routine is to determine the type of VMBUS protocol 130 * message to process - an event or a channel message. 131 * As this is an interrupt filter routine, the function runs in a very 132 * restricted envinronment. From the manpage for bus_setup_intr(9) 133 * 134 * In this restricted environment, care must be taken to account for all 135 * races. A careful analysis of races should be done as well. It is gener- 136 * ally cheaper to take an extra interrupt, for example, than to protect 137 * variables with spinlocks. Read, modify, write cycles of hardware regis- 138 * ters need to be carefully analyzed if other threads are accessing the 139 * same registers. 140 */ 141static int 142hv_vmbus_isr(void *unused) 143{ 144 int cpu; 145 hv_vmbus_message* msg; 146 hv_vmbus_synic_event_flags* event; 147 void* page_addr; 148 149 cpu = PCPU_GET(cpuid); 150 /* (Temporary limit) */ 151 KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); 152 153 /* 154 * The Windows team has advised that we check for events 155 * before checking for messages. This is the way they do it 156 * in Windows when running as a guest in Hyper-V 157 */ 158 159 page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; 160 event = (hv_vmbus_synic_event_flags*) 161 page_addr + HV_VMBUS_MESSAGE_SINT; 162 163 /* Since we are a child, we only need to check bit 0 */ 164 if (synch_test_and_clear_bit(0, &event->flags32[0])) { 165 swi_sched(event_swintr, 0); 166 } 167 168 /* Check if there are actual msgs to be process */ 169 page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; 170 msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; 171 172 if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { 173 swi_sched(msg_swintr, 0); 174 } 175 176 return FILTER_HANDLED; 177} 178 179static int 180vmbus_read_ivar( 181 device_t dev, 182 device_t child, 183 int index, 184 uintptr_t* result) 185{ 186 struct hv_device *child_dev_ctx = device_get_ivars(child); 187 188 switch (index) { 189 190 case HV_VMBUS_IVAR_TYPE: 191 *result = (uintptr_t) &child_dev_ctx->class_id; 192 return (0); 193 case HV_VMBUS_IVAR_INSTANCE: 194 *result = (uintptr_t) &child_dev_ctx->device_id; 195 return (0); 196 case HV_VMBUS_IVAR_DEVCTX: 197 *result = (uintptr_t) child_dev_ctx; 198 return (0); 199 case HV_VMBUS_IVAR_NODE: 200 *result = (uintptr_t) child_dev_ctx->device; 201 return (0); 202 } 203 return (ENOENT); 204} 205 206static int 207vmbus_write_ivar( 208 device_t dev, 209 device_t child, 210 int index, 211 uintptr_t value) 212{ 213 switch (index) { 214 215 case HV_VMBUS_IVAR_TYPE: 216 case HV_VMBUS_IVAR_INSTANCE: 217 case HV_VMBUS_IVAR_DEVCTX: 218 case HV_VMBUS_IVAR_NODE: 219 /* read-only */ 220 return (EINVAL); 221 } 222 return (ENOENT); 223} 224 225struct hv_device* 226hv_vmbus_child_device_create( 227 hv_guid type, 228 hv_guid instance, 229 hv_vmbus_channel* channel) 230{ 231 hv_device* child_dev; 232 233 /* 234 * Allocate the new child device 235 */ 236 child_dev = malloc(sizeof(hv_device), M_DEVBUF, 237 M_NOWAIT | M_ZERO); 238 KASSERT(child_dev != NULL, 239 ("Error VMBUS: malloc failed to allocate hv_device!")); 240 241 if (child_dev == NULL) 242 return (NULL); 243 244 child_dev->channel = channel; 245 memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); 246 memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); 247 248 return (child_dev); 249} 250 251static void 252print_dev_guid(struct hv_device *dev) 253{ 254 int i; 255 unsigned char guid_name[100]; 256 for (i = 0; i < 32; i += 2) 257 sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); 258 if(bootverbose) 259 printf("VMBUS: Class ID: %s\n", guid_name); 260} 261 262int 263hv_vmbus_child_device_register(struct hv_device *child_dev) 264{ 265 device_t child; 266 int ret = 0; 267 268 print_dev_guid(child_dev); 269 270 271 child = device_add_child(vmbus_devp, NULL, -1); 272 child_dev->device = child; 273 device_set_ivars(child, child_dev); 274 275 mtx_lock(&Giant); 276 ret = device_probe_and_attach(child); 277 mtx_unlock(&Giant); 278 279 return (0); 280} 281 282int 283hv_vmbus_child_device_unregister(struct hv_device *child_dev) 284{ 285 int ret = 0; 286 /* 287 * XXXKYS: Ensure that this is the opposite of 288 * device_add_child() 289 */ 290 mtx_lock(&Giant); 291 ret = device_delete_child(vmbus_devp, child_dev->device); 292 mtx_unlock(&Giant); 293 return(ret); 294} 295 296static void vmbus_identify(driver_t *driver, device_t parent) { 297 BUS_ADD_CHILD(parent, 0, "vmbus", 0); 298 if (device_find_child(parent, "vmbus", 0) == NULL) { 299 BUS_ADD_CHILD(parent, 0, "vmbus", 0); 300 } 301} 302 303static int 304vmbus_probe(device_t dev) { 305 if(bootverbose) 306 device_printf(dev, "VMBUS: probe\n"); 307 308 if (!hv_vmbus_query_hypervisor_presence()) 309 return (ENXIO); 310 311 device_set_desc(dev, "Vmbus Devices"); 312 313 return (0); 314} 315 316/** 317 * @brief Main vmbus driver initialization routine. 318 * 319 * Here, we 320 * - initialize the vmbus driver context 321 * - setup various driver entry points 322 * - invoke the vmbus hv main init routine 323 * - get the irq resource 324 * - invoke the vmbus to add the vmbus root device 325 * - setup the vmbus root device 326 * - retrieve the channel offers 327 */ 328static int 329vmbus_bus_init(void) 330{ 331 struct ioapic_intsrc { 332 struct intsrc io_intsrc; 333 u_int io_irq; 334 u_int io_intpin:8; 335 u_int io_vector:8; 336 u_int io_cpu:8; 337 u_int io_activehi:1; 338 u_int io_edgetrigger:1; 339 u_int io_masked:1; 340 int io_bus:4; 341 uint32_t io_lowreg; 342 }; 343 int i, ret; 344 unsigned int vector = 0; 345 struct intsrc *isrc; 346 struct ioapic_intsrc *intpin; 347 348 if (vmbus_inited) 349 return (0); 350 351 vmbus_inited = 1; 352 353 ret = hv_vmbus_init(); 354 355 if (ret) { 356 if(bootverbose) 357 printf("Error VMBUS: Hypervisor Initialization Failed!\n"); 358 return (ret); 359 } 360 361 ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, 362 NULL, SWI_CLOCK, 0, &msg_swintr); 363 364 if (ret) 365 goto cleanup; 366 367 /* 368 * Message SW interrupt handler checks a per-CPU page and 369 * thus the thread needs to be bound to CPU-0 - which is where 370 * all interrupts are processed. 371 */ 372 ret = intr_event_bind(hv_msg_intr_event, 0); 373 374 if (ret) 375 goto cleanup1; 376 377 ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, 378 NULL, SWI_CLOCK, 0, &event_swintr); 379 380 if (ret) 381 goto cleanup1; 382 383 intr_res = bus_alloc_resource(vmbus_devp, 384 SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); 385 386 if (intr_res == NULL) { 387 ret = ENOMEM; /* XXXKYS: Need a better errno */ 388 goto cleanup2; 389 } 390 391 /* 392 * Setup interrupt filter handler 393 */ 394 ret = bus_setup_intr(vmbus_devp, intr_res, 395 INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, 396 NULL, &vmbus_cookiep); 397 398 if (ret != 0) 399 goto cleanup3; 400 401 ret = bus_bind_intr(vmbus_devp, intr_res, 0); 402 if (ret != 0) 403 goto cleanup4; 404 405 isrc = intr_lookup_source(vmbus_irq); 406 if ((isrc == NULL) || (isrc->is_event == NULL)) { 407 ret = EINVAL; 408 goto cleanup4; 409 } 410 411 /* vector = isrc->is_event->ie_vector; */ 412 intpin = (struct ioapic_intsrc *)isrc; 413 vector = intpin->io_vector; 414 415 if(bootverbose) 416 printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); 417 418 /** 419 * Notify the hypervisor of our irq. 420 */ 421 setup_args.vector = vector; 422 for(i = 0; i < 2; i++) { 423 setup_args.page_buffers[i] = 424 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); 425 if (setup_args.page_buffers[i] == NULL) { 426 KASSERT(setup_args.page_buffers[i] != NULL, 427 ("Error VMBUS: malloc failed!")); 428 if (i > 0) 429 free(setup_args.page_buffers[0], M_DEVBUF); 430 goto cleanup4; 431 } 432 } 433 434 /* only CPU #0 supported at this time */ 435 smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); 436 437 /* 438 * Connect to VMBus in the root partition 439 */ 440 ret = hv_vmbus_connect(); 441 442 if (ret != 0) 443 goto cleanup4; 444 445 hv_vmbus_request_channel_offers(); 446 return (ret); 447 448 cleanup4: 449 450 /* 451 * remove swi, bus and intr resource 452 */ 453 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); 454 455 cleanup3: 456 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); 457 458 cleanup2: 459 swi_remove(event_swintr); 460 461 cleanup1: 462 swi_remove(msg_swintr); 463 464 cleanup: 465 hv_vmbus_cleanup(); 466 467 return (ret); 468} 469 470static int 471vmbus_attach(device_t dev) 472{ 473 if(bootverbose) 474 device_printf(dev, "VMBUS: attach dev: %p\n", dev); 475 vmbus_devp = dev; 476 477 /* 478 * If the system has already booted and thread 479 * scheduling is possible indicated by the global 480 * cold set to zero, we just call the driver 481 * initialization directly. 482 */ 483 if (!cold) 484 vmbus_bus_init(); 485 486 return (0); 487} 488 489static void 490vmbus_init(void) 491{ 492 /* 493 * If the system has already booted and thread 494 * scheduling is possible indicated by the global 495 * cold set to zero, we just call the driver 496 * initialization directly. 497 */ 498 if (!cold) 499 vmbus_bus_init(); 500} 501 502static void 503vmbus_bus_exit(void) 504{ 505 int i; 506 507 hv_vmbus_release_unattached_channels(); 508 hv_vmbus_disconnect(); 509 510 smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); 511 512 for(i = 0; i < 2; i++) { 513 if (setup_args.page_buffers[i] != 0) 514 free(setup_args.page_buffers[i], M_DEVBUF); 515 } 516 517 hv_vmbus_cleanup(); 518 519 /* remove swi, bus and intr resource */ 520 bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); 521 522 bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); 523 524 swi_remove(msg_swintr); 525 swi_remove(event_swintr); 526 527 return; 528} 529 530static void 531vmbus_exit(void) 532{ 533 vmbus_bus_exit(); 534} 535 536static int 537vmbus_detach(device_t dev) 538{ 539 vmbus_exit(); 540 return (0); 541} 542 543static void 544vmbus_mod_load(void) 545{ 546 if(bootverbose) 547 printf("VMBUS: load\n"); 548} 549 550static void 551vmbus_mod_unload(void) 552{ 553 if(bootverbose) 554 printf("VMBUS: unload\n"); 555} 556 557static int 558vmbus_modevent(module_t mod, int what, void *arg) 559{ 560 switch (what) { 561 562 case MOD_LOAD: 563 vmbus_mod_load(); 564 break; 565 case MOD_UNLOAD: 566 vmbus_mod_unload(); 567 break; 568 } 569 570 return (0); 571} 572 573static device_method_t vmbus_methods[] = { 574 /** Device interface */ 575 DEVMETHOD(device_identify, vmbus_identify), 576 DEVMETHOD(device_probe, vmbus_probe), 577 DEVMETHOD(device_attach, vmbus_attach), 578 DEVMETHOD(device_detach, vmbus_detach), 579 DEVMETHOD(device_shutdown, bus_generic_shutdown), 580 DEVMETHOD(device_suspend, bus_generic_suspend), 581 DEVMETHOD(device_resume, bus_generic_resume), 582 583 /** Bus interface */ 584 DEVMETHOD(bus_add_child, bus_generic_add_child), 585 DEVMETHOD(bus_print_child, bus_generic_print_child), 586 DEVMETHOD(bus_read_ivar, vmbus_read_ivar), 587 DEVMETHOD(bus_write_ivar, vmbus_write_ivar), 588 589 { 0, 0 } }; 590 591static char driver_name[] = "vmbus"; 592static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; 593 594 595devclass_t vmbus_devclass; 596 597DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); 598MODULE_VERSION(vmbus,1); 599 600/* TODO: We want to be earlier than SI_SUB_VFS */ 601SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL); 602 603