/* hv_connection.c revision 300105 */
1250199Sgrehan/*- 2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp. 3250199Sgrehan * Copyright (c) 2012 NetApp Inc. 4250199Sgrehan * Copyright (c) 2012 Citrix Inc. 5250199Sgrehan * All rights reserved. 6250199Sgrehan * 7250199Sgrehan * Redistribution and use in source and binary forms, with or without 8250199Sgrehan * modification, are permitted provided that the following conditions 9250199Sgrehan * are met: 10250199Sgrehan * 1. Redistributions of source code must retain the above copyright 11250199Sgrehan * notice unmodified, this list of conditions, and the following 12250199Sgrehan * disclaimer. 13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 14250199Sgrehan * notice, this list of conditions and the following disclaimer in the 15250199Sgrehan * documentation and/or other materials provided with the distribution. 16250199Sgrehan * 17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27250199Sgrehan */ 28250199Sgrehan 29250199Sgrehan#include <sys/param.h> 30296028Ssephe#include <sys/kernel.h> 31250199Sgrehan#include <sys/malloc.h> 32250199Sgrehan#include <sys/systm.h> 33250199Sgrehan#include <sys/lock.h> 34250199Sgrehan#include <sys/mutex.h> 35250199Sgrehan#include <machine/bus.h> 36299927Ssephe#include <machine/atomic.h> 37250199Sgrehan#include <vm/vm.h> 38250199Sgrehan#include <vm/vm_param.h> 39250199Sgrehan#include <vm/pmap.h> 40250199Sgrehan 41300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h> 42300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h> 43250199Sgrehan 44250199Sgrehan/* 45250199Sgrehan * Globals 46250199Sgrehan */ 47250199Sgrehanhv_vmbus_connection hv_vmbus_g_connection = 48250199Sgrehan { .connect_state = HV_DISCONNECTED, 49250199Sgrehan .next_gpadl_handle = 0xE1E10, }; 50250199Sgrehan 51282212Swhuuint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008; 52282212Swhu 53282212Swhustatic uint32_t 54282212Swhuhv_vmbus_get_next_version(uint32_t current_ver) 55282212Swhu{ 56282212Swhu switch (current_ver) { 57282212Swhu case (HV_VMBUS_VERSION_WIN7): 58282212Swhu return(HV_VMBUS_VERSION_WS2008); 59282212Swhu 60282212Swhu case (HV_VMBUS_VERSION_WIN8): 61282212Swhu return(HV_VMBUS_VERSION_WIN7); 62282212Swhu 63282212Swhu case (HV_VMBUS_VERSION_WIN8_1): 64282212Swhu return(HV_VMBUS_VERSION_WIN8); 65282212Swhu 66282212Swhu case (HV_VMBUS_VERSION_WS2008): 67282212Swhu default: 68282212Swhu return(HV_VMBUS_VERSION_INVALID); 69282212Swhu } 70282212Swhu} 71282212Swhu 72250199Sgrehan/** 73282212Swhu * Negotiate the highest supported hypervisor version. 
74282212Swhu */ 75282212Swhustatic int 76282212Swhuhv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, 77282212Swhu uint32_t version) 78282212Swhu{ 79282212Swhu int ret = 0; 80282212Swhu hv_vmbus_channel_initiate_contact *msg; 81282212Swhu 82282212Swhu sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); 83282212Swhu msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; 84282212Swhu 85282212Swhu msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; 86282212Swhu msg->vmbus_version_requested = version; 87282212Swhu 88282212Swhu msg->interrupt_page = hv_get_phys_addr( 89282212Swhu hv_vmbus_g_connection.interrupt_page); 90282212Swhu 91282212Swhu msg->monitor_page_1 = hv_get_phys_addr( 92295309Ssephe hv_vmbus_g_connection.monitor_page_1); 93282212Swhu 94295308Ssephe msg->monitor_page_2 = hv_get_phys_addr( 95295309Ssephe hv_vmbus_g_connection.monitor_page_2); 96282212Swhu 97282212Swhu /** 98282212Swhu * Add to list before we send the request since we may receive the 99282212Swhu * response before returning from this routine 100282212Swhu */ 101297635Ssephe mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); 102282212Swhu 103282212Swhu TAILQ_INSERT_TAIL( 104282212Swhu &hv_vmbus_g_connection.channel_msg_anchor, 105282212Swhu msg_info, 106282212Swhu msg_list_entry); 107282212Swhu 108297635Ssephe mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); 109282212Swhu 110282212Swhu ret = hv_vmbus_post_message( 111282212Swhu msg, 112282212Swhu sizeof(hv_vmbus_channel_initiate_contact)); 113282212Swhu 114282212Swhu if (ret != 0) { 115297635Ssephe mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); 116282212Swhu TAILQ_REMOVE( 117282212Swhu &hv_vmbus_g_connection.channel_msg_anchor, 118282212Swhu msg_info, 119282212Swhu msg_list_entry); 120297635Ssephe mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); 121282212Swhu return (ret); 122282212Swhu } 123282212Swhu 124282212Swhu /** 125282212Swhu * Wait for the connection response 126282212Swhu */ 127296028Ssephe 
ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */ 128282212Swhu 129297635Ssephe mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); 130282212Swhu TAILQ_REMOVE( 131282212Swhu &hv_vmbus_g_connection.channel_msg_anchor, 132282212Swhu msg_info, 133282212Swhu msg_list_entry); 134297635Ssephe mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); 135282212Swhu 136282212Swhu /** 137282212Swhu * Check if successful 138282212Swhu */ 139282212Swhu if (msg_info->response.version_response.version_supported) { 140282212Swhu hv_vmbus_g_connection.connect_state = HV_CONNECTED; 141282212Swhu } else { 142282212Swhu ret = ECONNREFUSED; 143282212Swhu } 144282212Swhu 145282212Swhu return (ret); 146282212Swhu} 147282212Swhu 148282212Swhu/** 149250199Sgrehan * Send a connect request on the partition service connection 150250199Sgrehan */ 151250199Sgrehanint 152250199Sgrehanhv_vmbus_connect(void) { 153250199Sgrehan int ret = 0; 154282212Swhu uint32_t version; 155250199Sgrehan hv_vmbus_channel_msg_info* msg_info = NULL; 156250199Sgrehan 157250199Sgrehan /** 158250199Sgrehan * Make sure we are not connecting or connected 159250199Sgrehan */ 160250199Sgrehan if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) { 161250199Sgrehan return (-1); 162250199Sgrehan } 163250199Sgrehan 164250199Sgrehan /** 165250199Sgrehan * Initialize the vmbus connection 166250199Sgrehan */ 167250199Sgrehan hv_vmbus_g_connection.connect_state = HV_CONNECTING; 168250199Sgrehan 169250199Sgrehan TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); 170250199Sgrehan mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", 171297635Ssephe NULL, MTX_DEF); 172250199Sgrehan 173250199Sgrehan TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor); 174250199Sgrehan mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", 175282212Swhu NULL, MTX_DEF); 176250199Sgrehan 177250199Sgrehan /** 178250199Sgrehan * Setup the vmbus event connection for channel interrupt abstraction 
179250199Sgrehan * stuff 180250199Sgrehan */ 181295309Ssephe hv_vmbus_g_connection.interrupt_page = malloc( 182250199Sgrehan PAGE_SIZE, M_DEVBUF, 183295309Ssephe M_WAITOK | M_ZERO); 184250199Sgrehan 185250199Sgrehan hv_vmbus_g_connection.recv_interrupt_page = 186250199Sgrehan hv_vmbus_g_connection.interrupt_page; 187250199Sgrehan 188250199Sgrehan hv_vmbus_g_connection.send_interrupt_page = 189250199Sgrehan ((uint8_t *) hv_vmbus_g_connection.interrupt_page + 190250199Sgrehan (PAGE_SIZE >> 1)); 191250199Sgrehan 192250199Sgrehan /** 193250199Sgrehan * Set up the monitor notification facility. The 1st page for 194250199Sgrehan * parent->child and the 2nd page for child->parent 195250199Sgrehan */ 196295309Ssephe hv_vmbus_g_connection.monitor_page_1 = malloc( 197295309Ssephe PAGE_SIZE, 198250199Sgrehan M_DEVBUF, 199295309Ssephe M_WAITOK | M_ZERO); 200295309Ssephe hv_vmbus_g_connection.monitor_page_2 = malloc( 201250199Sgrehan PAGE_SIZE, 202295309Ssephe M_DEVBUF, 203295309Ssephe M_WAITOK | M_ZERO); 204250199Sgrehan 205250199Sgrehan msg_info = (hv_vmbus_channel_msg_info*) 206250199Sgrehan malloc(sizeof(hv_vmbus_channel_msg_info) + 207250199Sgrehan sizeof(hv_vmbus_channel_initiate_contact), 208295308Ssephe M_DEVBUF, M_WAITOK | M_ZERO); 209250199Sgrehan 210294553Ssephe hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) * 211294553Ssephe HV_CHANNEL_MAX_COUNT, 212294553Ssephe M_DEVBUF, M_WAITOK | M_ZERO); 213282212Swhu /* 214282212Swhu * Find the highest vmbus version number we can support. 215250199Sgrehan */ 216282212Swhu version = HV_VMBUS_VERSION_CURRENT; 217250199Sgrehan 218282212Swhu do { 219282212Swhu ret = hv_vmbus_negotiate_version(msg_info, version); 220282212Swhu if (ret == EWOULDBLOCK) { 221282212Swhu /* 222282212Swhu * We timed out. 
223282212Swhu */ 224282212Swhu goto cleanup; 225282212Swhu } 226250199Sgrehan 227282212Swhu if (hv_vmbus_g_connection.connect_state == HV_CONNECTED) 228282212Swhu break; 229250199Sgrehan 230282212Swhu version = hv_vmbus_get_next_version(version); 231282212Swhu } while (version != HV_VMBUS_VERSION_INVALID); 232250199Sgrehan 233282212Swhu hv_vmbus_protocal_version = version; 234282212Swhu if (bootverbose) 235293870Ssephe printf("VMBUS: Protocol Version: %d.%d\n", 236282212Swhu version >> 16, version & 0xFFFF); 237250199Sgrehan 238250199Sgrehan sema_destroy(&msg_info->wait_sema); 239250199Sgrehan free(msg_info, M_DEVBUF); 240250199Sgrehan 241250199Sgrehan return (0); 242250199Sgrehan 243250199Sgrehan /* 244250199Sgrehan * Cleanup after failure! 245250199Sgrehan */ 246250199Sgrehan cleanup: 247250199Sgrehan 248250199Sgrehan hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; 249250199Sgrehan 250250199Sgrehan mtx_destroy(&hv_vmbus_g_connection.channel_lock); 251250199Sgrehan mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); 252250199Sgrehan 253250199Sgrehan if (hv_vmbus_g_connection.interrupt_page != NULL) { 254295964Ssephe free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); 255250199Sgrehan hv_vmbus_g_connection.interrupt_page = NULL; 256250199Sgrehan } 257250199Sgrehan 258295309Ssephe free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF); 259295309Ssephe free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF); 260250199Sgrehan 261250199Sgrehan if (msg_info) { 262250199Sgrehan sema_destroy(&msg_info->wait_sema); 263250199Sgrehan free(msg_info, M_DEVBUF); 264250199Sgrehan } 265250199Sgrehan 266294553Ssephe free(hv_vmbus_g_connection.channels, M_DEVBUF); 267250199Sgrehan return (ret); 268250199Sgrehan} 269250199Sgrehan 270250199Sgrehan/** 271250199Sgrehan * Send a disconnect request on the partition service connection 272250199Sgrehan */ 273250199Sgrehanint 274250199Sgrehanhv_vmbus_disconnect(void) { 275250199Sgrehan int ret = 0; 276295308Ssephe 
hv_vmbus_channel_unload msg; 277250199Sgrehan 278295308Ssephe msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD; 279250199Sgrehan 280295308Ssephe ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload)); 281250199Sgrehan 282295964Ssephe free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); 283250199Sgrehan 284250199Sgrehan mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); 285250199Sgrehan 286294553Ssephe free(hv_vmbus_g_connection.channels, M_DEVBUF); 287250199Sgrehan hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; 288250199Sgrehan 289250199Sgrehan return (ret); 290250199Sgrehan} 291250199Sgrehan 292250199Sgrehan/** 293250199Sgrehan * Handler for events 294250199Sgrehan */ 295250199Sgrehanvoid 296294886Ssephehv_vmbus_on_events(int cpu) 297250199Sgrehan{ 298300102Ssephe unsigned long *intr_flags; 299300101Ssephe hv_vmbus_synic_event_flags *event; 300282212Swhu void *page_addr; 301300102Ssephe int flag_cnt, f; 302250199Sgrehan 303282212Swhu KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " 304282212Swhu "cpu out of range!")); 305250199Sgrehan 306297908Ssephe page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; 307297908Ssephe event = (hv_vmbus_synic_event_flags *) 308297908Ssephe page_addr + HV_VMBUS_MESSAGE_SINT; 309282212Swhu if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || 310282212Swhu (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { 311300102Ssephe flag_cnt = HV_MAX_NUM_CHANNELS_SUPPORTED >> 312300101Ssephe HV_CHANNEL_ULONG_SHIFT; 313282212Swhu /* 314282212Swhu * receive size is 1/2 page and divide that by 4 bytes 315282212Swhu */ 316300101Ssephe if (atomic_testandclear_int(&event->flags32[0], 0)) 317300102Ssephe intr_flags = hv_vmbus_g_connection.recv_interrupt_page; 318300101Ssephe else 319299889Ssephe return; 320282212Swhu } else { 321282212Swhu /* 322282212Swhu * On Host with Win8 or above, the event page can be 323282212Swhu * checked directly to get the id of the channel 324282212Swhu * that has the pending 
interrupt. 325282212Swhu */ 326300102Ssephe flag_cnt = VMBUS_PCPU_GET(event_flag_cnt, cpu); 327300102Ssephe intr_flags = event->flagsul; 328282212Swhu } 329282212Swhu 330250199Sgrehan /* 331250199Sgrehan * Check events 332250199Sgrehan */ 333300102Ssephe for (f = 0; f < flag_cnt; f++) { 334300101Ssephe uint32_t rel_id_base; 335300105Ssephe unsigned long flags; 336300101Ssephe int bit; 337300101Ssephe 338300102Ssephe if (intr_flags[f] == 0) 339299892Ssephe continue; 340299892Ssephe 341300105Ssephe flags = atomic_swap_long(&intr_flags[f], 0); 342300102Ssephe rel_id_base = f << HV_CHANNEL_ULONG_SHIFT; 343299890Ssephe 344300105Ssephe while ((bit = ffsl(flags)) != 0) { 345300105Ssephe struct hv_vmbus_channel *channel; 346300105Ssephe uint32_t rel_id; 347299890Ssephe 348300105Ssephe --bit; /* NOTE: ffsl is 1-based */ 349300105Ssephe flags &= ~(1UL << bit); 350250199Sgrehan 351300105Ssephe rel_id = rel_id_base + bit; 352300105Ssephe channel = hv_vmbus_g_connection.channels[rel_id]; 353300105Ssephe 354300105Ssephe /* if channel is closed or closing */ 355300105Ssephe if (channel == NULL || channel->rxq == NULL) 356300105Ssephe continue; 357300105Ssephe 358300105Ssephe if (channel->batched_reading) 359300105Ssephe hv_ring_buffer_read_begin(&channel->inbound); 360300105Ssephe taskqueue_enqueue(channel->rxq, &channel->channel_task); 361300101Ssephe } 362250199Sgrehan } 363250199Sgrehan} 364250199Sgrehan 365250199Sgrehan/** 366250199Sgrehan * Send a msg on the vmbus's message connection 367250199Sgrehan */ 368297219Ssepheint hv_vmbus_post_message(void *buffer, size_t bufferLen) 369297219Ssephe{ 370250199Sgrehan hv_vmbus_connection_id connId; 371297219Ssephe sbintime_t time = SBT_1MS; 372297219Ssephe int retries; 373297219Ssephe int ret; 374250199Sgrehan 375297219Ssephe connId.as_uint32_t = 0; 376297219Ssephe connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID; 377250199Sgrehan 378297219Ssephe /* 379297219Ssephe * We retry to cope with transient failures caused by host side's 
380297219Ssephe * insufficient resources. 20 times should suffice in practice. 381250199Sgrehan */ 382297219Ssephe for (retries = 0; retries < 20; retries++) { 383297219Ssephe ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, 384297219Ssephe bufferLen); 385297219Ssephe if (ret == HV_STATUS_SUCCESS) 386297219Ssephe return (0); 387297219Ssephe 388297219Ssephe pause_sbt("pstmsg", time, 0, C_HARDCLOCK); 389297219Ssephe if (time < SBT_1S * 2) 390297219Ssephe time *= 2; 391250199Sgrehan } 392250199Sgrehan 393297219Ssephe KASSERT(ret == HV_STATUS_SUCCESS, 394297219Ssephe ("Error VMBUS: Message Post Failed, ret=%d\n", ret)); 395250199Sgrehan 396297219Ssephe return (EAGAIN); 397250199Sgrehan} 398250199Sgrehan 399250199Sgrehan/** 400250199Sgrehan * Send an event notification to the parent 401250199Sgrehan */ 402250199Sgrehanint 403282212Swhuhv_vmbus_set_event(hv_vmbus_channel *channel) { 404250199Sgrehan int ret = 0; 405282212Swhu uint32_t child_rel_id = channel->offer_msg.child_rel_id; 406250199Sgrehan 407250199Sgrehan /* Each uint32_t represents 32 channels */ 408250199Sgrehan 409250199Sgrehan synch_set_bit(child_rel_id & 31, 410250199Sgrehan (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page 411250199Sgrehan + (child_rel_id >> 5)))); 412282212Swhu ret = hv_vmbus_signal_event(channel->signal_event_param); 413250199Sgrehan 414250199Sgrehan return (ret); 415250199Sgrehan} 416300102Ssephe 417300102Ssephevoid 418300102Ssephevmbus_on_channel_open(const struct hv_vmbus_channel *chan) 419300102Ssephe{ 420300102Ssephe volatile int *flag_cnt_ptr; 421300102Ssephe int flag_cnt; 422300102Ssephe 423300102Ssephe flag_cnt = (chan->offer_msg.child_rel_id / HV_CHANNEL_ULONG_LEN) + 1; 424300102Ssephe flag_cnt_ptr = VMBUS_PCPU_PTR(event_flag_cnt, chan->target_cpu); 425300102Ssephe 426300102Ssephe for (;;) { 427300102Ssephe int old_flag_cnt; 428300102Ssephe 429300102Ssephe old_flag_cnt = *flag_cnt_ptr; 430300102Ssephe if (old_flag_cnt >= flag_cnt) 431300102Ssephe break; 
432300102Ssephe if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { 433300102Ssephe if (bootverbose) { 434300102Ssephe printf("VMBUS: channel%u update " 435300102Ssephe "cpu%d flag_cnt to %d\n", 436300102Ssephe chan->offer_msg.child_rel_id, 437300102Ssephe chan->target_cpu, flag_cnt); 438300102Ssephe } 439300102Ssephe break; 440300102Ssephe } 441300102Ssephe } 442300102Ssephe} 443