hv_connection.c revision 300105
198524Sfenner/*-
298524Sfenner * Copyright (c) 2009-2012,2016 Microsoft Corp.
398524Sfenner * Copyright (c) 2012 NetApp Inc.
498524Sfenner * Copyright (c) 2012 Citrix Inc.
598524Sfenner * All rights reserved.
698524Sfenner *
798524Sfenner * Redistribution and use in source and binary forms, with or without
898524Sfenner * modification, are permitted provided that the following conditions
998524Sfenner * are met:
1098524Sfenner * 1. Redistributions of source code must retain the above copyright
1198524Sfenner *    notice unmodified, this list of conditions, and the following
1298524Sfenner *    disclaimer.
1398524Sfenner * 2. Redistributions in binary form must reproduce the above copyright
1498524Sfenner *    notice, this list of conditions and the following disclaimer in the
1598524Sfenner *    documentation and/or other materials provided with the distribution.
1698524Sfenner *
1798524Sfenner * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1898524Sfenner * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1998524Sfenner * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2098524Sfenner * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2198524Sfenner * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2298524Sfenner * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2398524Sfenner * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2498524Sfenner * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2598524Sfenner * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2698524Sfenner * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2798524Sfenner */
2898524Sfenner
2998524Sfenner#include <sys/param.h>
3098524Sfenner#include <sys/kernel.h>
3198524Sfenner#include <sys/malloc.h>
3298524Sfenner#include <sys/systm.h>
3398524Sfenner#include <sys/lock.h>
3498524Sfenner#include <sys/mutex.h>
3598524Sfenner#include <machine/bus.h>
3698524Sfenner#include <machine/atomic.h>
37127668Sbms#include <vm/vm.h>
38127668Sbms#include <vm/vm_param.h>
3998524Sfenner#include <vm/pmap.h>
4098524Sfenner
4198524Sfenner#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
4298524Sfenner#include <dev/hyperv/vmbus/vmbus_var.h>
4398524Sfenner
4498524Sfenner/*
45127668Sbms * Globals
4698524Sfenner */
4798524Sfennerhv_vmbus_connection hv_vmbus_g_connection =
4898524Sfenner	{ .connect_state = HV_DISCONNECTED,
4998524Sfenner	  .next_gpadl_handle = 0xE1E10, };
5098524Sfenner
5198524Sfenneruint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
5298524Sfenner
5398524Sfennerstatic uint32_t
5498524Sfennerhv_vmbus_get_next_version(uint32_t current_ver)
5598524Sfenner{
5698524Sfenner	switch (current_ver) {
5798524Sfenner	case (HV_VMBUS_VERSION_WIN7):
5898524Sfenner		return(HV_VMBUS_VERSION_WS2008);
5998524Sfenner
6098524Sfenner	case (HV_VMBUS_VERSION_WIN8):
6198524Sfenner		return(HV_VMBUS_VERSION_WIN7);
6298524Sfenner
6398524Sfenner	case (HV_VMBUS_VERSION_WIN8_1):
6498524Sfenner		return(HV_VMBUS_VERSION_WIN8);
65127668Sbms
6698524Sfenner	case (HV_VMBUS_VERSION_WS2008):
6798524Sfenner	default:
6898524Sfenner		return(HV_VMBUS_VERSION_INVALID);
6998524Sfenner	}
7098524Sfenner}
7198524Sfenner
72111726Sfenner/**
7398524Sfenner * Negotiate the highest supported hypervisor version.
7498524Sfenner */
75111726Sfennerstatic int
76111726Sfennerhv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
7798524Sfenner	uint32_t version)
78111726Sfenner{
79111726Sfenner	int					ret = 0;
80127668Sbms	hv_vmbus_channel_initiate_contact	*msg;
8198524Sfenner
82111726Sfenner	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
8398524Sfenner	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
8498524Sfenner
8598524Sfenner	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
86111726Sfenner	msg->vmbus_version_requested = version;
8798524Sfenner
8898524Sfenner	msg->interrupt_page = hv_get_phys_addr(
8998524Sfenner		hv_vmbus_g_connection.interrupt_page);
90111726Sfenner
9198524Sfenner	msg->monitor_page_1 = hv_get_phys_addr(
92127668Sbms		hv_vmbus_g_connection.monitor_page_1);
9398524Sfenner
94127668Sbms	msg->monitor_page_2 = hv_get_phys_addr(
95127668Sbms		hv_vmbus_g_connection.monitor_page_2);
9698524Sfenner
97127668Sbms	/**
9898524Sfenner	 * Add to list before we send the request since we may receive the
99127668Sbms	 * response before returning from this routine
10098524Sfenner	 */
10198524Sfenner	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
10298524Sfenner
103127668Sbms	TAILQ_INSERT_TAIL(
10498524Sfenner		&hv_vmbus_g_connection.channel_msg_anchor,
10598524Sfenner		msg_info,
10698524Sfenner		msg_list_entry);
107127668Sbms
108127668Sbms	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
109127668Sbms
11098524Sfenner	ret = hv_vmbus_post_message(
11198524Sfenner		msg,
112127668Sbms		sizeof(hv_vmbus_channel_initiate_contact));
113127668Sbms
114127668Sbms	if (ret != 0) {
115127668Sbms		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
116127668Sbms		TAILQ_REMOVE(
11798524Sfenner			&hv_vmbus_g_connection.channel_msg_anchor,
11898524Sfenner			msg_info,
11998524Sfenner			msg_list_entry);
120127668Sbms		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
121127668Sbms		return (ret);
122127668Sbms	}
123127668Sbms
124127668Sbms	/**
12598524Sfenner	 * Wait for the connection response
12698524Sfenner	 */
12798524Sfenner	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
12898524Sfenner
12998524Sfenner	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
130127668Sbms	TAILQ_REMOVE(
13198524Sfenner		&hv_vmbus_g_connection.channel_msg_anchor,
132127668Sbms		msg_info,
133111726Sfenner		msg_list_entry);
134111726Sfenner	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
13598524Sfenner
136111726Sfenner	/**
137111726Sfenner	 * Check if successful
13898524Sfenner	 */
139127668Sbms	if (msg_info->response.version_response.version_supported) {
140111726Sfenner		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
14198524Sfenner	} else {
14298524Sfenner		ret = ECONNREFUSED;
143111726Sfenner	}
144127668Sbms
145127668Sbms	return (ret);
146127668Sbms}
147127668Sbms
/**
 * Send a connect request on the partition service connection.
 *
 * Initializes the global connection state (message/channel lists and
 * their mutexes), allocates the interrupt page, the two monitor pages and
 * the channel lookup table, then negotiates the protocol version with the
 * host, starting at HV_VMBUS_VERSION_CURRENT and falling back through
 * hv_vmbus_get_next_version() until one is accepted.
 *
 * @return 0 on success, with hv_vmbus_protocal_version updated;
 *         -1 if the connection is not in the HV_DISCONNECTED state;
 *         otherwise the negotiation error (EWOULDBLOCK on timeout, or the
 *         last error seen when no version was accepted).  On failure all
 *         resources set up here are released and the state returns to
 *         HV_DISCONNECTED.
 */
int
hv_vmbus_connect(void) {
	int					ret = 0;
	uint32_t				version;
	hv_vmbus_channel_msg_info*		msg_info = NULL;

	/**
	 * Make sure we are not connecting or connected
	 */
	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
		return (-1);
	}

	/**
	 * Initialize the vmbus connection
	 */
	hv_vmbus_g_connection.connect_state = HV_CONNECTING;

	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
		NULL, MTX_DEF);

	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
		NULL, MTX_DEF);

	/**
	 * Setup the vmbus event connection for channel interrupt abstraction
	 * stuff
	 */
	hv_vmbus_g_connection.interrupt_page = malloc(
					PAGE_SIZE, M_DEVBUF,
					M_WAITOK | M_ZERO);

	/* First half of the page is for receive, second half for send. */
	hv_vmbus_g_connection.recv_interrupt_page =
		hv_vmbus_g_connection.interrupt_page;

	hv_vmbus_g_connection.send_interrupt_page =
		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
		    (PAGE_SIZE >> 1));

	/**
	 * Set up the monitor notification facility. The 1st page for
	 * parent->child and the 2nd page for child->parent
	 */
	hv_vmbus_g_connection.monitor_page_1 = malloc(
		PAGE_SIZE,
		M_DEVBUF,
		M_WAITOK | M_ZERO);
	hv_vmbus_g_connection.monitor_page_2 = malloc(
		PAGE_SIZE,
		M_DEVBUF,
		M_WAITOK | M_ZERO);

	/* Tracking structure followed by room for the request message. */
	msg_info = (hv_vmbus_channel_msg_info*)
		malloc(sizeof(hv_vmbus_channel_msg_info) +
			sizeof(hv_vmbus_channel_initiate_contact),
			M_DEVBUF, M_WAITOK | M_ZERO);

	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
		HV_CHANNEL_MAX_COUNT,
		M_DEVBUF, M_WAITOK | M_ZERO);
	/*
	 * Find the highest vmbus version number we can support.
	 */
	version = HV_VMBUS_VERSION_CURRENT;

	do {
		ret = hv_vmbus_negotiate_version(msg_info, version);
		if (ret == EWOULDBLOCK) {
			/*
			 * We timed out.
			 */
			goto cleanup;
		}

		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
			break;

		version = hv_vmbus_get_next_version(version);
	} while (version != HV_VMBUS_VERSION_INVALID);

	/*
	 * Every candidate version was tried without the host accepting one.
	 * That is a failure: do not publish HV_VMBUS_VERSION_INVALID as the
	 * protocol version or report success while still HV_CONNECTING.
	 */
	if (hv_vmbus_g_connection.connect_state != HV_CONNECTED) {
		if (ret == 0)
			ret = ECONNREFUSED;
		goto cleanup;
	}

	hv_vmbus_protocal_version = version;
	if (bootverbose)
		printf("VMBUS: Protocol Version: %d.%d\n",
		    version >> 16, version & 0xFFFF);

	sema_destroy(&msg_info->wait_sema);
	free(msg_info, M_DEVBUF);

	return (0);

	/*
	 * Cleanup after failure!
	 */
	cleanup:

	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;

	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);

	/*
	 * Release the shared pages and clear every pointer into them so no
	 * stale reference survives a failed connect.
	 */
	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
	hv_vmbus_g_connection.interrupt_page = NULL;
	hv_vmbus_g_connection.recv_interrupt_page = NULL;
	hv_vmbus_g_connection.send_interrupt_page = NULL;

	free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
	hv_vmbus_g_connection.monitor_page_1 = NULL;
	free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
	hv_vmbus_g_connection.monitor_page_2 = NULL;

	if (msg_info) {
		sema_destroy(&msg_info->wait_sema);
		free(msg_info, M_DEVBUF);
	}

	free(hv_vmbus_g_connection.channels, M_DEVBUF);
	hv_vmbus_g_connection.channels = NULL;
	return (ret);
}
269127668Sbms
27098524Sfenner/**
271111726Sfenner * Send a disconnect request on the partition service connection
272111726Sfenner */
27398524Sfennerint
274127668Sbmshv_vmbus_disconnect(void) {
27598524Sfenner	int			 ret = 0;
276127668Sbms	hv_vmbus_channel_unload  msg;
27798524Sfenner
278127668Sbms	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
279127668Sbms
28098524Sfenner	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
281127668Sbms
28298524Sfenner	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
283127668Sbms
284127668Sbms	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
28598524Sfenner
28698524Sfenner	free(hv_vmbus_g_connection.channels, M_DEVBUF);
287127668Sbms	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
28898524Sfenner
28998524Sfenner	return (ret);
29098524Sfenner}
29198524Sfenner
29298524Sfenner/**
293111726Sfenner * Handler for events
29498524Sfenner */
295111726Sfennervoid
29698524Sfennerhv_vmbus_on_events(int cpu)
29798524Sfenner{
298127668Sbms	unsigned long *intr_flags;
29998524Sfenner	hv_vmbus_synic_event_flags *event;
30098524Sfenner	void *page_addr;
30198524Sfenner	int flag_cnt, f;
30298524Sfenner
30398524Sfenner	KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
30498524Sfenner	    "cpu out of range!"));
30598524Sfenner
30698524Sfenner	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
30798524Sfenner	event = (hv_vmbus_synic_event_flags *)
30898524Sfenner	    page_addr + HV_VMBUS_MESSAGE_SINT;
30998524Sfenner	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
31098524Sfenner	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
31198524Sfenner		flag_cnt = HV_MAX_NUM_CHANNELS_SUPPORTED >>
31298524Sfenner		    HV_CHANNEL_ULONG_SHIFT;
31398524Sfenner		/*
31498524Sfenner		 * receive size is 1/2 page and divide that by 4 bytes
31598524Sfenner		 */
31698524Sfenner		if (atomic_testandclear_int(&event->flags32[0], 0))
31798524Sfenner			intr_flags = hv_vmbus_g_connection.recv_interrupt_page;
31898524Sfenner		else
31998524Sfenner			return;
32098524Sfenner	} else {
32198524Sfenner		/*
32298524Sfenner		 * On Host with Win8 or above, the event page can be
32398524Sfenner		 * checked directly to get the id of the channel
32498524Sfenner		 * that has the pending interrupt.
325127668Sbms		 */
32698524Sfenner		flag_cnt = VMBUS_PCPU_GET(event_flag_cnt, cpu);
32798524Sfenner		intr_flags = event->flagsul;
32898524Sfenner	}
32998524Sfenner
33098524Sfenner	/*
33198524Sfenner	 * Check events
33298524Sfenner	 */
33398524Sfenner	for (f = 0; f < flag_cnt; f++) {
33498524Sfenner		uint32_t rel_id_base;
33598524Sfenner		unsigned long flags;
33698524Sfenner		int bit;
33798524Sfenner
33898524Sfenner		if (intr_flags[f] == 0)
33998524Sfenner			continue;
34098524Sfenner
34198524Sfenner		flags = atomic_swap_long(&intr_flags[f], 0);
34298524Sfenner		rel_id_base = f << HV_CHANNEL_ULONG_SHIFT;
34398524Sfenner
34498524Sfenner		while ((bit = ffsl(flags)) != 0) {
34598524Sfenner			struct hv_vmbus_channel *channel;
346			uint32_t rel_id;
347
348			--bit;	/* NOTE: ffsl is 1-based */
349			flags &= ~(1UL << bit);
350
351			rel_id = rel_id_base + bit;
352			channel = hv_vmbus_g_connection.channels[rel_id];
353
354			/* if channel is closed or closing */
355			if (channel == NULL || channel->rxq == NULL)
356				continue;
357
358			if (channel->batched_reading)
359				hv_ring_buffer_read_begin(&channel->inbound);
360			taskqueue_enqueue(channel->rxq, &channel->channel_task);
361		}
362	}
363}
364
365/**
366 * Send a msg on the vmbus's message connection
367 */
368int hv_vmbus_post_message(void *buffer, size_t bufferLen)
369{
370	hv_vmbus_connection_id connId;
371	sbintime_t time = SBT_1MS;
372	int retries;
373	int ret;
374
375	connId.as_uint32_t = 0;
376	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
377
378	/*
379	 * We retry to cope with transient failures caused by host side's
380	 * insufficient resources. 20 times should suffice in practice.
381	 */
382	for (retries = 0; retries < 20; retries++) {
383		ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
384						    bufferLen);
385		if (ret == HV_STATUS_SUCCESS)
386			return (0);
387
388		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
389		if (time < SBT_1S * 2)
390			time *= 2;
391	}
392
393	KASSERT(ret == HV_STATUS_SUCCESS,
394		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
395
396	return (EAGAIN);
397}
398
399/**
400 * Send an event notification to the parent
401 */
402int
403hv_vmbus_set_event(hv_vmbus_channel *channel) {
404	int ret = 0;
405	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
406
407	/* Each uint32_t represents 32 channels */
408
409	synch_set_bit(child_rel_id & 31,
410		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
411			+ (child_rel_id >> 5))));
412	ret = hv_vmbus_signal_event(channel->signal_event_param);
413
414	return (ret);
415}
416
417void
418vmbus_on_channel_open(const struct hv_vmbus_channel *chan)
419{
420	volatile int *flag_cnt_ptr;
421	int flag_cnt;
422
423	flag_cnt = (chan->offer_msg.child_rel_id / HV_CHANNEL_ULONG_LEN) + 1;
424	flag_cnt_ptr = VMBUS_PCPU_PTR(event_flag_cnt, chan->target_cpu);
425
426	for (;;) {
427		int old_flag_cnt;
428
429		old_flag_cnt = *flag_cnt_ptr;
430		if (old_flag_cnt >= flag_cnt)
431			break;
432		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
433			if (bootverbose) {
434				printf("VMBUS: channel%u update "
435				    "cpu%d flag_cnt to %d\n",
436				    chan->offer_msg.child_rel_id,
437				    chan->target_cpu, flag_cnt);
438			}
439			break;
440		}
441	}
442}
443