hv_connection.c revision 300105
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29250199Sgrehan#include <sys/param.h>
30296028Ssephe#include <sys/kernel.h>
31250199Sgrehan#include <sys/malloc.h>
32250199Sgrehan#include <sys/systm.h>
33250199Sgrehan#include <sys/lock.h>
34250199Sgrehan#include <sys/mutex.h>
35250199Sgrehan#include <machine/bus.h>
36299927Ssephe#include <machine/atomic.h>
37250199Sgrehan#include <vm/vm.h>
38250199Sgrehan#include <vm/vm_param.h>
39250199Sgrehan#include <vm/pmap.h>
40250199Sgrehan
41300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
42300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
43250199Sgrehan
44250199Sgrehan/*
45250199Sgrehan * Globals
46250199Sgrehan */
47250199Sgrehanhv_vmbus_connection hv_vmbus_g_connection =
48250199Sgrehan	{ .connect_state = HV_DISCONNECTED,
49250199Sgrehan	  .next_gpadl_handle = 0xE1E10, };
50250199Sgrehan
51282212Swhuuint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
52282212Swhu
53282212Swhustatic uint32_t
54282212Swhuhv_vmbus_get_next_version(uint32_t current_ver)
55282212Swhu{
56282212Swhu	switch (current_ver) {
57282212Swhu	case (HV_VMBUS_VERSION_WIN7):
58282212Swhu		return(HV_VMBUS_VERSION_WS2008);
59282212Swhu
60282212Swhu	case (HV_VMBUS_VERSION_WIN8):
61282212Swhu		return(HV_VMBUS_VERSION_WIN7);
62282212Swhu
63282212Swhu	case (HV_VMBUS_VERSION_WIN8_1):
64282212Swhu		return(HV_VMBUS_VERSION_WIN8);
65282212Swhu
66282212Swhu	case (HV_VMBUS_VERSION_WS2008):
67282212Swhu	default:
68282212Swhu		return(HV_VMBUS_VERSION_INVALID);
69282212Swhu	}
70282212Swhu}
71282212Swhu
72250199Sgrehan/**
73282212Swhu * Negotiate the highest supported hypervisor version.
74282212Swhu */
75282212Swhustatic int
76282212Swhuhv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
77282212Swhu	uint32_t version)
78282212Swhu{
79282212Swhu	int					ret = 0;
80282212Swhu	hv_vmbus_channel_initiate_contact	*msg;
81282212Swhu
82282212Swhu	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
83282212Swhu	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
84282212Swhu
85282212Swhu	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
86282212Swhu	msg->vmbus_version_requested = version;
87282212Swhu
88282212Swhu	msg->interrupt_page = hv_get_phys_addr(
89282212Swhu		hv_vmbus_g_connection.interrupt_page);
90282212Swhu
91282212Swhu	msg->monitor_page_1 = hv_get_phys_addr(
92295309Ssephe		hv_vmbus_g_connection.monitor_page_1);
93282212Swhu
94295308Ssephe	msg->monitor_page_2 = hv_get_phys_addr(
95295309Ssephe		hv_vmbus_g_connection.monitor_page_2);
96282212Swhu
97282212Swhu	/**
98282212Swhu	 * Add to list before we send the request since we may receive the
99282212Swhu	 * response before returning from this routine
100282212Swhu	 */
101297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
102282212Swhu
103282212Swhu	TAILQ_INSERT_TAIL(
104282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
105282212Swhu		msg_info,
106282212Swhu		msg_list_entry);
107282212Swhu
108297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
109282212Swhu
110282212Swhu	ret = hv_vmbus_post_message(
111282212Swhu		msg,
112282212Swhu		sizeof(hv_vmbus_channel_initiate_contact));
113282212Swhu
114282212Swhu	if (ret != 0) {
115297635Ssephe		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
116282212Swhu		TAILQ_REMOVE(
117282212Swhu			&hv_vmbus_g_connection.channel_msg_anchor,
118282212Swhu			msg_info,
119282212Swhu			msg_list_entry);
120297635Ssephe		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
121282212Swhu		return (ret);
122282212Swhu	}
123282212Swhu
124282212Swhu	/**
125282212Swhu	 * Wait for the connection response
126282212Swhu	 */
127296028Ssephe	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
128282212Swhu
129297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
130282212Swhu	TAILQ_REMOVE(
131282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
132282212Swhu		msg_info,
133282212Swhu		msg_list_entry);
134297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
135282212Swhu
136282212Swhu	/**
137282212Swhu	 * Check if successful
138282212Swhu	 */
139282212Swhu	if (msg_info->response.version_response.version_supported) {
140282212Swhu		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
141282212Swhu	} else {
142282212Swhu		ret = ECONNREFUSED;
143282212Swhu	}
144282212Swhu
145282212Swhu	return (ret);
146282212Swhu}
147282212Swhu
148282212Swhu/**
149250199Sgrehan * Send a connect request on the partition service connection
150250199Sgrehan */
151250199Sgrehanint
152250199Sgrehanhv_vmbus_connect(void) {
153250199Sgrehan	int					ret = 0;
154282212Swhu	uint32_t				version;
155250199Sgrehan	hv_vmbus_channel_msg_info*		msg_info = NULL;
156250199Sgrehan
157250199Sgrehan	/**
158250199Sgrehan	 * Make sure we are not connecting or connected
159250199Sgrehan	 */
160250199Sgrehan	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
161250199Sgrehan		return (-1);
162250199Sgrehan	}
163250199Sgrehan
164250199Sgrehan	/**
165250199Sgrehan	 * Initialize the vmbus connection
166250199Sgrehan	 */
167250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
168250199Sgrehan
169250199Sgrehan	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
170250199Sgrehan	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
171297635Ssephe		NULL, MTX_DEF);
172250199Sgrehan
173250199Sgrehan	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
174250199Sgrehan	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
175282212Swhu		NULL, MTX_DEF);
176250199Sgrehan
177250199Sgrehan	/**
178250199Sgrehan	 * Setup the vmbus event connection for channel interrupt abstraction
179250199Sgrehan	 * stuff
180250199Sgrehan	 */
181295309Ssephe	hv_vmbus_g_connection.interrupt_page = malloc(
182250199Sgrehan					PAGE_SIZE, M_DEVBUF,
183295309Ssephe					M_WAITOK | M_ZERO);
184250199Sgrehan
185250199Sgrehan	hv_vmbus_g_connection.recv_interrupt_page =
186250199Sgrehan		hv_vmbus_g_connection.interrupt_page;
187250199Sgrehan
188250199Sgrehan	hv_vmbus_g_connection.send_interrupt_page =
189250199Sgrehan		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
190250199Sgrehan		    (PAGE_SIZE >> 1));
191250199Sgrehan
192250199Sgrehan	/**
193250199Sgrehan	 * Set up the monitor notification facility. The 1st page for
194250199Sgrehan	 * parent->child and the 2nd page for child->parent
195250199Sgrehan	 */
196295309Ssephe	hv_vmbus_g_connection.monitor_page_1 = malloc(
197295309Ssephe		PAGE_SIZE,
198250199Sgrehan		M_DEVBUF,
199295309Ssephe		M_WAITOK | M_ZERO);
200295309Ssephe	hv_vmbus_g_connection.monitor_page_2 = malloc(
201250199Sgrehan		PAGE_SIZE,
202295309Ssephe		M_DEVBUF,
203295309Ssephe		M_WAITOK | M_ZERO);
204250199Sgrehan
205250199Sgrehan	msg_info = (hv_vmbus_channel_msg_info*)
206250199Sgrehan		malloc(sizeof(hv_vmbus_channel_msg_info) +
207250199Sgrehan			sizeof(hv_vmbus_channel_initiate_contact),
208295308Ssephe			M_DEVBUF, M_WAITOK | M_ZERO);
209250199Sgrehan
210294553Ssephe	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
211294553Ssephe		HV_CHANNEL_MAX_COUNT,
212294553Ssephe		M_DEVBUF, M_WAITOK | M_ZERO);
213282212Swhu	/*
214282212Swhu	 * Find the highest vmbus version number we can support.
215250199Sgrehan	 */
216282212Swhu	version = HV_VMBUS_VERSION_CURRENT;
217250199Sgrehan
218282212Swhu	do {
219282212Swhu		ret = hv_vmbus_negotiate_version(msg_info, version);
220282212Swhu		if (ret == EWOULDBLOCK) {
221282212Swhu			/*
222282212Swhu			 * We timed out.
223282212Swhu			 */
224282212Swhu			goto cleanup;
225282212Swhu		}
226250199Sgrehan
227282212Swhu		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
228282212Swhu			break;
229250199Sgrehan
230282212Swhu		version = hv_vmbus_get_next_version(version);
231282212Swhu	} while (version != HV_VMBUS_VERSION_INVALID);
232250199Sgrehan
233282212Swhu	hv_vmbus_protocal_version = version;
234282212Swhu	if (bootverbose)
235293870Ssephe		printf("VMBUS: Protocol Version: %d.%d\n",
236282212Swhu		    version >> 16, version & 0xFFFF);
237250199Sgrehan
238250199Sgrehan	sema_destroy(&msg_info->wait_sema);
239250199Sgrehan	free(msg_info, M_DEVBUF);
240250199Sgrehan
241250199Sgrehan	return (0);
242250199Sgrehan
243250199Sgrehan	/*
244250199Sgrehan	 * Cleanup after failure!
245250199Sgrehan	 */
246250199Sgrehan	cleanup:
247250199Sgrehan
248250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
249250199Sgrehan
250250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
251250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
252250199Sgrehan
253250199Sgrehan	if (hv_vmbus_g_connection.interrupt_page != NULL) {
254295964Ssephe		free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
255250199Sgrehan		hv_vmbus_g_connection.interrupt_page = NULL;
256250199Sgrehan	}
257250199Sgrehan
258295309Ssephe	free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
259295309Ssephe	free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
260250199Sgrehan
261250199Sgrehan	if (msg_info) {
262250199Sgrehan		sema_destroy(&msg_info->wait_sema);
263250199Sgrehan		free(msg_info, M_DEVBUF);
264250199Sgrehan	}
265250199Sgrehan
266294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
267250199Sgrehan	return (ret);
268250199Sgrehan}
269250199Sgrehan
270250199Sgrehan/**
271250199Sgrehan * Send a disconnect request on the partition service connection
272250199Sgrehan */
273250199Sgrehanint
274250199Sgrehanhv_vmbus_disconnect(void) {
275250199Sgrehan	int			 ret = 0;
276295308Ssephe	hv_vmbus_channel_unload  msg;
277250199Sgrehan
278295308Ssephe	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
279250199Sgrehan
280295308Ssephe	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
281250199Sgrehan
282295964Ssephe	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
283250199Sgrehan
284250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
285250199Sgrehan
286294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
287250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
288250199Sgrehan
289250199Sgrehan	return (ret);
290250199Sgrehan}
291250199Sgrehan
292250199Sgrehan/**
293250199Sgrehan * Handler for events
294250199Sgrehan */
295250199Sgrehanvoid
296294886Ssephehv_vmbus_on_events(int cpu)
297250199Sgrehan{
298300102Ssephe	unsigned long *intr_flags;
299300101Ssephe	hv_vmbus_synic_event_flags *event;
300282212Swhu	void *page_addr;
301300102Ssephe	int flag_cnt, f;
302250199Sgrehan
303282212Swhu	KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
304282212Swhu	    "cpu out of range!"));
305250199Sgrehan
306297908Ssephe	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
307297908Ssephe	event = (hv_vmbus_synic_event_flags *)
308297908Ssephe	    page_addr + HV_VMBUS_MESSAGE_SINT;
309282212Swhu	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
310282212Swhu	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
311300102Ssephe		flag_cnt = HV_MAX_NUM_CHANNELS_SUPPORTED >>
312300101Ssephe		    HV_CHANNEL_ULONG_SHIFT;
313282212Swhu		/*
314282212Swhu		 * receive size is 1/2 page and divide that by 4 bytes
315282212Swhu		 */
316300101Ssephe		if (atomic_testandclear_int(&event->flags32[0], 0))
317300102Ssephe			intr_flags = hv_vmbus_g_connection.recv_interrupt_page;
318300101Ssephe		else
319299889Ssephe			return;
320282212Swhu	} else {
321282212Swhu		/*
322282212Swhu		 * On Host with Win8 or above, the event page can be
323282212Swhu		 * checked directly to get the id of the channel
324282212Swhu		 * that has the pending interrupt.
325282212Swhu		 */
326300102Ssephe		flag_cnt = VMBUS_PCPU_GET(event_flag_cnt, cpu);
327300102Ssephe		intr_flags = event->flagsul;
328282212Swhu	}
329282212Swhu
330250199Sgrehan	/*
331250199Sgrehan	 * Check events
332250199Sgrehan	 */
333300102Ssephe	for (f = 0; f < flag_cnt; f++) {
334300101Ssephe		uint32_t rel_id_base;
335300105Ssephe		unsigned long flags;
336300101Ssephe		int bit;
337300101Ssephe
338300102Ssephe		if (intr_flags[f] == 0)
339299892Ssephe			continue;
340299892Ssephe
341300105Ssephe		flags = atomic_swap_long(&intr_flags[f], 0);
342300102Ssephe		rel_id_base = f << HV_CHANNEL_ULONG_SHIFT;
343299890Ssephe
344300105Ssephe		while ((bit = ffsl(flags)) != 0) {
345300105Ssephe			struct hv_vmbus_channel *channel;
346300105Ssephe			uint32_t rel_id;
347299890Ssephe
348300105Ssephe			--bit;	/* NOTE: ffsl is 1-based */
349300105Ssephe			flags &= ~(1UL << bit);
350250199Sgrehan
351300105Ssephe			rel_id = rel_id_base + bit;
352300105Ssephe			channel = hv_vmbus_g_connection.channels[rel_id];
353300105Ssephe
354300105Ssephe			/* if channel is closed or closing */
355300105Ssephe			if (channel == NULL || channel->rxq == NULL)
356300105Ssephe				continue;
357300105Ssephe
358300105Ssephe			if (channel->batched_reading)
359300105Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
360300105Ssephe			taskqueue_enqueue(channel->rxq, &channel->channel_task);
361300101Ssephe		}
362250199Sgrehan	}
363250199Sgrehan}
364250199Sgrehan
365250199Sgrehan/**
366250199Sgrehan * Send a msg on the vmbus's message connection
367250199Sgrehan */
368297219Ssepheint hv_vmbus_post_message(void *buffer, size_t bufferLen)
369297219Ssephe{
370250199Sgrehan	hv_vmbus_connection_id connId;
371297219Ssephe	sbintime_t time = SBT_1MS;
372297219Ssephe	int retries;
373297219Ssephe	int ret;
374250199Sgrehan
375297219Ssephe	connId.as_uint32_t = 0;
376297219Ssephe	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
377250199Sgrehan
378297219Ssephe	/*
379297219Ssephe	 * We retry to cope with transient failures caused by host side's
380297219Ssephe	 * insufficient resources. 20 times should suffice in practice.
381250199Sgrehan	 */
382297219Ssephe	for (retries = 0; retries < 20; retries++) {
383297219Ssephe		ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
384297219Ssephe						    bufferLen);
385297219Ssephe		if (ret == HV_STATUS_SUCCESS)
386297219Ssephe			return (0);
387297219Ssephe
388297219Ssephe		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
389297219Ssephe		if (time < SBT_1S * 2)
390297219Ssephe			time *= 2;
391250199Sgrehan	}
392250199Sgrehan
393297219Ssephe	KASSERT(ret == HV_STATUS_SUCCESS,
394297219Ssephe		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
395250199Sgrehan
396297219Ssephe	return (EAGAIN);
397250199Sgrehan}
398250199Sgrehan
399250199Sgrehan/**
400250199Sgrehan * Send an event notification to the parent
401250199Sgrehan */
402250199Sgrehanint
403282212Swhuhv_vmbus_set_event(hv_vmbus_channel *channel) {
404250199Sgrehan	int ret = 0;
405282212Swhu	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
406250199Sgrehan
407250199Sgrehan	/* Each uint32_t represents 32 channels */
408250199Sgrehan
409250199Sgrehan	synch_set_bit(child_rel_id & 31,
410250199Sgrehan		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
411250199Sgrehan			+ (child_rel_id >> 5))));
412282212Swhu	ret = hv_vmbus_signal_event(channel->signal_event_param);
413250199Sgrehan
414250199Sgrehan	return (ret);
415250199Sgrehan}
416300102Ssephe
417300102Ssephevoid
418300102Ssephevmbus_on_channel_open(const struct hv_vmbus_channel *chan)
419300102Ssephe{
420300102Ssephe	volatile int *flag_cnt_ptr;
421300102Ssephe	int flag_cnt;
422300102Ssephe
423300102Ssephe	flag_cnt = (chan->offer_msg.child_rel_id / HV_CHANNEL_ULONG_LEN) + 1;
424300102Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(event_flag_cnt, chan->target_cpu);
425300102Ssephe
426300102Ssephe	for (;;) {
427300102Ssephe		int old_flag_cnt;
428300102Ssephe
429300102Ssephe		old_flag_cnt = *flag_cnt_ptr;
430300102Ssephe		if (old_flag_cnt >= flag_cnt)
431300102Ssephe			break;
432300102Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
433300102Ssephe			if (bootverbose) {
434300102Ssephe				printf("VMBUS: channel%u update "
435300102Ssephe				    "cpu%d flag_cnt to %d\n",
436300102Ssephe				    chan->offer_msg.child_rel_id,
437300102Ssephe				    chan->target_cpu, flag_cnt);
438300102Ssephe			}
439300102Ssephe			break;
440300102Ssephe		}
441300102Ssephe	}
442300102Ssephe}
443