hv_connection.c revision 300573
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29250199Sgrehan#include <sys/param.h>
30296028Ssephe#include <sys/kernel.h>
31250199Sgrehan#include <sys/malloc.h>
32250199Sgrehan#include <sys/systm.h>
33250199Sgrehan#include <sys/lock.h>
34250199Sgrehan#include <sys/mutex.h>
35250199Sgrehan#include <machine/bus.h>
36299927Ssephe#include <machine/atomic.h>
37250199Sgrehan#include <vm/vm.h>
38250199Sgrehan#include <vm/vm_param.h>
39250199Sgrehan#include <vm/pmap.h>
40250199Sgrehan
41300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
42300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
43250199Sgrehan
44250199Sgrehan/*
45250199Sgrehan * Globals
46250199Sgrehan */
47250199Sgrehanhv_vmbus_connection hv_vmbus_g_connection =
48250199Sgrehan	{ .connect_state = HV_DISCONNECTED,
49250199Sgrehan	  .next_gpadl_handle = 0xE1E10, };
50250199Sgrehan
51282212Swhuuint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
52282212Swhu
53282212Swhustatic uint32_t
54282212Swhuhv_vmbus_get_next_version(uint32_t current_ver)
55282212Swhu{
56282212Swhu	switch (current_ver) {
57282212Swhu	case (HV_VMBUS_VERSION_WIN7):
58282212Swhu		return(HV_VMBUS_VERSION_WS2008);
59282212Swhu
60282212Swhu	case (HV_VMBUS_VERSION_WIN8):
61282212Swhu		return(HV_VMBUS_VERSION_WIN7);
62282212Swhu
63282212Swhu	case (HV_VMBUS_VERSION_WIN8_1):
64282212Swhu		return(HV_VMBUS_VERSION_WIN8);
65282212Swhu
66282212Swhu	case (HV_VMBUS_VERSION_WS2008):
67282212Swhu	default:
68282212Swhu		return(HV_VMBUS_VERSION_INVALID);
69282212Swhu	}
70282212Swhu}
71282212Swhu
72250199Sgrehan/**
73282212Swhu * Negotiate the highest supported hypervisor version.
74282212Swhu */
75282212Swhustatic int
76282212Swhuhv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
77282212Swhu	uint32_t version)
78282212Swhu{
79282212Swhu	int					ret = 0;
80282212Swhu	hv_vmbus_channel_initiate_contact	*msg;
81282212Swhu
82282212Swhu	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
83282212Swhu	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
84282212Swhu
85282212Swhu	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
86282212Swhu	msg->vmbus_version_requested = version;
87282212Swhu
88282212Swhu	msg->interrupt_page = hv_get_phys_addr(
89282212Swhu		hv_vmbus_g_connection.interrupt_page);
90282212Swhu
91282212Swhu	msg->monitor_page_1 = hv_get_phys_addr(
92295309Ssephe		hv_vmbus_g_connection.monitor_page_1);
93282212Swhu
94295308Ssephe	msg->monitor_page_2 = hv_get_phys_addr(
95295309Ssephe		hv_vmbus_g_connection.monitor_page_2);
96282212Swhu
97282212Swhu	/**
98282212Swhu	 * Add to list before we send the request since we may receive the
99282212Swhu	 * response before returning from this routine
100282212Swhu	 */
101297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
102282212Swhu
103282212Swhu	TAILQ_INSERT_TAIL(
104282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
105282212Swhu		msg_info,
106282212Swhu		msg_list_entry);
107282212Swhu
108297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
109282212Swhu
110282212Swhu	ret = hv_vmbus_post_message(
111282212Swhu		msg,
112282212Swhu		sizeof(hv_vmbus_channel_initiate_contact));
113282212Swhu
114282212Swhu	if (ret != 0) {
115297635Ssephe		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
116282212Swhu		TAILQ_REMOVE(
117282212Swhu			&hv_vmbus_g_connection.channel_msg_anchor,
118282212Swhu			msg_info,
119282212Swhu			msg_list_entry);
120297635Ssephe		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
121282212Swhu		return (ret);
122282212Swhu	}
123282212Swhu
124282212Swhu	/**
125282212Swhu	 * Wait for the connection response
126282212Swhu	 */
127296028Ssephe	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
128282212Swhu
129297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
130282212Swhu	TAILQ_REMOVE(
131282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
132282212Swhu		msg_info,
133282212Swhu		msg_list_entry);
134297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
135282212Swhu
136282212Swhu	/**
137282212Swhu	 * Check if successful
138282212Swhu	 */
139282212Swhu	if (msg_info->response.version_response.version_supported) {
140282212Swhu		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
141282212Swhu	} else {
142282212Swhu		ret = ECONNREFUSED;
143282212Swhu	}
144282212Swhu
145282212Swhu	return (ret);
146282212Swhu}
147282212Swhu
/**
 * Send a connect request on the partition service connection.
 *
 * Initializes the global connection (message/channel lists and their
 * mutexes), allocates the interrupt page (first half used for receive,
 * second half for send), the two monitor pages and the channel table,
 * then negotiates the protocol version starting at
 * HV_VMBUS_VERSION_CURRENT and stepping down through
 * hv_vmbus_get_next_version().
 *
 * Returns 0 once a version has been accepted; the accepted version is
 * stored in hv_vmbus_protocal_version.  Returns -1 if the connection is
 * not in the HV_DISCONNECTED state.  If negotiation times out or every
 * version is refused, everything set up here is torn down, the state
 * goes back to HV_DISCONNECTED and the last negotiation error is
 * returned.
 */
int
hv_vmbus_connect(void)
{
	int					ret = 0;
	uint32_t				version;
	hv_vmbus_channel_msg_info		*msg_info = NULL;

	/*
	 * Make sure we are not connecting or connected
	 */
	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
		return (-1);
	}

	/*
	 * Initialize the vmbus connection
	 */
	hv_vmbus_g_connection.connect_state = HV_CONNECTING;

	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
		NULL, MTX_DEF);

	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
		NULL, MTX_DEF);

	/*
	 * Setup the vmbus event connection for channel interrupt abstraction
	 * stuff
	 */
	hv_vmbus_g_connection.interrupt_page = malloc(PAGE_SIZE, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	hv_vmbus_g_connection.recv_interrupt_page =
		hv_vmbus_g_connection.interrupt_page;

	hv_vmbus_g_connection.send_interrupt_page =
		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
		    (PAGE_SIZE >> 1));

	/*
	 * Set up the monitor notification facility. The 1st page for
	 * parent->child and the 2nd page for child->parent
	 */
	hv_vmbus_g_connection.monitor_page_1 = malloc(PAGE_SIZE, M_DEVBUF,
	    M_WAITOK | M_ZERO);
	hv_vmbus_g_connection.monitor_page_2 = malloc(PAGE_SIZE, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/* The initiate-contact message lives right behind the msg_info. */
	msg_info = malloc(sizeof(*msg_info) +
	    sizeof(hv_vmbus_channel_initiate_contact),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel *) *
	    HV_CHANNEL_MAX_COUNT, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Find the highest vmbus version number we can support.
	 */
	version = HV_VMBUS_VERSION_CURRENT;

	do {
		ret = hv_vmbus_negotiate_version(msg_info, version);
		if (ret == EWOULDBLOCK) {
			/*
			 * We timed out.
			 */
			goto cleanup;
		}

		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
			break;

		version = hv_vmbus_get_next_version(version);
	} while (version != HV_VMBUS_VERSION_INVALID);

	/*
	 * Running out of versions is a failure.  Without this check the
	 * function would report success while still in HV_CONNECTING.
	 */
	if (hv_vmbus_g_connection.connect_state != HV_CONNECTED) {
		if (ret == 0)
			ret = ECONNREFUSED;
		goto cleanup;
	}

	hv_vmbus_protocal_version = version;
	if (bootverbose)
		printf("VMBUS: Protocol Version: %d.%d\n",
		    version >> 16, version & 0xFFFF);

	sema_destroy(&msg_info->wait_sema);
	free(msg_info, M_DEVBUF);

	return (0);

	/*
	 * Cleanup after failure!
	 */
cleanup:

	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;

	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);

	/*
	 * Clear every pointer after freeing it so that a later connect or
	 * disconnect cannot touch or double-free stale memory.
	 */
	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
	hv_vmbus_g_connection.interrupt_page = NULL;
	hv_vmbus_g_connection.recv_interrupt_page = NULL;
	hv_vmbus_g_connection.send_interrupt_page = NULL;

	free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
	hv_vmbus_g_connection.monitor_page_1 = NULL;
	free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
	hv_vmbus_g_connection.monitor_page_2 = NULL;

	if (msg_info != NULL) {
		sema_destroy(&msg_info->wait_sema);
		free(msg_info, M_DEVBUF);
	}

	free(hv_vmbus_g_connection.channels, M_DEVBUF);
	hv_vmbus_g_connection.channels = NULL;

	return (ret);
}
270250199Sgrehan
271250199Sgrehan/**
272250199Sgrehan * Send a disconnect request on the partition service connection
273250199Sgrehan */
274250199Sgrehanint
275300127Ssephehv_vmbus_disconnect(void)
276300127Ssephe{
277250199Sgrehan	int			 ret = 0;
278295308Ssephe	hv_vmbus_channel_unload  msg;
279250199Sgrehan
280295308Ssephe	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
281250199Sgrehan
282295308Ssephe	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
283250199Sgrehan
284295964Ssephe	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
285250199Sgrehan
286250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
287250199Sgrehan
288294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
289250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
290250199Sgrehan
291250199Sgrehan	return (ret);
292250199Sgrehan}
293250199Sgrehan
294300107Ssephestatic __inline void
295300107Ssephevmbus_event_flags_proc(unsigned long *event_flags, int flag_cnt)
296250199Sgrehan{
297300107Ssephe	int f;
298250199Sgrehan
299300107Ssephe	for (f = 0; f < flag_cnt; ++f) {
300300101Ssephe		uint32_t rel_id_base;
301300105Ssephe		unsigned long flags;
302300101Ssephe		int bit;
303300101Ssephe
304300107Ssephe		if (event_flags[f] == 0)
305299892Ssephe			continue;
306299892Ssephe
307300107Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
308300102Ssephe		rel_id_base = f << HV_CHANNEL_ULONG_SHIFT;
309299890Ssephe
310300105Ssephe		while ((bit = ffsl(flags)) != 0) {
311300105Ssephe			struct hv_vmbus_channel *channel;
312300105Ssephe			uint32_t rel_id;
313299890Ssephe
314300105Ssephe			--bit;	/* NOTE: ffsl is 1-based */
315300105Ssephe			flags &= ~(1UL << bit);
316250199Sgrehan
317300105Ssephe			rel_id = rel_id_base + bit;
318300105Ssephe			channel = hv_vmbus_g_connection.channels[rel_id];
319300105Ssephe
320300105Ssephe			/* if channel is closed or closing */
321300105Ssephe			if (channel == NULL || channel->rxq == NULL)
322300105Ssephe				continue;
323300105Ssephe
324300105Ssephe			if (channel->batched_reading)
325300105Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
326300105Ssephe			taskqueue_enqueue(channel->rxq, &channel->channel_task);
327300101Ssephe		}
328250199Sgrehan	}
329250199Sgrehan}
330250199Sgrehan
331300107Ssephevoid
332300107Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
333300107Ssephe{
334300107Ssephe	hv_vmbus_synic_event_flags *event;
335300107Ssephe
336300107Ssephe	/*
337300107Ssephe	 * On Host with Win8 or above, the event page can be checked directly
338300107Ssephe	 * to get the id of the channel that has the pending interrupt.
339300107Ssephe	 */
340300573Ssephe	event = VMBUS_PCPU_GET(sc, event_flag, cpu) + HV_VMBUS_MESSAGE_SINT;
341300107Ssephe	vmbus_event_flags_proc(event->flagsul,
342300573Ssephe	    VMBUS_PCPU_GET(sc, event_flag_cnt, cpu));
343300107Ssephe}
344300107Ssephe
345300107Ssephevoid
346300107Ssephevmbus_event_proc_compat(struct vmbus_softc *sc __unused, int cpu)
347300107Ssephe{
348300107Ssephe	hv_vmbus_synic_event_flags *event;
349300107Ssephe
350300573Ssephe	event = VMBUS_PCPU_GET(sc, event_flag, cpu) + HV_VMBUS_MESSAGE_SINT;
351300107Ssephe	if (atomic_testandclear_int(&event->flags32[0], 0)) {
352300107Ssephe		vmbus_event_flags_proc(
353300107Ssephe		    hv_vmbus_g_connection.recv_interrupt_page,
354300107Ssephe		    HV_MAX_NUM_CHANNELS_SUPPORTED >> HV_CHANNEL_ULONG_SHIFT);
355300107Ssephe	}
356300107Ssephe}
357300107Ssephe
358250199Sgrehan/**
359250199Sgrehan * Send a msg on the vmbus's message connection
360250199Sgrehan */
361297219Ssepheint hv_vmbus_post_message(void *buffer, size_t bufferLen)
362297219Ssephe{
363250199Sgrehan	hv_vmbus_connection_id connId;
364297219Ssephe	sbintime_t time = SBT_1MS;
365297219Ssephe	int retries;
366297219Ssephe	int ret;
367250199Sgrehan
368297219Ssephe	connId.as_uint32_t = 0;
369297219Ssephe	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
370250199Sgrehan
371297219Ssephe	/*
372297219Ssephe	 * We retry to cope with transient failures caused by host side's
373297219Ssephe	 * insufficient resources. 20 times should suffice in practice.
374250199Sgrehan	 */
375297219Ssephe	for (retries = 0; retries < 20; retries++) {
376297219Ssephe		ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
377297219Ssephe						    bufferLen);
378297219Ssephe		if (ret == HV_STATUS_SUCCESS)
379297219Ssephe			return (0);
380297219Ssephe
381297219Ssephe		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
382297219Ssephe		if (time < SBT_1S * 2)
383297219Ssephe			time *= 2;
384250199Sgrehan	}
385250199Sgrehan
386297219Ssephe	KASSERT(ret == HV_STATUS_SUCCESS,
387297219Ssephe		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
388250199Sgrehan
389297219Ssephe	return (EAGAIN);
390250199Sgrehan}
391250199Sgrehan
392250199Sgrehan/**
393250199Sgrehan * Send an event notification to the parent
394250199Sgrehan */
395250199Sgrehanint
396300127Ssephehv_vmbus_set_event(hv_vmbus_channel *channel)
397300127Ssephe{
398250199Sgrehan	int ret = 0;
399282212Swhu	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
400250199Sgrehan
401250199Sgrehan	/* Each uint32_t represents 32 channels */
402250199Sgrehan
403250199Sgrehan	synch_set_bit(child_rel_id & 31,
404250199Sgrehan		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
405250199Sgrehan			+ (child_rel_id >> 5))));
406282212Swhu	ret = hv_vmbus_signal_event(channel->signal_event_param);
407250199Sgrehan
408250199Sgrehan	return (ret);
409250199Sgrehan}
410300102Ssephe
411300102Ssephevoid
412300102Ssephevmbus_on_channel_open(const struct hv_vmbus_channel *chan)
413300102Ssephe{
414300102Ssephe	volatile int *flag_cnt_ptr;
415300102Ssephe	int flag_cnt;
416300102Ssephe
417300102Ssephe	flag_cnt = (chan->offer_msg.child_rel_id / HV_CHANNEL_ULONG_LEN) + 1;
418300573Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(vmbus_get_softc(), event_flag_cnt,
419300573Ssephe	    chan->target_cpu);
420300102Ssephe
421300102Ssephe	for (;;) {
422300102Ssephe		int old_flag_cnt;
423300102Ssephe
424300102Ssephe		old_flag_cnt = *flag_cnt_ptr;
425300102Ssephe		if (old_flag_cnt >= flag_cnt)
426300102Ssephe			break;
427300102Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
428300102Ssephe			if (bootverbose) {
429300102Ssephe				printf("VMBUS: channel%u update "
430300102Ssephe				    "cpu%d flag_cnt to %d\n",
431300102Ssephe				    chan->offer_msg.child_rel_id,
432300102Ssephe				    chan->target_cpu, flag_cnt);
433300102Ssephe			}
434300102Ssephe			break;
435300102Ssephe		}
436300102Ssephe	}
437300102Ssephe}
438