hv_connection.c revision 301106
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/param.h>
30#include <sys/kernel.h>
31#include <sys/malloc.h>
32#include <sys/systm.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <machine/bus.h>
36#include <machine/atomic.h>
37#include <vm/vm.h>
38#include <vm/vm_param.h>
39#include <vm/pmap.h>
40
41#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
42#include <dev/hyperv/vmbus/vmbus_reg.h>
43#include <dev/hyperv/vmbus/vmbus_var.h>
44
45/*
46 * Globals
47 */
48hv_vmbus_connection hv_vmbus_g_connection =
49	{ .connect_state = HV_DISCONNECTED,
50	  .next_gpadl_handle = 0xE1E10, };
51
52uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
53
54static uint32_t
55hv_vmbus_get_next_version(uint32_t current_ver)
56{
57	switch (current_ver) {
58	case (HV_VMBUS_VERSION_WIN7):
59		return(HV_VMBUS_VERSION_WS2008);
60
61	case (HV_VMBUS_VERSION_WIN8):
62		return(HV_VMBUS_VERSION_WIN7);
63
64	case (HV_VMBUS_VERSION_WIN8_1):
65		return(HV_VMBUS_VERSION_WIN8);
66
67	case (HV_VMBUS_VERSION_WS2008):
68	default:
69		return(HV_VMBUS_VERSION_INVALID);
70	}
71}
72
73/**
74 * Negotiate the highest supported hypervisor version.
75 */
76static int
77hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
78	uint32_t version)
79{
80	int					ret = 0;
81	hv_vmbus_channel_initiate_contact	*msg;
82
83	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
84	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
85
86	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
87	msg->vmbus_version_requested = version;
88
89	msg->interrupt_page = hv_get_phys_addr(
90		hv_vmbus_g_connection.interrupt_page);
91
92	msg->monitor_page_1 = hv_get_phys_addr(
93		hv_vmbus_g_connection.monitor_page_1);
94
95	msg->monitor_page_2 = hv_get_phys_addr(
96		hv_vmbus_g_connection.monitor_page_2);
97
98	/**
99	 * Add to list before we send the request since we may receive the
100	 * response before returning from this routine
101	 */
102	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
103
104	TAILQ_INSERT_TAIL(
105		&hv_vmbus_g_connection.channel_msg_anchor,
106		msg_info,
107		msg_list_entry);
108
109	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
110
111	ret = hv_vmbus_post_message(
112		msg,
113		sizeof(hv_vmbus_channel_initiate_contact));
114
115	if (ret != 0) {
116		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
117		TAILQ_REMOVE(
118			&hv_vmbus_g_connection.channel_msg_anchor,
119			msg_info,
120			msg_list_entry);
121		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
122		return (ret);
123	}
124
125	/**
126	 * Wait for the connection response
127	 */
128	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
129
130	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
131	TAILQ_REMOVE(
132		&hv_vmbus_g_connection.channel_msg_anchor,
133		msg_info,
134		msg_list_entry);
135	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
136
137	/**
138	 * Check if successful
139	 */
140	if (msg_info->response.version_response.version_supported) {
141		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
142	} else {
143		ret = ECONNREFUSED;
144	}
145
146	return (ret);
147}
148
149/**
150 * Send a connect request on the partition service connection
151 */
152int
153hv_vmbus_connect(void)
154{
155	int					ret = 0;
156	uint32_t				version;
157	hv_vmbus_channel_msg_info*		msg_info = NULL;
158
159	/**
160	 * Make sure we are not connecting or connected
161	 */
162	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
163		return (-1);
164	}
165
166	/**
167	 * Initialize the vmbus connection
168	 */
169	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
170
171	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
172	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
173		NULL, MTX_DEF);
174
175	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
176	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
177		NULL, MTX_DEF);
178
179	/**
180	 * Setup the vmbus event connection for channel interrupt abstraction
181	 * stuff
182	 */
183	hv_vmbus_g_connection.interrupt_page = malloc(
184					PAGE_SIZE, M_DEVBUF,
185					M_WAITOK | M_ZERO);
186
187	hv_vmbus_g_connection.recv_interrupt_page =
188		hv_vmbus_g_connection.interrupt_page;
189
190	hv_vmbus_g_connection.send_interrupt_page =
191		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
192		    (PAGE_SIZE >> 1));
193
194	/**
195	 * Set up the monitor notification facility. The 1st page for
196	 * parent->child and the 2nd page for child->parent
197	 */
198	hv_vmbus_g_connection.monitor_page_1 = malloc(
199		PAGE_SIZE,
200		M_DEVBUF,
201		M_WAITOK | M_ZERO);
202	hv_vmbus_g_connection.monitor_page_2 = malloc(
203		PAGE_SIZE,
204		M_DEVBUF,
205		M_WAITOK | M_ZERO);
206
207	msg_info = (hv_vmbus_channel_msg_info*)
208		malloc(sizeof(hv_vmbus_channel_msg_info) +
209			sizeof(hv_vmbus_channel_initiate_contact),
210			M_DEVBUF, M_WAITOK | M_ZERO);
211
212	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
213	    VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO);
214	/*
215	 * Find the highest vmbus version number we can support.
216	 */
217	version = HV_VMBUS_VERSION_CURRENT;
218
219	do {
220		ret = hv_vmbus_negotiate_version(msg_info, version);
221		if (ret == EWOULDBLOCK) {
222			/*
223			 * We timed out.
224			 */
225			goto cleanup;
226		}
227
228		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
229			break;
230
231		version = hv_vmbus_get_next_version(version);
232	} while (version != HV_VMBUS_VERSION_INVALID);
233
234	hv_vmbus_protocal_version = version;
235	if (bootverbose)
236		printf("VMBUS: Protocol Version: %d.%d\n",
237		    version >> 16, version & 0xFFFF);
238
239	sema_destroy(&msg_info->wait_sema);
240	free(msg_info, M_DEVBUF);
241
242	return (0);
243
244	/*
245	 * Cleanup after failure!
246	 */
247	cleanup:
248
249	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
250
251	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
252	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
253
254	if (hv_vmbus_g_connection.interrupt_page != NULL) {
255		free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
256		hv_vmbus_g_connection.interrupt_page = NULL;
257	}
258
259	free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
260	free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
261
262	if (msg_info) {
263		sema_destroy(&msg_info->wait_sema);
264		free(msg_info, M_DEVBUF);
265	}
266
267	free(hv_vmbus_g_connection.channels, M_DEVBUF);
268	return (ret);
269}
270
271/**
272 * Send a disconnect request on the partition service connection
273 */
274int
275hv_vmbus_disconnect(void)
276{
277	int			 ret = 0;
278	hv_vmbus_channel_unload  msg;
279
280	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
281
282	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
283
284	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
285
286	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
287
288	free(hv_vmbus_g_connection.channels, M_DEVBUF);
289	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
290
291	return (ret);
292}
293
294static __inline void
295vmbus_event_flags_proc(volatile u_long *event_flags, int flag_cnt)
296{
297	int f;
298
299	for (f = 0; f < flag_cnt; ++f) {
300		uint32_t rel_id_base;
301		u_long flags;
302		int bit;
303
304		if (event_flags[f] == 0)
305			continue;
306
307		flags = atomic_swap_long(&event_flags[f], 0);
308		rel_id_base = f << VMBUS_EVTFLAG_SHIFT;
309
310		while ((bit = ffsl(flags)) != 0) {
311			struct hv_vmbus_channel *channel;
312			uint32_t rel_id;
313
314			--bit;	/* NOTE: ffsl is 1-based */
315			flags &= ~(1UL << bit);
316
317			rel_id = rel_id_base + bit;
318			channel = hv_vmbus_g_connection.channels[rel_id];
319
320			/* if channel is closed or closing */
321			if (channel == NULL || channel->rxq == NULL)
322				continue;
323
324			if (channel->batched_reading)
325				hv_ring_buffer_read_begin(&channel->inbound);
326			taskqueue_enqueue(channel->rxq, &channel->channel_task);
327		}
328	}
329}
330
331void
332vmbus_event_proc(struct vmbus_softc *sc, int cpu)
333{
334	struct vmbus_evtflags *eventf;
335
336	/*
337	 * On Host with Win8 or above, the event page can be checked directly
338	 * to get the id of the channel that has the pending interrupt.
339	 */
340	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
341	vmbus_event_flags_proc(eventf->evt_flags,
342	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
343}
344
345void
346vmbus_event_proc_compat(struct vmbus_softc *sc __unused, int cpu)
347{
348	struct vmbus_evtflags *eventf;
349
350	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
351	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
352		vmbus_event_flags_proc(
353		    hv_vmbus_g_connection.recv_interrupt_page,
354		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
355	}
356}
357
358/**
359 * Send a msg on the vmbus's message connection
360 */
361int hv_vmbus_post_message(void *buffer, size_t bufferLen)
362{
363	hv_vmbus_connection_id connId;
364	sbintime_t time = SBT_1MS;
365	int retries;
366	int ret;
367
368	connId.as_uint32_t = 0;
369	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
370
371	/*
372	 * We retry to cope with transient failures caused by host side's
373	 * insufficient resources. 20 times should suffice in practice.
374	 */
375	for (retries = 0; retries < 20; retries++) {
376		ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
377						    bufferLen);
378		if (ret == HV_STATUS_SUCCESS)
379			return (0);
380
381		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
382		if (time < SBT_1S * 2)
383			time *= 2;
384	}
385
386	KASSERT(ret == HV_STATUS_SUCCESS,
387		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
388
389	return (EAGAIN);
390}
391
392/**
393 * Send an event notification to the parent
394 */
395int
396hv_vmbus_set_event(hv_vmbus_channel *channel)
397{
398	int ret = 0;
399	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
400
401	/* Each uint32_t represents 32 channels */
402
403	synch_set_bit(child_rel_id & 31,
404		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
405			+ (child_rel_id >> 5))));
406	ret = hv_vmbus_signal_event(channel->signal_event_param);
407
408	return (ret);
409}
410
411void
412vmbus_on_channel_open(const struct hv_vmbus_channel *chan)
413{
414	volatile int *flag_cnt_ptr;
415	int flag_cnt;
416
417	flag_cnt = (chan->offer_msg.child_rel_id / VMBUS_EVTFLAG_LEN) + 1;
418	flag_cnt_ptr = VMBUS_PCPU_PTR(vmbus_get_softc(), event_flags_cnt,
419	    chan->target_cpu);
420
421	for (;;) {
422		int old_flag_cnt;
423
424		old_flag_cnt = *flag_cnt_ptr;
425		if (old_flag_cnt >= flag_cnt)
426			break;
427		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
428			if (bootverbose) {
429				printf("VMBUS: channel%u update "
430				    "cpu%d flag_cnt to %d\n",
431				    chan->offer_msg.child_rel_id,
432				    chan->target_cpu, flag_cnt);
433			}
434			break;
435		}
436	}
437}
438