hv_connection.c revision 300107
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/param.h>
30#include <sys/kernel.h>
31#include <sys/malloc.h>
32#include <sys/systm.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <machine/bus.h>
36#include <machine/atomic.h>
37#include <vm/vm.h>
38#include <vm/vm_param.h>
39#include <vm/pmap.h>
40
41#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
42#include <dev/hyperv/vmbus/vmbus_var.h>
43
44/*
45 * Globals
46 */
47hv_vmbus_connection hv_vmbus_g_connection =
48	{ .connect_state = HV_DISCONNECTED,
49	  .next_gpadl_handle = 0xE1E10, };
50
51uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
52
53static uint32_t
54hv_vmbus_get_next_version(uint32_t current_ver)
55{
56	switch (current_ver) {
57	case (HV_VMBUS_VERSION_WIN7):
58		return(HV_VMBUS_VERSION_WS2008);
59
60	case (HV_VMBUS_VERSION_WIN8):
61		return(HV_VMBUS_VERSION_WIN7);
62
63	case (HV_VMBUS_VERSION_WIN8_1):
64		return(HV_VMBUS_VERSION_WIN8);
65
66	case (HV_VMBUS_VERSION_WS2008):
67	default:
68		return(HV_VMBUS_VERSION_INVALID);
69	}
70}
71
72/**
73 * Negotiate the highest supported hypervisor version.
74 */
75static int
76hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
77	uint32_t version)
78{
79	int					ret = 0;
80	hv_vmbus_channel_initiate_contact	*msg;
81
82	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
83	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
84
85	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
86	msg->vmbus_version_requested = version;
87
88	msg->interrupt_page = hv_get_phys_addr(
89		hv_vmbus_g_connection.interrupt_page);
90
91	msg->monitor_page_1 = hv_get_phys_addr(
92		hv_vmbus_g_connection.monitor_page_1);
93
94	msg->monitor_page_2 = hv_get_phys_addr(
95		hv_vmbus_g_connection.monitor_page_2);
96
97	/**
98	 * Add to list before we send the request since we may receive the
99	 * response before returning from this routine
100	 */
101	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
102
103	TAILQ_INSERT_TAIL(
104		&hv_vmbus_g_connection.channel_msg_anchor,
105		msg_info,
106		msg_list_entry);
107
108	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
109
110	ret = hv_vmbus_post_message(
111		msg,
112		sizeof(hv_vmbus_channel_initiate_contact));
113
114	if (ret != 0) {
115		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
116		TAILQ_REMOVE(
117			&hv_vmbus_g_connection.channel_msg_anchor,
118			msg_info,
119			msg_list_entry);
120		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
121		return (ret);
122	}
123
124	/**
125	 * Wait for the connection response
126	 */
127	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
128
129	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
130	TAILQ_REMOVE(
131		&hv_vmbus_g_connection.channel_msg_anchor,
132		msg_info,
133		msg_list_entry);
134	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
135
136	/**
137	 * Check if successful
138	 */
139	if (msg_info->response.version_response.version_supported) {
140		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
141	} else {
142		ret = ECONNREFUSED;
143	}
144
145	return (ret);
146}
147
148/**
149 * Send a connect request on the partition service connection
150 */
151int
152hv_vmbus_connect(void) {
153	int					ret = 0;
154	uint32_t				version;
155	hv_vmbus_channel_msg_info*		msg_info = NULL;
156
157	/**
158	 * Make sure we are not connecting or connected
159	 */
160	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
161		return (-1);
162	}
163
164	/**
165	 * Initialize the vmbus connection
166	 */
167	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
168
169	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
170	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
171		NULL, MTX_DEF);
172
173	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
174	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
175		NULL, MTX_DEF);
176
177	/**
178	 * Setup the vmbus event connection for channel interrupt abstraction
179	 * stuff
180	 */
181	hv_vmbus_g_connection.interrupt_page = malloc(
182					PAGE_SIZE, M_DEVBUF,
183					M_WAITOK | M_ZERO);
184
185	hv_vmbus_g_connection.recv_interrupt_page =
186		hv_vmbus_g_connection.interrupt_page;
187
188	hv_vmbus_g_connection.send_interrupt_page =
189		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
190		    (PAGE_SIZE >> 1));
191
192	/**
193	 * Set up the monitor notification facility. The 1st page for
194	 * parent->child and the 2nd page for child->parent
195	 */
196	hv_vmbus_g_connection.monitor_page_1 = malloc(
197		PAGE_SIZE,
198		M_DEVBUF,
199		M_WAITOK | M_ZERO);
200	hv_vmbus_g_connection.monitor_page_2 = malloc(
201		PAGE_SIZE,
202		M_DEVBUF,
203		M_WAITOK | M_ZERO);
204
205	msg_info = (hv_vmbus_channel_msg_info*)
206		malloc(sizeof(hv_vmbus_channel_msg_info) +
207			sizeof(hv_vmbus_channel_initiate_contact),
208			M_DEVBUF, M_WAITOK | M_ZERO);
209
210	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
211		HV_CHANNEL_MAX_COUNT,
212		M_DEVBUF, M_WAITOK | M_ZERO);
213	/*
214	 * Find the highest vmbus version number we can support.
215	 */
216	version = HV_VMBUS_VERSION_CURRENT;
217
218	do {
219		ret = hv_vmbus_negotiate_version(msg_info, version);
220		if (ret == EWOULDBLOCK) {
221			/*
222			 * We timed out.
223			 */
224			goto cleanup;
225		}
226
227		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
228			break;
229
230		version = hv_vmbus_get_next_version(version);
231	} while (version != HV_VMBUS_VERSION_INVALID);
232
233	hv_vmbus_protocal_version = version;
234	if (bootverbose)
235		printf("VMBUS: Protocol Version: %d.%d\n",
236		    version >> 16, version & 0xFFFF);
237
238	sema_destroy(&msg_info->wait_sema);
239	free(msg_info, M_DEVBUF);
240
241	return (0);
242
243	/*
244	 * Cleanup after failure!
245	 */
246	cleanup:
247
248	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
249
250	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
251	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
252
253	if (hv_vmbus_g_connection.interrupt_page != NULL) {
254		free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
255		hv_vmbus_g_connection.interrupt_page = NULL;
256	}
257
258	free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
259	free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
260
261	if (msg_info) {
262		sema_destroy(&msg_info->wait_sema);
263		free(msg_info, M_DEVBUF);
264	}
265
266	free(hv_vmbus_g_connection.channels, M_DEVBUF);
267	return (ret);
268}
269
270/**
271 * Send a disconnect request on the partition service connection
272 */
273int
274hv_vmbus_disconnect(void) {
275	int			 ret = 0;
276	hv_vmbus_channel_unload  msg;
277
278	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
279
280	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
281
282	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
283
284	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
285
286	free(hv_vmbus_g_connection.channels, M_DEVBUF);
287	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
288
289	return (ret);
290}
291
292static __inline void
293vmbus_event_flags_proc(unsigned long *event_flags, int flag_cnt)
294{
295	int f;
296
297	for (f = 0; f < flag_cnt; ++f) {
298		uint32_t rel_id_base;
299		unsigned long flags;
300		int bit;
301
302		if (event_flags[f] == 0)
303			continue;
304
305		flags = atomic_swap_long(&event_flags[f], 0);
306		rel_id_base = f << HV_CHANNEL_ULONG_SHIFT;
307
308		while ((bit = ffsl(flags)) != 0) {
309			struct hv_vmbus_channel *channel;
310			uint32_t rel_id;
311
312			--bit;	/* NOTE: ffsl is 1-based */
313			flags &= ~(1UL << bit);
314
315			rel_id = rel_id_base + bit;
316			channel = hv_vmbus_g_connection.channels[rel_id];
317
318			/* if channel is closed or closing */
319			if (channel == NULL || channel->rxq == NULL)
320				continue;
321
322			if (channel->batched_reading)
323				hv_ring_buffer_read_begin(&channel->inbound);
324			taskqueue_enqueue(channel->rxq, &channel->channel_task);
325		}
326	}
327}
328
329void
330vmbus_event_proc(struct vmbus_softc *sc, int cpu)
331{
332	hv_vmbus_synic_event_flags *event;
333
334	event = ((hv_vmbus_synic_event_flags *)
335	    hv_vmbus_g_context.syn_ic_event_page[cpu]) + HV_VMBUS_MESSAGE_SINT;
336
337	/*
338	 * On Host with Win8 or above, the event page can be checked directly
339	 * to get the id of the channel that has the pending interrupt.
340	 */
341	vmbus_event_flags_proc(event->flagsul,
342	    VMBUS_SC_PCPU_GET(sc, event_flag_cnt, cpu));
343}
344
345void
346vmbus_event_proc_compat(struct vmbus_softc *sc __unused, int cpu)
347{
348	hv_vmbus_synic_event_flags *event;
349
350	event = ((hv_vmbus_synic_event_flags *)
351	    hv_vmbus_g_context.syn_ic_event_page[cpu]) + HV_VMBUS_MESSAGE_SINT;
352
353	if (atomic_testandclear_int(&event->flags32[0], 0)) {
354		vmbus_event_flags_proc(
355		    hv_vmbus_g_connection.recv_interrupt_page,
356		    HV_MAX_NUM_CHANNELS_SUPPORTED >> HV_CHANNEL_ULONG_SHIFT);
357	}
358}
359
360/**
361 * Send a msg on the vmbus's message connection
362 */
363int hv_vmbus_post_message(void *buffer, size_t bufferLen)
364{
365	hv_vmbus_connection_id connId;
366	sbintime_t time = SBT_1MS;
367	int retries;
368	int ret;
369
370	connId.as_uint32_t = 0;
371	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
372
373	/*
374	 * We retry to cope with transient failures caused by host side's
375	 * insufficient resources. 20 times should suffice in practice.
376	 */
377	for (retries = 0; retries < 20; retries++) {
378		ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
379						    bufferLen);
380		if (ret == HV_STATUS_SUCCESS)
381			return (0);
382
383		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
384		if (time < SBT_1S * 2)
385			time *= 2;
386	}
387
388	KASSERT(ret == HV_STATUS_SUCCESS,
389		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
390
391	return (EAGAIN);
392}
393
394/**
395 * Send an event notification to the parent
396 */
397int
398hv_vmbus_set_event(hv_vmbus_channel *channel) {
399	int ret = 0;
400	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
401
402	/* Each uint32_t represents 32 channels */
403
404	synch_set_bit(child_rel_id & 31,
405		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
406			+ (child_rel_id >> 5))));
407	ret = hv_vmbus_signal_event(channel->signal_event_param);
408
409	return (ret);
410}
411
412void
413vmbus_on_channel_open(const struct hv_vmbus_channel *chan)
414{
415	volatile int *flag_cnt_ptr;
416	int flag_cnt;
417
418	flag_cnt = (chan->offer_msg.child_rel_id / HV_CHANNEL_ULONG_LEN) + 1;
419	flag_cnt_ptr = VMBUS_PCPU_PTR(event_flag_cnt, chan->target_cpu);
420
421	for (;;) {
422		int old_flag_cnt;
423
424		old_flag_cnt = *flag_cnt_ptr;
425		if (old_flag_cnt >= flag_cnt)
426			break;
427		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
428			if (bootverbose) {
429				printf("VMBUS: channel%u update "
430				    "cpu%d flag_cnt to %d\n",
431				    chan->offer_msg.child_rel_id,
432				    chan->target_cpu, flag_cnt);
433			}
434			break;
435		}
436	}
437}
438