hv_connection.c revision 300107
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29250199Sgrehan#include <sys/param.h>
30296028Ssephe#include <sys/kernel.h>
31250199Sgrehan#include <sys/malloc.h>
32250199Sgrehan#include <sys/systm.h>
33250199Sgrehan#include <sys/lock.h>
34250199Sgrehan#include <sys/mutex.h>
35250199Sgrehan#include <machine/bus.h>
36299927Ssephe#include <machine/atomic.h>
37250199Sgrehan#include <vm/vm.h>
38250199Sgrehan#include <vm/vm_param.h>
39250199Sgrehan#include <vm/pmap.h>
40250199Sgrehan
41300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
42300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
43250199Sgrehan
44250199Sgrehan/*
45250199Sgrehan * Globals
46250199Sgrehan */
47250199Sgrehanhv_vmbus_connection hv_vmbus_g_connection =
48250199Sgrehan	{ .connect_state = HV_DISCONNECTED,
49250199Sgrehan	  .next_gpadl_handle = 0xE1E10, };
50250199Sgrehan
51282212Swhuuint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
52282212Swhu
53282212Swhustatic uint32_t
54282212Swhuhv_vmbus_get_next_version(uint32_t current_ver)
55282212Swhu{
56282212Swhu	switch (current_ver) {
57282212Swhu	case (HV_VMBUS_VERSION_WIN7):
58282212Swhu		return(HV_VMBUS_VERSION_WS2008);
59282212Swhu
60282212Swhu	case (HV_VMBUS_VERSION_WIN8):
61282212Swhu		return(HV_VMBUS_VERSION_WIN7);
62282212Swhu
63282212Swhu	case (HV_VMBUS_VERSION_WIN8_1):
64282212Swhu		return(HV_VMBUS_VERSION_WIN8);
65282212Swhu
66282212Swhu	case (HV_VMBUS_VERSION_WS2008):
67282212Swhu	default:
68282212Swhu		return(HV_VMBUS_VERSION_INVALID);
69282212Swhu	}
70282212Swhu}
71282212Swhu
72250199Sgrehan/**
73282212Swhu * Negotiate the highest supported hypervisor version.
74282212Swhu */
75282212Swhustatic int
76282212Swhuhv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
77282212Swhu	uint32_t version)
78282212Swhu{
79282212Swhu	int					ret = 0;
80282212Swhu	hv_vmbus_channel_initiate_contact	*msg;
81282212Swhu
82282212Swhu	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
83282212Swhu	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
84282212Swhu
85282212Swhu	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
86282212Swhu	msg->vmbus_version_requested = version;
87282212Swhu
88282212Swhu	msg->interrupt_page = hv_get_phys_addr(
89282212Swhu		hv_vmbus_g_connection.interrupt_page);
90282212Swhu
91282212Swhu	msg->monitor_page_1 = hv_get_phys_addr(
92295309Ssephe		hv_vmbus_g_connection.monitor_page_1);
93282212Swhu
94295308Ssephe	msg->monitor_page_2 = hv_get_phys_addr(
95295309Ssephe		hv_vmbus_g_connection.monitor_page_2);
96282212Swhu
97282212Swhu	/**
98282212Swhu	 * Add to list before we send the request since we may receive the
99282212Swhu	 * response before returning from this routine
100282212Swhu	 */
101297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
102282212Swhu
103282212Swhu	TAILQ_INSERT_TAIL(
104282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
105282212Swhu		msg_info,
106282212Swhu		msg_list_entry);
107282212Swhu
108297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
109282212Swhu
110282212Swhu	ret = hv_vmbus_post_message(
111282212Swhu		msg,
112282212Swhu		sizeof(hv_vmbus_channel_initiate_contact));
113282212Swhu
114282212Swhu	if (ret != 0) {
115297635Ssephe		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
116282212Swhu		TAILQ_REMOVE(
117282212Swhu			&hv_vmbus_g_connection.channel_msg_anchor,
118282212Swhu			msg_info,
119282212Swhu			msg_list_entry);
120297635Ssephe		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
121282212Swhu		return (ret);
122282212Swhu	}
123282212Swhu
124282212Swhu	/**
125282212Swhu	 * Wait for the connection response
126282212Swhu	 */
127296028Ssephe	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
128282212Swhu
129297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
130282212Swhu	TAILQ_REMOVE(
131282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
132282212Swhu		msg_info,
133282212Swhu		msg_list_entry);
134297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
135282212Swhu
136282212Swhu	/**
137282212Swhu	 * Check if successful
138282212Swhu	 */
139282212Swhu	if (msg_info->response.version_response.version_supported) {
140282212Swhu		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
141282212Swhu	} else {
142282212Swhu		ret = ECONNREFUSED;
143282212Swhu	}
144282212Swhu
145282212Swhu	return (ret);
146282212Swhu}
147282212Swhu
148282212Swhu/**
149250199Sgrehan * Send a connect request on the partition service connection
150250199Sgrehan */
151250199Sgrehanint
152250199Sgrehanhv_vmbus_connect(void) {
153250199Sgrehan	int					ret = 0;
154282212Swhu	uint32_t				version;
155250199Sgrehan	hv_vmbus_channel_msg_info*		msg_info = NULL;
156250199Sgrehan
157250199Sgrehan	/**
158250199Sgrehan	 * Make sure we are not connecting or connected
159250199Sgrehan	 */
160250199Sgrehan	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
161250199Sgrehan		return (-1);
162250199Sgrehan	}
163250199Sgrehan
164250199Sgrehan	/**
165250199Sgrehan	 * Initialize the vmbus connection
166250199Sgrehan	 */
167250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
168250199Sgrehan
169250199Sgrehan	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
170250199Sgrehan	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
171297635Ssephe		NULL, MTX_DEF);
172250199Sgrehan
173250199Sgrehan	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
174250199Sgrehan	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
175282212Swhu		NULL, MTX_DEF);
176250199Sgrehan
177250199Sgrehan	/**
178250199Sgrehan	 * Setup the vmbus event connection for channel interrupt abstraction
179250199Sgrehan	 * stuff
180250199Sgrehan	 */
181295309Ssephe	hv_vmbus_g_connection.interrupt_page = malloc(
182250199Sgrehan					PAGE_SIZE, M_DEVBUF,
183295309Ssephe					M_WAITOK | M_ZERO);
184250199Sgrehan
185250199Sgrehan	hv_vmbus_g_connection.recv_interrupt_page =
186250199Sgrehan		hv_vmbus_g_connection.interrupt_page;
187250199Sgrehan
188250199Sgrehan	hv_vmbus_g_connection.send_interrupt_page =
189250199Sgrehan		((uint8_t *) hv_vmbus_g_connection.interrupt_page +
190250199Sgrehan		    (PAGE_SIZE >> 1));
191250199Sgrehan
192250199Sgrehan	/**
193250199Sgrehan	 * Set up the monitor notification facility. The 1st page for
194250199Sgrehan	 * parent->child and the 2nd page for child->parent
195250199Sgrehan	 */
196295309Ssephe	hv_vmbus_g_connection.monitor_page_1 = malloc(
197295309Ssephe		PAGE_SIZE,
198250199Sgrehan		M_DEVBUF,
199295309Ssephe		M_WAITOK | M_ZERO);
200295309Ssephe	hv_vmbus_g_connection.monitor_page_2 = malloc(
201250199Sgrehan		PAGE_SIZE,
202295309Ssephe		M_DEVBUF,
203295309Ssephe		M_WAITOK | M_ZERO);
204250199Sgrehan
205250199Sgrehan	msg_info = (hv_vmbus_channel_msg_info*)
206250199Sgrehan		malloc(sizeof(hv_vmbus_channel_msg_info) +
207250199Sgrehan			sizeof(hv_vmbus_channel_initiate_contact),
208295308Ssephe			M_DEVBUF, M_WAITOK | M_ZERO);
209250199Sgrehan
210294553Ssephe	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
211294553Ssephe		HV_CHANNEL_MAX_COUNT,
212294553Ssephe		M_DEVBUF, M_WAITOK | M_ZERO);
213282212Swhu	/*
214282212Swhu	 * Find the highest vmbus version number we can support.
215250199Sgrehan	 */
216282212Swhu	version = HV_VMBUS_VERSION_CURRENT;
217250199Sgrehan
218282212Swhu	do {
219282212Swhu		ret = hv_vmbus_negotiate_version(msg_info, version);
220282212Swhu		if (ret == EWOULDBLOCK) {
221282212Swhu			/*
222282212Swhu			 * We timed out.
223282212Swhu			 */
224282212Swhu			goto cleanup;
225282212Swhu		}
226250199Sgrehan
227282212Swhu		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
228282212Swhu			break;
229250199Sgrehan
230282212Swhu		version = hv_vmbus_get_next_version(version);
231282212Swhu	} while (version != HV_VMBUS_VERSION_INVALID);
232250199Sgrehan
233282212Swhu	hv_vmbus_protocal_version = version;
234282212Swhu	if (bootverbose)
235293870Ssephe		printf("VMBUS: Protocol Version: %d.%d\n",
236282212Swhu		    version >> 16, version & 0xFFFF);
237250199Sgrehan
238250199Sgrehan	sema_destroy(&msg_info->wait_sema);
239250199Sgrehan	free(msg_info, M_DEVBUF);
240250199Sgrehan
241250199Sgrehan	return (0);
242250199Sgrehan
243250199Sgrehan	/*
244250199Sgrehan	 * Cleanup after failure!
245250199Sgrehan	 */
246250199Sgrehan	cleanup:
247250199Sgrehan
248250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
249250199Sgrehan
250250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
251250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
252250199Sgrehan
253250199Sgrehan	if (hv_vmbus_g_connection.interrupt_page != NULL) {
254295964Ssephe		free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
255250199Sgrehan		hv_vmbus_g_connection.interrupt_page = NULL;
256250199Sgrehan	}
257250199Sgrehan
258295309Ssephe	free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
259295309Ssephe	free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
260250199Sgrehan
261250199Sgrehan	if (msg_info) {
262250199Sgrehan		sema_destroy(&msg_info->wait_sema);
263250199Sgrehan		free(msg_info, M_DEVBUF);
264250199Sgrehan	}
265250199Sgrehan
266294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
267250199Sgrehan	return (ret);
268250199Sgrehan}
269250199Sgrehan
270250199Sgrehan/**
271250199Sgrehan * Send a disconnect request on the partition service connection
272250199Sgrehan */
273250199Sgrehanint
274250199Sgrehanhv_vmbus_disconnect(void) {
275250199Sgrehan	int			 ret = 0;
276295308Ssephe	hv_vmbus_channel_unload  msg;
277250199Sgrehan
278295308Ssephe	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
279250199Sgrehan
280295308Ssephe	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
281250199Sgrehan
282295964Ssephe	free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
283250199Sgrehan
284250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
285250199Sgrehan
286294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
287250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
288250199Sgrehan
289250199Sgrehan	return (ret);
290250199Sgrehan}
291250199Sgrehan
292300107Ssephestatic __inline void
293300107Ssephevmbus_event_flags_proc(unsigned long *event_flags, int flag_cnt)
294250199Sgrehan{
295300107Ssephe	int f;
296250199Sgrehan
297300107Ssephe	for (f = 0; f < flag_cnt; ++f) {
298300101Ssephe		uint32_t rel_id_base;
299300105Ssephe		unsigned long flags;
300300101Ssephe		int bit;
301300101Ssephe
302300107Ssephe		if (event_flags[f] == 0)
303299892Ssephe			continue;
304299892Ssephe
305300107Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
306300102Ssephe		rel_id_base = f << HV_CHANNEL_ULONG_SHIFT;
307299890Ssephe
308300105Ssephe		while ((bit = ffsl(flags)) != 0) {
309300105Ssephe			struct hv_vmbus_channel *channel;
310300105Ssephe			uint32_t rel_id;
311299890Ssephe
312300105Ssephe			--bit;	/* NOTE: ffsl is 1-based */
313300105Ssephe			flags &= ~(1UL << bit);
314250199Sgrehan
315300105Ssephe			rel_id = rel_id_base + bit;
316300105Ssephe			channel = hv_vmbus_g_connection.channels[rel_id];
317300105Ssephe
318300105Ssephe			/* if channel is closed or closing */
319300105Ssephe			if (channel == NULL || channel->rxq == NULL)
320300105Ssephe				continue;
321300105Ssephe
322300105Ssephe			if (channel->batched_reading)
323300105Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
324300105Ssephe			taskqueue_enqueue(channel->rxq, &channel->channel_task);
325300101Ssephe		}
326250199Sgrehan	}
327250199Sgrehan}
328250199Sgrehan
329300107Ssephevoid
330300107Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
331300107Ssephe{
332300107Ssephe	hv_vmbus_synic_event_flags *event;
333300107Ssephe
334300107Ssephe	event = ((hv_vmbus_synic_event_flags *)
335300107Ssephe	    hv_vmbus_g_context.syn_ic_event_page[cpu]) + HV_VMBUS_MESSAGE_SINT;
336300107Ssephe
337300107Ssephe	/*
338300107Ssephe	 * On Host with Win8 or above, the event page can be checked directly
339300107Ssephe	 * to get the id of the channel that has the pending interrupt.
340300107Ssephe	 */
341300107Ssephe	vmbus_event_flags_proc(event->flagsul,
342300107Ssephe	    VMBUS_SC_PCPU_GET(sc, event_flag_cnt, cpu));
343300107Ssephe}
344300107Ssephe
345300107Ssephevoid
346300107Ssephevmbus_event_proc_compat(struct vmbus_softc *sc __unused, int cpu)
347300107Ssephe{
348300107Ssephe	hv_vmbus_synic_event_flags *event;
349300107Ssephe
350300107Ssephe	event = ((hv_vmbus_synic_event_flags *)
351300107Ssephe	    hv_vmbus_g_context.syn_ic_event_page[cpu]) + HV_VMBUS_MESSAGE_SINT;
352300107Ssephe
353300107Ssephe	if (atomic_testandclear_int(&event->flags32[0], 0)) {
354300107Ssephe		vmbus_event_flags_proc(
355300107Ssephe		    hv_vmbus_g_connection.recv_interrupt_page,
356300107Ssephe		    HV_MAX_NUM_CHANNELS_SUPPORTED >> HV_CHANNEL_ULONG_SHIFT);
357300107Ssephe	}
358300107Ssephe}
359300107Ssephe
360250199Sgrehan/**
361250199Sgrehan * Send a msg on the vmbus's message connection
362250199Sgrehan */
363297219Ssepheint hv_vmbus_post_message(void *buffer, size_t bufferLen)
364297219Ssephe{
365250199Sgrehan	hv_vmbus_connection_id connId;
366297219Ssephe	sbintime_t time = SBT_1MS;
367297219Ssephe	int retries;
368297219Ssephe	int ret;
369250199Sgrehan
370297219Ssephe	connId.as_uint32_t = 0;
371297219Ssephe	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
372250199Sgrehan
373297219Ssephe	/*
374297219Ssephe	 * We retry to cope with transient failures caused by host side's
375297219Ssephe	 * insufficient resources. 20 times should suffice in practice.
376250199Sgrehan	 */
377297219Ssephe	for (retries = 0; retries < 20; retries++) {
378297219Ssephe		ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
379297219Ssephe						    bufferLen);
380297219Ssephe		if (ret == HV_STATUS_SUCCESS)
381297219Ssephe			return (0);
382297219Ssephe
383297219Ssephe		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
384297219Ssephe		if (time < SBT_1S * 2)
385297219Ssephe			time *= 2;
386250199Sgrehan	}
387250199Sgrehan
388297219Ssephe	KASSERT(ret == HV_STATUS_SUCCESS,
389297219Ssephe		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
390250199Sgrehan
391297219Ssephe	return (EAGAIN);
392250199Sgrehan}
393250199Sgrehan
394250199Sgrehan/**
395250199Sgrehan * Send an event notification to the parent
396250199Sgrehan */
397250199Sgrehanint
398282212Swhuhv_vmbus_set_event(hv_vmbus_channel *channel) {
399250199Sgrehan	int ret = 0;
400282212Swhu	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
401250199Sgrehan
402250199Sgrehan	/* Each uint32_t represents 32 channels */
403250199Sgrehan
404250199Sgrehan	synch_set_bit(child_rel_id & 31,
405250199Sgrehan		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
406250199Sgrehan			+ (child_rel_id >> 5))));
407282212Swhu	ret = hv_vmbus_signal_event(channel->signal_event_param);
408250199Sgrehan
409250199Sgrehan	return (ret);
410250199Sgrehan}
411300102Ssephe
412300102Ssephevoid
413300102Ssephevmbus_on_channel_open(const struct hv_vmbus_channel *chan)
414300102Ssephe{
415300102Ssephe	volatile int *flag_cnt_ptr;
416300102Ssephe	int flag_cnt;
417300102Ssephe
418300102Ssephe	flag_cnt = (chan->offer_msg.child_rel_id / HV_CHANNEL_ULONG_LEN) + 1;
419300102Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(event_flag_cnt, chan->target_cpu);
420300102Ssephe
421300102Ssephe	for (;;) {
422300102Ssephe		int old_flag_cnt;
423300102Ssephe
424300102Ssephe		old_flag_cnt = *flag_cnt_ptr;
425300102Ssephe		if (old_flag_cnt >= flag_cnt)
426300102Ssephe			break;
427300102Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
428300102Ssephe			if (bootverbose) {
429300102Ssephe				printf("VMBUS: channel%u update "
430300102Ssephe				    "cpu%d flag_cnt to %d\n",
431300102Ssephe				    chan->offer_msg.child_rel_id,
432300102Ssephe				    chan->target_cpu, flag_cnt);
433300102Ssephe			}
434300102Ssephe			break;
435300102Ssephe		}
436300102Ssephe	}
437300102Ssephe}
438