1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29250199Sgrehan#include <sys/param.h>
30296028Ssephe#include <sys/kernel.h>
31250199Sgrehan#include <sys/malloc.h>
32250199Sgrehan#include <sys/systm.h>
33250199Sgrehan#include <sys/lock.h>
34250199Sgrehan#include <sys/mutex.h>
35250199Sgrehan#include <machine/bus.h>
36299927Ssephe#include <machine/atomic.h>
37250199Sgrehan#include <vm/vm.h>
38250199Sgrehan#include <vm/vm_param.h>
39250199Sgrehan#include <vm/pmap.h>
40250199Sgrehan
41300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
42301106Ssephe#include <dev/hyperv/vmbus/vmbus_reg.h>
43300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
44250199Sgrehan
45250199Sgrehan/*
46250199Sgrehan * Globals
47250199Sgrehan */
48250199Sgrehanhv_vmbus_connection hv_vmbus_g_connection =
49250199Sgrehan	{ .connect_state = HV_DISCONNECTED,
50250199Sgrehan	  .next_gpadl_handle = 0xE1E10, };
51250199Sgrehan
52282212Swhuuint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
53282212Swhu
54282212Swhustatic uint32_t
55282212Swhuhv_vmbus_get_next_version(uint32_t current_ver)
56282212Swhu{
57282212Swhu	switch (current_ver) {
58282212Swhu	case (HV_VMBUS_VERSION_WIN7):
59282212Swhu		return(HV_VMBUS_VERSION_WS2008);
60282212Swhu
61282212Swhu	case (HV_VMBUS_VERSION_WIN8):
62282212Swhu		return(HV_VMBUS_VERSION_WIN7);
63282212Swhu
64282212Swhu	case (HV_VMBUS_VERSION_WIN8_1):
65282212Swhu		return(HV_VMBUS_VERSION_WIN8);
66282212Swhu
67282212Swhu	case (HV_VMBUS_VERSION_WS2008):
68282212Swhu	default:
69282212Swhu		return(HV_VMBUS_VERSION_INVALID);
70282212Swhu	}
71282212Swhu}
72282212Swhu
73250199Sgrehan/**
74282212Swhu * Negotiate the highest supported hypervisor version.
75282212Swhu */
76282212Swhustatic int
77301583Ssephehv_vmbus_negotiate_version(struct vmbus_softc *sc,
78301583Ssephe    hv_vmbus_channel_msg_info *msg_info, uint32_t version)
79282212Swhu{
80282212Swhu	int					ret = 0;
81282212Swhu	hv_vmbus_channel_initiate_contact	*msg;
82282212Swhu
83282212Swhu	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
84282212Swhu	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
85282212Swhu
86282212Swhu	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
87282212Swhu	msg->vmbus_version_requested = version;
88282212Swhu
89301583Ssephe	msg->interrupt_page = sc->vmbus_evtflags_dma.hv_paddr;
90301583Ssephe	msg->monitor_page_1 = sc->vmbus_mnf1_dma.hv_paddr;
91301583Ssephe	msg->monitor_page_2 = sc->vmbus_mnf2_dma.hv_paddr;
92282212Swhu
93282212Swhu	/**
94282212Swhu	 * Add to list before we send the request since we may receive the
95282212Swhu	 * response before returning from this routine
96282212Swhu	 */
97297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
98282212Swhu
99282212Swhu	TAILQ_INSERT_TAIL(
100282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
101282212Swhu		msg_info,
102282212Swhu		msg_list_entry);
103282212Swhu
104297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
105282212Swhu
106282212Swhu	ret = hv_vmbus_post_message(
107282212Swhu		msg,
108282212Swhu		sizeof(hv_vmbus_channel_initiate_contact));
109282212Swhu
110282212Swhu	if (ret != 0) {
111297635Ssephe		mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
112282212Swhu		TAILQ_REMOVE(
113282212Swhu			&hv_vmbus_g_connection.channel_msg_anchor,
114282212Swhu			msg_info,
115282212Swhu			msg_list_entry);
116297635Ssephe		mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
117282212Swhu		return (ret);
118282212Swhu	}
119282212Swhu
120282212Swhu	/**
121282212Swhu	 * Wait for the connection response
122282212Swhu	 */
123296028Ssephe	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
124282212Swhu
125297635Ssephe	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
126282212Swhu	TAILQ_REMOVE(
127282212Swhu		&hv_vmbus_g_connection.channel_msg_anchor,
128282212Swhu		msg_info,
129282212Swhu		msg_list_entry);
130297635Ssephe	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
131282212Swhu
132282212Swhu	/**
133282212Swhu	 * Check if successful
134282212Swhu	 */
135282212Swhu	if (msg_info->response.version_response.version_supported) {
136282212Swhu		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
137282212Swhu	} else {
138282212Swhu		ret = ECONNREFUSED;
139282212Swhu	}
140282212Swhu
141282212Swhu	return (ret);
142282212Swhu}
143282212Swhu
144282212Swhu/**
145250199Sgrehan * Send a connect request on the partition service connection
146250199Sgrehan */
147250199Sgrehanint
148301583Ssephehv_vmbus_connect(struct vmbus_softc *sc)
149300127Ssephe{
150250199Sgrehan	int					ret = 0;
151282212Swhu	uint32_t				version;
152250199Sgrehan	hv_vmbus_channel_msg_info*		msg_info = NULL;
153250199Sgrehan
154250199Sgrehan	/**
155250199Sgrehan	 * Make sure we are not connecting or connected
156250199Sgrehan	 */
157250199Sgrehan	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
158250199Sgrehan		return (-1);
159250199Sgrehan	}
160250199Sgrehan
161250199Sgrehan	/**
162250199Sgrehan	 * Initialize the vmbus connection
163250199Sgrehan	 */
164250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
165250199Sgrehan
166250199Sgrehan	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
167250199Sgrehan	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
168297635Ssephe		NULL, MTX_DEF);
169250199Sgrehan
170250199Sgrehan	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
171250199Sgrehan	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
172282212Swhu		NULL, MTX_DEF);
173250199Sgrehan
174250199Sgrehan	msg_info = (hv_vmbus_channel_msg_info*)
175250199Sgrehan		malloc(sizeof(hv_vmbus_channel_msg_info) +
176250199Sgrehan			sizeof(hv_vmbus_channel_initiate_contact),
177295308Ssephe			M_DEVBUF, M_WAITOK | M_ZERO);
178250199Sgrehan
179294553Ssephe	hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
180301106Ssephe	    VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO);
181282212Swhu	/*
182282212Swhu	 * Find the highest vmbus version number we can support.
183250199Sgrehan	 */
184282212Swhu	version = HV_VMBUS_VERSION_CURRENT;
185250199Sgrehan
186282212Swhu	do {
187301583Ssephe		ret = hv_vmbus_negotiate_version(sc, msg_info, version);
188282212Swhu		if (ret == EWOULDBLOCK) {
189282212Swhu			/*
190282212Swhu			 * We timed out.
191282212Swhu			 */
192282212Swhu			goto cleanup;
193282212Swhu		}
194250199Sgrehan
195282212Swhu		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
196282212Swhu			break;
197250199Sgrehan
198282212Swhu		version = hv_vmbus_get_next_version(version);
199282212Swhu	} while (version != HV_VMBUS_VERSION_INVALID);
200250199Sgrehan
201282212Swhu	hv_vmbus_protocal_version = version;
202282212Swhu	if (bootverbose)
203293870Ssephe		printf("VMBUS: Protocol Version: %d.%d\n",
204282212Swhu		    version >> 16, version & 0xFFFF);
205250199Sgrehan
206250199Sgrehan	sema_destroy(&msg_info->wait_sema);
207250199Sgrehan	free(msg_info, M_DEVBUF);
208250199Sgrehan
209250199Sgrehan	return (0);
210250199Sgrehan
211250199Sgrehan	/*
212250199Sgrehan	 * Cleanup after failure!
213250199Sgrehan	 */
214250199Sgrehan	cleanup:
215250199Sgrehan
216250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
217250199Sgrehan
218250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
219250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
220250199Sgrehan
221250199Sgrehan	if (msg_info) {
222250199Sgrehan		sema_destroy(&msg_info->wait_sema);
223250199Sgrehan		free(msg_info, M_DEVBUF);
224250199Sgrehan	}
225250199Sgrehan
226294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
227250199Sgrehan	return (ret);
228250199Sgrehan}
229250199Sgrehan
230250199Sgrehan/**
231250199Sgrehan * Send a disconnect request on the partition service connection
232250199Sgrehan */
233250199Sgrehanint
234300127Ssephehv_vmbus_disconnect(void)
235300127Ssephe{
236250199Sgrehan	int			 ret = 0;
237295308Ssephe	hv_vmbus_channel_unload  msg;
238250199Sgrehan
239295308Ssephe	msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
240250199Sgrehan
241295308Ssephe	ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
242250199Sgrehan
243250199Sgrehan	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
244250199Sgrehan
245294553Ssephe	free(hv_vmbus_g_connection.channels, M_DEVBUF);
246250199Sgrehan	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
247250199Sgrehan
248250199Sgrehan	return (ret);
249250199Sgrehan}
250250199Sgrehan
251300107Ssephestatic __inline void
252301106Ssephevmbus_event_flags_proc(volatile u_long *event_flags, int flag_cnt)
253250199Sgrehan{
254300107Ssephe	int f;
255250199Sgrehan
256300107Ssephe	for (f = 0; f < flag_cnt; ++f) {
257300101Ssephe		uint32_t rel_id_base;
258301106Ssephe		u_long flags;
259300101Ssephe		int bit;
260300101Ssephe
261300107Ssephe		if (event_flags[f] == 0)
262299892Ssephe			continue;
263299892Ssephe
264300107Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
265301106Ssephe		rel_id_base = f << VMBUS_EVTFLAG_SHIFT;
266299890Ssephe
267300105Ssephe		while ((bit = ffsl(flags)) != 0) {
268300105Ssephe			struct hv_vmbus_channel *channel;
269300105Ssephe			uint32_t rel_id;
270299890Ssephe
271300105Ssephe			--bit;	/* NOTE: ffsl is 1-based */
272300105Ssephe			flags &= ~(1UL << bit);
273250199Sgrehan
274300105Ssephe			rel_id = rel_id_base + bit;
275300105Ssephe			channel = hv_vmbus_g_connection.channels[rel_id];
276300105Ssephe
277300105Ssephe			/* if channel is closed or closing */
278300105Ssephe			if (channel == NULL || channel->rxq == NULL)
279300105Ssephe				continue;
280300105Ssephe
281300105Ssephe			if (channel->batched_reading)
282300105Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
283300105Ssephe			taskqueue_enqueue(channel->rxq, &channel->channel_task);
284300101Ssephe		}
285250199Sgrehan	}
286250199Sgrehan}
287250199Sgrehan
288300107Ssephevoid
289300107Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
290300107Ssephe{
291301106Ssephe	struct vmbus_evtflags *eventf;
292300107Ssephe
293300107Ssephe	/*
294300107Ssephe	 * On Host with Win8 or above, the event page can be checked directly
295300107Ssephe	 * to get the id of the channel that has the pending interrupt.
296300107Ssephe	 */
297301106Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
298301106Ssephe	vmbus_event_flags_proc(eventf->evt_flags,
299301106Ssephe	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
300300107Ssephe}
301300107Ssephe
302300107Ssephevoid
303301583Ssephevmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
304300107Ssephe{
305301106Ssephe	struct vmbus_evtflags *eventf;
306300107Ssephe
307301106Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
308301106Ssephe	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
309301583Ssephe		vmbus_event_flags_proc(sc->vmbus_rx_evtflags,
310301106Ssephe		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
311300107Ssephe	}
312300107Ssephe}
313300107Ssephe
314250199Sgrehan/**
315250199Sgrehan * Send a msg on the vmbus's message connection
316250199Sgrehan */
317297219Ssepheint hv_vmbus_post_message(void *buffer, size_t bufferLen)
318297219Ssephe{
319250199Sgrehan	hv_vmbus_connection_id connId;
320297219Ssephe	sbintime_t time = SBT_1MS;
321297219Ssephe	int retries;
322297219Ssephe	int ret;
323250199Sgrehan
324297219Ssephe	connId.as_uint32_t = 0;
325297219Ssephe	connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
326250199Sgrehan
327297219Ssephe	/*
328297219Ssephe	 * We retry to cope with transient failures caused by host side's
329297219Ssephe	 * insufficient resources. 20 times should suffice in practice.
330250199Sgrehan	 */
331297219Ssephe	for (retries = 0; retries < 20; retries++) {
332301484Ssephe		ret = hv_vmbus_post_msg_via_msg_ipc(connId,
333301484Ssephe		    VMBUS_MSGTYPE_CHANNEL, buffer, bufferLen);
334297219Ssephe		if (ret == HV_STATUS_SUCCESS)
335297219Ssephe			return (0);
336297219Ssephe
337297219Ssephe		pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
338297219Ssephe		if (time < SBT_1S * 2)
339297219Ssephe			time *= 2;
340250199Sgrehan	}
341250199Sgrehan
342297219Ssephe	KASSERT(ret == HV_STATUS_SUCCESS,
343297219Ssephe		("Error VMBUS: Message Post Failed, ret=%d\n", ret));
344250199Sgrehan
345297219Ssephe	return (EAGAIN);
346250199Sgrehan}
347250199Sgrehan
348250199Sgrehan/**
349250199Sgrehan * Send an event notification to the parent
350250199Sgrehan */
351250199Sgrehanint
352300127Ssephehv_vmbus_set_event(hv_vmbus_channel *channel)
353300127Ssephe{
354301583Ssephe	struct vmbus_softc *sc = vmbus_get_softc();
355250199Sgrehan	int ret = 0;
356301588Ssephe	uint32_t chanid = channel->offer_msg.child_rel_id;
357250199Sgrehan
358301588Ssephe	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
359301588Ssephe	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
360282212Swhu	ret = hv_vmbus_signal_event(channel->signal_event_param);
361250199Sgrehan
362250199Sgrehan	return (ret);
363250199Sgrehan}
364300102Ssephe
365300102Ssephevoid
366300102Ssephevmbus_on_channel_open(const struct hv_vmbus_channel *chan)
367300102Ssephe{
368300102Ssephe	volatile int *flag_cnt_ptr;
369300102Ssephe	int flag_cnt;
370300102Ssephe
371301106Ssephe	flag_cnt = (chan->offer_msg.child_rel_id / VMBUS_EVTFLAG_LEN) + 1;
372301106Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(vmbus_get_softc(), event_flags_cnt,
373300573Ssephe	    chan->target_cpu);
374300102Ssephe
375300102Ssephe	for (;;) {
376300102Ssephe		int old_flag_cnt;
377300102Ssephe
378300102Ssephe		old_flag_cnt = *flag_cnt_ptr;
379300102Ssephe		if (old_flag_cnt >= flag_cnt)
380300102Ssephe			break;
381300102Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
382300102Ssephe			if (bootverbose) {
383300102Ssephe				printf("VMBUS: channel%u update "
384300102Ssephe				    "cpu%d flag_cnt to %d\n",
385300102Ssephe				    chan->offer_msg.child_rel_id,
386300102Ssephe				    chan->target_cpu, flag_cnt);
387300102Ssephe			}
388300102Ssephe			break;
389300102Ssephe		}
390300102Ssephe	}
391300102Ssephe}
392