vmbus_chan.c revision 302818
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29256276Sdim#include <sys/cdefs.h>
30256276Sdim__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302818 2016-07-14 07:59:01Z sephe $");
31256276Sdim
32250199Sgrehan#include <sys/param.h>
33296028Ssephe#include <sys/kernel.h>
34250199Sgrehan#include <sys/malloc.h>
35250199Sgrehan#include <sys/systm.h>
36250199Sgrehan#include <sys/mbuf.h>
37250199Sgrehan#include <sys/lock.h>
38250199Sgrehan#include <sys/mutex.h>
39296181Ssephe#include <sys/sysctl.h>
40301588Ssephe
41301588Ssephe#include <machine/atomic.h>
42250199Sgrehan#include <machine/bus.h>
43301588Ssephe
44250199Sgrehan#include <vm/vm.h>
45250199Sgrehan#include <vm/vm_param.h>
46250199Sgrehan#include <vm/pmap.h>
47250199Sgrehan
48300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
49302619Ssephe#include <dev/hyperv/vmbus/hyperv_var.h>
50301588Ssephe#include <dev/hyperv/vmbus/vmbus_reg.h>
51300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
52250199Sgrehan
53302731Ssephestatic void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
54302692Ssephestatic void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
55302692Ssephe		    const struct hv_vmbus_channel *);
56302713Ssephestatic void	vmbus_chan_task(void *, int);
57302713Ssephestatic void	vmbus_chan_task_nobatch(void *, int);
58250199Sgrehan
59250199Sgrehan/**
60250199Sgrehan *  @brief Trigger an event notification on the specified channel
61250199Sgrehan */
62250199Sgrehanstatic void
63302731Ssephevmbus_chan_send_event(hv_vmbus_channel *channel)
64250199Sgrehan{
65302618Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
66302693Ssephe	uint32_t chanid = channel->ch_id;
67302618Ssephe
68302618Ssephe	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
69302618Ssephe	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
70302618Ssephe
71302695Ssephe	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
72302731Ssephe		atomic_set_int(
73302731Ssephe		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
74302731Ssephe		channel->ch_montrig_mask);
75250199Sgrehan	} else {
76302726Ssephe		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
77250199Sgrehan	}
78250199Sgrehan}
79250199Sgrehan
80296289Ssephestatic int
81296289Ssephevmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
82296289Ssephe{
83296289Ssephe	struct hv_vmbus_channel *chan = arg1;
84296289Ssephe	int alloc = 0;
85296289Ssephe
86302695Ssephe	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
87296289Ssephe		alloc = 1;
88296289Ssephe	return sysctl_handle_int(oidp, &alloc, 0, req);
89296289Ssephe}
90296289Ssephe
91296181Ssephestatic void
92296290Ssephevmbus_channel_sysctl_create(hv_vmbus_channel* channel)
93296181Ssephe{
94296181Ssephe	device_t dev;
95296181Ssephe	struct sysctl_oid *devch_sysctl;
96296181Ssephe	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
97296181Ssephe	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
98296181Ssephe	struct sysctl_ctx_list *ctx;
99296181Ssephe	uint32_t ch_id;
100296181Ssephe	uint16_t sub_ch_id;
101296181Ssephe	char name[16];
102296181Ssephe
103296181Ssephe	hv_vmbus_channel* primary_ch = channel->primary_channel;
104296181Ssephe
105296181Ssephe	if (primary_ch == NULL) {
106302706Ssephe		dev = channel->ch_dev;
107302693Ssephe		ch_id = channel->ch_id;
108296181Ssephe	} else {
109302706Ssephe		dev = primary_ch->ch_dev;
110302693Ssephe		ch_id = primary_ch->ch_id;
111302694Ssephe		sub_ch_id = channel->ch_subidx;
112296181Ssephe	}
113302633Ssephe	ctx = &channel->ch_sysctl_ctx;
114302633Ssephe	sysctl_ctx_init(ctx);
115296181Ssephe	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
116296181Ssephe	devch_sysctl = SYSCTL_ADD_NODE(ctx,
117296181Ssephe		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
118298693Ssephe		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
119296181Ssephe	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
120296181Ssephe	snprintf(name, sizeof(name), "%d", ch_id);
121296181Ssephe	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
122296181Ssephe	    	    SYSCTL_CHILDREN(devch_sysctl),
123298693Ssephe	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
124296181Ssephe
125296181Ssephe	if (primary_ch != NULL) {
126296181Ssephe		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
127296181Ssephe			SYSCTL_CHILDREN(devch_id_sysctl),
128298693Ssephe			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
129296181Ssephe		snprintf(name, sizeof(name), "%d", sub_ch_id);
130296181Ssephe		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
131296181Ssephe			SYSCTL_CHILDREN(devch_sub_sysctl),
132298693Ssephe			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
133296188Ssephe
134296188Ssephe		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
135296188Ssephe		    OID_AUTO, "chanid", CTLFLAG_RD,
136302693Ssephe		    &channel->ch_id, 0, "channel id");
137296181Ssephe	}
138296188Ssephe	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
139296188Ssephe	    "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
140296289Ssephe	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
141298693Ssephe	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
142298693Ssephe	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
143296289Ssephe	    "is monitor allocated to this channel");
144296188Ssephe
145296181Ssephe	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
146296181Ssephe                    SYSCTL_CHILDREN(devch_id_sysctl),
147296181Ssephe                    OID_AUTO,
148296181Ssephe		    "in",
149298693Ssephe		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
150296181Ssephe	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
151296181Ssephe                    SYSCTL_CHILDREN(devch_id_sysctl),
152296181Ssephe                    OID_AUTO,
153296181Ssephe		    "out",
154298693Ssephe		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
155296181Ssephe	hv_ring_buffer_stat(ctx,
156296181Ssephe		SYSCTL_CHILDREN(devch_id_in_sysctl),
157296181Ssephe		&(channel->inbound),
158296181Ssephe		"inbound ring buffer stats");
159296181Ssephe	hv_ring_buffer_stat(ctx,
160296181Ssephe		SYSCTL_CHILDREN(devch_id_out_sysctl),
161296181Ssephe		&(channel->outbound),
162296181Ssephe		"outbound ring buffer stats");
163296181Ssephe}
164296290Ssephe
165250199Sgrehan/**
166250199Sgrehan * @brief Open the specified channel
167250199Sgrehan */
168250199Sgrehanint
169250199Sgrehanhv_vmbus_channel_open(
170250199Sgrehan	hv_vmbus_channel*		new_channel,
171250199Sgrehan	uint32_t			send_ring_buffer_size,
172250199Sgrehan	uint32_t			recv_ring_buffer_size,
173250199Sgrehan	void*				user_data,
174250199Sgrehan	uint32_t			user_data_len,
175250199Sgrehan	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
176250199Sgrehan	void* 				context)
177250199Sgrehan{
178302607Ssephe	struct vmbus_softc *sc = new_channel->vmbus_sc;
179302607Ssephe	const struct vmbus_chanmsg_chopen_resp *resp;
180302607Ssephe	const struct vmbus_message *msg;
181302607Ssephe	struct vmbus_chanmsg_chopen *req;
182302607Ssephe	struct vmbus_msghc *mh;
183302607Ssephe	uint32_t status;
184250199Sgrehan	int ret = 0;
185250199Sgrehan	void *in, *out;
186250199Sgrehan
187302607Ssephe	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
188302607Ssephe		device_printf(sc->vmbus_dev,
189302607Ssephe		    "invalid udata len %u for chan%u\n",
190302693Ssephe		    user_data_len, new_channel->ch_id);
191302607Ssephe		return EINVAL;
192302607Ssephe	}
193302607Ssephe
194302812Ssephe	if (atomic_testandset_int(&new_channel->ch_stflags,
195302812Ssephe	    VMBUS_CHAN_ST_OPENED_SHIFT))
196302812Ssephe		panic("double-open chan%u", new_channel->ch_id);
197282212Swhu
198250199Sgrehan	new_channel->on_channel_callback = pfn_on_channel_callback;
199250199Sgrehan	new_channel->channel_callback_context = context;
200250199Sgrehan
201302692Ssephe	vmbus_chan_update_evtflagcnt(sc, new_channel);
202300102Ssephe
203302557Ssephe	new_channel->rxq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
204300646Ssephe	    new_channel->target_cpu);
205302713Ssephe	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
206302713Ssephe		TASK_INIT(&new_channel->channel_task, 0,
207302713Ssephe		    vmbus_chan_task, new_channel);
208302713Ssephe	} else {
209302713Ssephe		TASK_INIT(&new_channel->channel_task, 0,
210302713Ssephe		    vmbus_chan_task_nobatch, new_channel);
211302713Ssephe	}
212294886Ssephe
213250199Sgrehan	/* Allocate the ring buffer */
214250199Sgrehan	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
215256350Sgrehan	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
216250199Sgrehan	KASSERT(out != NULL,
217250199Sgrehan	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
218302812Ssephe	if (out == NULL) {
219302812Ssephe		ret = ENOMEM;
220302812Ssephe		goto failed;
221302812Ssephe	}
222250199Sgrehan
223250199Sgrehan	in = ((uint8_t *) out + send_ring_buffer_size);
224250199Sgrehan
225250199Sgrehan	new_channel->ring_buffer_pages = out;
226256350Sgrehan	new_channel->ring_buffer_page_count = (send_ring_buffer_size +
227256350Sgrehan	    recv_ring_buffer_size) >> PAGE_SHIFT;
228256350Sgrehan	new_channel->ring_buffer_size = send_ring_buffer_size +
229256350Sgrehan	    recv_ring_buffer_size;
230250199Sgrehan
231250199Sgrehan	hv_vmbus_ring_buffer_init(
232250199Sgrehan		&new_channel->outbound,
233250199Sgrehan		out,
234250199Sgrehan		send_ring_buffer_size);
235250199Sgrehan
236250199Sgrehan	hv_vmbus_ring_buffer_init(
237250199Sgrehan		&new_channel->inbound,
238250199Sgrehan		in,
239250199Sgrehan		recv_ring_buffer_size);
240250199Sgrehan
241296290Ssephe	/* Create sysctl tree for this channel */
242296290Ssephe	vmbus_channel_sysctl_create(new_channel);
243296181Ssephe
244250199Sgrehan	/**
245250199Sgrehan	 * Establish the gpadl for the ring buffer
246250199Sgrehan	 */
247250199Sgrehan	new_channel->ring_buffer_gpadl_handle = 0;
248250199Sgrehan
249250199Sgrehan	ret = hv_vmbus_channel_establish_gpadl(new_channel,
250250199Sgrehan		new_channel->outbound.ring_buffer,
251250199Sgrehan		send_ring_buffer_size + recv_ring_buffer_size,
252250199Sgrehan		&new_channel->ring_buffer_gpadl_handle);
253250199Sgrehan
254302607Ssephe	/*
255302607Ssephe	 * Open channel w/ the bufring GPADL on the target CPU.
256250199Sgrehan	 */
257302607Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
258302607Ssephe	if (mh == NULL) {
259302607Ssephe		device_printf(sc->vmbus_dev,
260302607Ssephe		    "can not get msg hypercall for chopen(chan%u)\n",
261302693Ssephe		    new_channel->ch_id);
262302812Ssephe		ret = ENXIO;
263302812Ssephe		goto failed;
264302607Ssephe	}
265250199Sgrehan
266302607Ssephe	req = vmbus_msghc_dataptr(mh);
267302607Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
268302693Ssephe	req->chm_chanid = new_channel->ch_id;
269302693Ssephe	req->chm_openid = new_channel->ch_id;
270302607Ssephe	req->chm_gpadl = new_channel->ring_buffer_gpadl_handle;
271302607Ssephe	req->chm_vcpuid = new_channel->target_vcpu;
272302607Ssephe	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
273250199Sgrehan	if (user_data_len)
274302607Ssephe		memcpy(req->chm_udata, user_data, user_data_len);
275250199Sgrehan
276302607Ssephe	ret = vmbus_msghc_exec(sc, mh);
277302607Ssephe	if (ret != 0) {
278302607Ssephe		device_printf(sc->vmbus_dev,
279302607Ssephe		    "chopen(chan%u) msg hypercall exec failed: %d\n",
280302693Ssephe		    new_channel->ch_id, ret);
281302607Ssephe		vmbus_msghc_put(sc, mh);
282302812Ssephe		goto failed;
283302607Ssephe	}
284250199Sgrehan
285302607Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
286302607Ssephe	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
287302607Ssephe	status = resp->chm_status;
288250199Sgrehan
289302607Ssephe	vmbus_msghc_put(sc, mh);
290250199Sgrehan
291302607Ssephe	if (status == 0) {
292302607Ssephe		if (bootverbose) {
293302607Ssephe			device_printf(sc->vmbus_dev, "chan%u opened\n",
294302693Ssephe			    new_channel->ch_id);
295302607Ssephe		}
296302812Ssephe		return 0;
297250199Sgrehan	}
298302812Ssephe
299302812Ssephe	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
300302812Ssephe	    new_channel->ch_id);
301302812Ssephe	ret = ENXIO;
302302812Ssephe
303302812Ssephefailed:
304302812Ssephe	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
305302812Ssephe	return ret;
306250199Sgrehan}
307250199Sgrehan
308250199Sgrehan/**
309302609Ssephe * @brief Establish a GPADL for the specified buffer
310250199Sgrehan */
311302609Ssepheint
312302609Ssephehv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
313302609Ssephe    void *contig_buffer, uint32_t size, uint32_t *gpadl0)
314250199Sgrehan{
315302609Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
316302609Ssephe	struct vmbus_msghc *mh;
317302609Ssephe	struct vmbus_chanmsg_gpadl_conn *req;
318302609Ssephe	const struct vmbus_message *msg;
319302609Ssephe	size_t reqsz;
320302609Ssephe	uint32_t gpadl, status;
321302609Ssephe	int page_count, range_len, i, cnt, error;
322302609Ssephe	uint64_t page_id, paddr;
323250199Sgrehan
324302609Ssephe	/*
325302609Ssephe	 * Preliminary checks.
326302609Ssephe	 */
327250199Sgrehan
328302609Ssephe	KASSERT((size & PAGE_MASK) == 0,
329302609Ssephe	    ("invalid GPA size %u, not multiple page size", size));
330250199Sgrehan	page_count = size >> PAGE_SHIFT;
331250199Sgrehan
332302609Ssephe	paddr = hv_get_phys_addr(contig_buffer);
333302609Ssephe	KASSERT((paddr & PAGE_MASK) == 0,
334302609Ssephe	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
335302609Ssephe	page_id = paddr >> PAGE_SHIFT;
336250199Sgrehan
337302609Ssephe	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
338302609Ssephe	/*
339302609Ssephe	 * We don't support multiple GPA ranges.
340302609Ssephe	 */
341302609Ssephe	if (range_len > UINT16_MAX) {
342302609Ssephe		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
343302609Ssephe		    page_count);
344302609Ssephe		return EOPNOTSUPP;
345250199Sgrehan	}
346250199Sgrehan
347302609Ssephe	/*
348302609Ssephe	 * Allocate GPADL id.
349302609Ssephe	 */
350302630Ssephe	gpadl = vmbus_gpadl_alloc(sc);
351302609Ssephe	*gpadl0 = gpadl;
352250199Sgrehan
353302609Ssephe	/*
354302609Ssephe	 * Connect this GPADL to the target channel.
355302609Ssephe	 *
356302609Ssephe	 * NOTE:
357302609Ssephe	 * Since each message can only hold small set of page
358302609Ssephe	 * addresses, several messages may be required to
359302609Ssephe	 * complete the connection.
360302609Ssephe	 */
361302609Ssephe	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
362302609Ssephe		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
363302609Ssephe	else
364302609Ssephe		cnt = page_count;
365302609Ssephe	page_count -= cnt;
366250199Sgrehan
367302609Ssephe	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
368302609Ssephe	    chm_range.gpa_page[cnt]);
369302609Ssephe	mh = vmbus_msghc_get(sc, reqsz);
370302609Ssephe	if (mh == NULL) {
371302609Ssephe		device_printf(sc->vmbus_dev,
372302609Ssephe		    "can not get msg hypercall for gpadl->chan%u\n",
373302693Ssephe		    channel->ch_id);
374302609Ssephe		return EIO;
375250199Sgrehan	}
376250199Sgrehan
377302609Ssephe	req = vmbus_msghc_dataptr(mh);
378302609Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
379302693Ssephe	req->chm_chanid = channel->ch_id;
380302609Ssephe	req->chm_gpadl = gpadl;
381302609Ssephe	req->chm_range_len = range_len;
382302609Ssephe	req->chm_range_cnt = 1;
383302609Ssephe	req->chm_range.gpa_len = size;
384302609Ssephe	req->chm_range.gpa_ofs = 0;
385302609Ssephe	for (i = 0; i < cnt; ++i)
386302609Ssephe		req->chm_range.gpa_page[i] = page_id++;
387250199Sgrehan
388302609Ssephe	error = vmbus_msghc_exec(sc, mh);
389302609Ssephe	if (error) {
390302609Ssephe		device_printf(sc->vmbus_dev,
391302609Ssephe		    "gpadl->chan%u msg hypercall exec failed: %d\n",
392302693Ssephe		    channel->ch_id, error);
393302609Ssephe		vmbus_msghc_put(sc, mh);
394302609Ssephe		return error;
395302609Ssephe	}
396250199Sgrehan
397302609Ssephe	while (page_count > 0) {
398302609Ssephe		struct vmbus_chanmsg_gpadl_subconn *subreq;
399250199Sgrehan
400302609Ssephe		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
401302609Ssephe			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
402302609Ssephe		else
403302609Ssephe			cnt = page_count;
404302609Ssephe		page_count -= cnt;
405250199Sgrehan
406302609Ssephe		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
407302609Ssephe		    chm_gpa_page[cnt]);
408302609Ssephe		vmbus_msghc_reset(mh, reqsz);
409250199Sgrehan
410302609Ssephe		subreq = vmbus_msghc_dataptr(mh);
411302609Ssephe		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
412302609Ssephe		subreq->chm_gpadl = gpadl;
413302609Ssephe		for (i = 0; i < cnt; ++i)
414302609Ssephe			subreq->chm_gpa_page[i] = page_id++;
415250199Sgrehan
416302609Ssephe		vmbus_msghc_exec_noresult(mh);
417250199Sgrehan	}
418302609Ssephe	KASSERT(page_count == 0, ("invalid page count %d", page_count));
419250199Sgrehan
420302609Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
421302609Ssephe	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
422302609Ssephe	    msg->msg_data)->chm_status;
423250199Sgrehan
424302609Ssephe	vmbus_msghc_put(sc, mh);
425250199Sgrehan
426302609Ssephe	if (status != 0) {
427302609Ssephe		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
428302693Ssephe		    "status %u\n", channel->ch_id, status);
429302609Ssephe		return EIO;
430302632Ssephe	} else {
431302632Ssephe		if (bootverbose) {
432302632Ssephe			device_printf(sc->vmbus_dev, "gpadl->chan%u "
433302693Ssephe			    "succeeded\n", channel->ch_id);
434302632Ssephe		}
435302609Ssephe	}
436302609Ssephe	return 0;
437250199Sgrehan}
438250199Sgrehan
439302611Ssephe/*
440302611Ssephe * Disconnect the GPA from the target channel
441250199Sgrehan */
442250199Sgrehanint
443302611Ssephehv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
444250199Sgrehan{
445302611Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
446302611Ssephe	struct vmbus_msghc *mh;
447302611Ssephe	struct vmbus_chanmsg_gpadl_disconn *req;
448302611Ssephe	int error;
449250199Sgrehan
450302611Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
451302611Ssephe	if (mh == NULL) {
452302611Ssephe		device_printf(sc->vmbus_dev,
453302611Ssephe		    "can not get msg hypercall for gpa x->chan%u\n",
454302693Ssephe		    chan->ch_id);
455302611Ssephe		return EBUSY;
456250199Sgrehan	}
457250199Sgrehan
458302611Ssephe	req = vmbus_msghc_dataptr(mh);
459302611Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
460302693Ssephe	req->chm_chanid = chan->ch_id;
461302611Ssephe	req->chm_gpadl = gpadl;
462250199Sgrehan
463302611Ssephe	error = vmbus_msghc_exec(sc, mh);
464302611Ssephe	if (error) {
465302611Ssephe		device_printf(sc->vmbus_dev,
466302611Ssephe		    "gpa x->chan%u msg hypercall exec failed: %d\n",
467302693Ssephe		    chan->ch_id, error);
468302611Ssephe		vmbus_msghc_put(sc, mh);
469302611Ssephe		return error;
470302611Ssephe	}
471250199Sgrehan
472302611Ssephe	vmbus_msghc_wait_result(sc, mh);
473302611Ssephe	/* Discard result; no useful information */
474302611Ssephe	vmbus_msghc_put(sc, mh);
475250199Sgrehan
476302611Ssephe	return 0;
477250199Sgrehan}
478250199Sgrehan
479282212Swhustatic void
480282212Swhuhv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
481250199Sgrehan{
482302610Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
483302610Ssephe	struct vmbus_msghc *mh;
484302610Ssephe	struct vmbus_chanmsg_chclose *req;
485294886Ssephe	struct taskqueue *rxq = channel->rxq;
486302610Ssephe	int error;
487250199Sgrehan
488302812Ssephe	/* TODO: stringent check */
489302812Ssephe	atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
490302812Ssephe
491302633Ssephe	sysctl_ctx_free(&channel->ch_sysctl_ctx);
492282212Swhu
493282212Swhu	/*
494294886Ssephe	 * set rxq to NULL to avoid more requests be scheduled
495294886Ssephe	 */
496294886Ssephe	channel->rxq = NULL;
497294886Ssephe	taskqueue_drain(rxq, &channel->channel_task);
498250199Sgrehan	channel->on_channel_callback = NULL;
499250199Sgrehan
500250199Sgrehan	/**
501250199Sgrehan	 * Send a closing message
502250199Sgrehan	 */
503250199Sgrehan
504302610Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
505302610Ssephe	if (mh == NULL) {
506302610Ssephe		device_printf(sc->vmbus_dev,
507302610Ssephe		    "can not get msg hypercall for chclose(chan%u)\n",
508302693Ssephe		    channel->ch_id);
509302610Ssephe		return;
510302610Ssephe	}
511250199Sgrehan
512302610Ssephe	req = vmbus_msghc_dataptr(mh);
513302610Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
514302693Ssephe	req->chm_chanid = channel->ch_id;
515250199Sgrehan
516302610Ssephe	error = vmbus_msghc_exec_noresult(mh);
517302610Ssephe	vmbus_msghc_put(sc, mh);
518302610Ssephe
519302610Ssephe	if (error) {
520302610Ssephe		device_printf(sc->vmbus_dev,
521302610Ssephe		    "chclose(chan%u) msg hypercall exec failed: %d\n",
522302693Ssephe		    channel->ch_id, error);
523302610Ssephe		return;
524302610Ssephe	} else if (bootverbose) {
525302610Ssephe		device_printf(sc->vmbus_dev, "close chan%u\n",
526302693Ssephe		    channel->ch_id);
527302610Ssephe	}
528302610Ssephe
529250199Sgrehan	/* Tear down the gpadl for the channel's ring buffer */
530250199Sgrehan	if (channel->ring_buffer_gpadl_handle) {
531250199Sgrehan		hv_vmbus_channel_teardown_gpdal(channel,
532250199Sgrehan			channel->ring_buffer_gpadl_handle);
533250199Sgrehan	}
534250199Sgrehan
535250199Sgrehan	/* TODO: Send a msg to release the childRelId */
536250199Sgrehan
537250199Sgrehan	/* cleanup the ring buffers for this channel */
538250199Sgrehan	hv_ring_buffer_cleanup(&channel->outbound);
539250199Sgrehan	hv_ring_buffer_cleanup(&channel->inbound);
540250199Sgrehan
541256350Sgrehan	contigfree(channel->ring_buffer_pages, channel->ring_buffer_size,
542256350Sgrehan	    M_DEVBUF);
543282212Swhu}
544250199Sgrehan
545302818Ssephe/*
546302818Ssephe * Caller should make sure that all sub-channels have
547302818Ssephe * been added to 'chan' and all to-be-closed channels
548302818Ssephe * are not being opened.
549282212Swhu */
550282212Swhuvoid
551302818Ssephehv_vmbus_channel_close(struct hv_vmbus_channel *chan)
552282212Swhu{
553302818Ssephe	int subchan_cnt;
554282212Swhu
555302818Ssephe	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
556282212Swhu		/*
557302818Ssephe		 * Sub-channel is closed when its primary channel
558302818Ssephe		 * is closed; done.
559282212Swhu		 */
560282212Swhu		return;
561282212Swhu	}
562282212Swhu
563250199Sgrehan	/*
564302818Ssephe	 * Close all sub-channels, if any.
565250199Sgrehan	 */
566302818Ssephe	subchan_cnt = chan->subchan_cnt;
567302818Ssephe	if (subchan_cnt > 0) {
568302818Ssephe		struct hv_vmbus_channel **subchan;
569302818Ssephe		int i;
570302818Ssephe
571302818Ssephe		subchan = vmbus_get_subchan(chan, subchan_cnt);
572302818Ssephe		for (i = 0; i < subchan_cnt; ++i)
573302818Ssephe			hv_vmbus_channel_close_internal(subchan[i]);
574302818Ssephe		vmbus_rel_subchan(subchan, subchan_cnt);
575250199Sgrehan	}
576302818Ssephe
577302818Ssephe	/* Then close the primary channel. */
578302818Ssephe	hv_vmbus_channel_close_internal(chan);
579250199Sgrehan}
580250199Sgrehan
581250199Sgrehan/**
582250199Sgrehan * @brief Send the specified buffer on the given channel
583250199Sgrehan */
584250199Sgrehanint
585250199Sgrehanhv_vmbus_channel_send_packet(
586250199Sgrehan	hv_vmbus_channel*	channel,
587250199Sgrehan	void*			buffer,
588250199Sgrehan	uint32_t		buffer_len,
589250199Sgrehan	uint64_t		request_id,
590250199Sgrehan	hv_vmbus_packet_type	type,
591250199Sgrehan	uint32_t		flags)
592250199Sgrehan{
593250199Sgrehan	int			ret = 0;
594250199Sgrehan	hv_vm_packet_descriptor	desc;
595250199Sgrehan	uint32_t		packet_len;
596250199Sgrehan	uint64_t		aligned_data;
597250199Sgrehan	uint32_t		packet_len_aligned;
598282212Swhu	boolean_t		need_sig;
599250199Sgrehan	hv_vmbus_sg_buffer_list	buffer_list[3];
600250199Sgrehan
601250199Sgrehan	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
602250199Sgrehan	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
603250199Sgrehan	aligned_data = 0;
604250199Sgrehan
605250199Sgrehan	/* Setup the descriptor */
606250199Sgrehan	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
607250199Sgrehan	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
608250199Sgrehan			    /* in 8-bytes granularity */
609250199Sgrehan	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
610250199Sgrehan	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
611250199Sgrehan	desc.transaction_id = request_id;
612250199Sgrehan
613250199Sgrehan	buffer_list[0].data = &desc;
614250199Sgrehan	buffer_list[0].length = sizeof(hv_vm_packet_descriptor);
615250199Sgrehan
616250199Sgrehan	buffer_list[1].data = buffer;
617250199Sgrehan	buffer_list[1].length = buffer_len;
618250199Sgrehan
619250199Sgrehan	buffer_list[2].data = &aligned_data;
620250199Sgrehan	buffer_list[2].length = packet_len_aligned - packet_len;
621250199Sgrehan
622282212Swhu	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
623282212Swhu	    &need_sig);
624250199Sgrehan
625250199Sgrehan	/* TODO: We should determine if this is optional */
626302731Ssephe	if (ret == 0 && need_sig)
627302731Ssephe		vmbus_chan_send_event(channel);
628250199Sgrehan
629250199Sgrehan	return (ret);
630250199Sgrehan}
631250199Sgrehan
632250199Sgrehan/**
633250199Sgrehan * @brief Send a range of single-page buffer packets using
634250199Sgrehan * a GPADL Direct packet type
635250199Sgrehan */
636250199Sgrehanint
637250199Sgrehanhv_vmbus_channel_send_packet_pagebuffer(
638250199Sgrehan	hv_vmbus_channel*	channel,
639250199Sgrehan	hv_vmbus_page_buffer	page_buffers[],
640250199Sgrehan	uint32_t		page_count,
641250199Sgrehan	void*			buffer,
642250199Sgrehan	uint32_t		buffer_len,
643250199Sgrehan	uint64_t		request_id)
644250199Sgrehan{
645250199Sgrehan
646250199Sgrehan	int					ret = 0;
647282212Swhu	boolean_t				need_sig;
648250199Sgrehan	uint32_t				packet_len;
649294705Ssephe	uint32_t				page_buflen;
650250199Sgrehan	uint32_t				packetLen_aligned;
651294705Ssephe	hv_vmbus_sg_buffer_list			buffer_list[4];
652250199Sgrehan	hv_vmbus_channel_packet_page_buffer	desc;
653250199Sgrehan	uint32_t				descSize;
654250199Sgrehan	uint64_t				alignedData = 0;
655250199Sgrehan
656250199Sgrehan	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
657250199Sgrehan		return (EINVAL);
658250199Sgrehan
659250199Sgrehan	/*
660250199Sgrehan	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
661250199Sgrehan	 *  is the largest size we support
662250199Sgrehan	 */
663294705Ssephe	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
664294705Ssephe	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
665294705Ssephe	packet_len = descSize + page_buflen + buffer_len;
666250199Sgrehan	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
667250199Sgrehan
668250199Sgrehan	/* Setup the descriptor */
669250199Sgrehan	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
670250199Sgrehan	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
671294705Ssephe	/* in 8-bytes granularity */
672294705Ssephe	desc.data_offset8 = (descSize + page_buflen) >> 3;
673250199Sgrehan	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
674250199Sgrehan	desc.transaction_id = request_id;
675250199Sgrehan	desc.range_count = page_count;
676250199Sgrehan
677250199Sgrehan	buffer_list[0].data = &desc;
678250199Sgrehan	buffer_list[0].length = descSize;
679250199Sgrehan
680294705Ssephe	buffer_list[1].data = page_buffers;
681294705Ssephe	buffer_list[1].length = page_buflen;
682250199Sgrehan
683294705Ssephe	buffer_list[2].data = buffer;
684294705Ssephe	buffer_list[2].length = buffer_len;
685250199Sgrehan
686294705Ssephe	buffer_list[3].data = &alignedData;
687294705Ssephe	buffer_list[3].length = packetLen_aligned - packet_len;
688294705Ssephe
689294705Ssephe	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4,
690282212Swhu	    &need_sig);
691250199Sgrehan
692250199Sgrehan	/* TODO: We should determine if this is optional */
693302731Ssephe	if (ret == 0 && need_sig)
694302731Ssephe		vmbus_chan_send_event(channel);
695250199Sgrehan
696250199Sgrehan	return (ret);
697250199Sgrehan}
698250199Sgrehan
699250199Sgrehan/**
700250199Sgrehan * @brief Send a multi-page buffer packet using a GPADL Direct packet type
701250199Sgrehan */
702250199Sgrehanint
703250199Sgrehanhv_vmbus_channel_send_packet_multipagebuffer(
704250199Sgrehan	hv_vmbus_channel*		channel,
705250199Sgrehan	hv_vmbus_multipage_buffer*	multi_page_buffer,
706250199Sgrehan	void*				buffer,
707250199Sgrehan	uint32_t			buffer_len,
708250199Sgrehan	uint64_t			request_id)
709250199Sgrehan{
710250199Sgrehan
711250199Sgrehan	int			ret = 0;
712250199Sgrehan	uint32_t		desc_size;
713282212Swhu	boolean_t		need_sig;
714250199Sgrehan	uint32_t		packet_len;
715250199Sgrehan	uint32_t		packet_len_aligned;
716250199Sgrehan	uint32_t		pfn_count;
717250199Sgrehan	uint64_t		aligned_data = 0;
718250199Sgrehan	hv_vmbus_sg_buffer_list	buffer_list[3];
719250199Sgrehan	hv_vmbus_channel_packet_multipage_buffer desc;
720250199Sgrehan
721250199Sgrehan	pfn_count =
722250199Sgrehan	    HV_NUM_PAGES_SPANNED(
723250199Sgrehan		    multi_page_buffer->offset,
724250199Sgrehan		    multi_page_buffer->length);
725250199Sgrehan
726250199Sgrehan	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
727250199Sgrehan	    return (EINVAL);
728250199Sgrehan	/*
729250199Sgrehan	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
730250199Sgrehan	 * is the largest size we support
731250199Sgrehan	 */
732250199Sgrehan	desc_size =
733250199Sgrehan	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
734250199Sgrehan		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
735250199Sgrehan			sizeof(uint64_t));
736250199Sgrehan	packet_len = desc_size + buffer_len;
737250199Sgrehan	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
738250199Sgrehan
739250199Sgrehan	/*
740250199Sgrehan	 * Setup the descriptor
741250199Sgrehan	 */
742250199Sgrehan	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
743250199Sgrehan	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
744250199Sgrehan	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
745250199Sgrehan	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
746250199Sgrehan	desc.transaction_id = request_id;
747250199Sgrehan	desc.range_count = 1;
748250199Sgrehan
749250199Sgrehan	desc.range.length = multi_page_buffer->length;
750250199Sgrehan	desc.range.offset = multi_page_buffer->offset;
751250199Sgrehan
752250199Sgrehan	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
753250199Sgrehan		pfn_count * sizeof(uint64_t));
754250199Sgrehan
755250199Sgrehan	buffer_list[0].data = &desc;
756250199Sgrehan	buffer_list[0].length = desc_size;
757250199Sgrehan
758250199Sgrehan	buffer_list[1].data = buffer;
759250199Sgrehan	buffer_list[1].length = buffer_len;
760250199Sgrehan
761250199Sgrehan	buffer_list[2].data = &aligned_data;
762250199Sgrehan	buffer_list[2].length = packet_len_aligned - packet_len;
763250199Sgrehan
764282212Swhu	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
765282212Swhu	    &need_sig);
766250199Sgrehan
767250199Sgrehan	/* TODO: We should determine if this is optional */
768302731Ssephe	if (ret == 0 && need_sig)
769302731Ssephe		vmbus_chan_send_event(channel);
770250199Sgrehan
771250199Sgrehan	return (ret);
772250199Sgrehan}
773250199Sgrehan
774250199Sgrehan/**
775250199Sgrehan * @brief Retrieve the user packet on the specified channel
776250199Sgrehan */
777250199Sgrehanint
778250199Sgrehanhv_vmbus_channel_recv_packet(
779250199Sgrehan	hv_vmbus_channel*	channel,
780250199Sgrehan	void*			Buffer,
781250199Sgrehan	uint32_t		buffer_len,
782250199Sgrehan	uint32_t*		buffer_actual_len,
783250199Sgrehan	uint64_t*		request_id)
784250199Sgrehan{
785250199Sgrehan	int			ret;
786250199Sgrehan	uint32_t		user_len;
787250199Sgrehan	uint32_t		packet_len;
788250199Sgrehan	hv_vm_packet_descriptor	desc;
789250199Sgrehan
790250199Sgrehan	*buffer_actual_len = 0;
791250199Sgrehan	*request_id = 0;
792250199Sgrehan
793250199Sgrehan	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
794250199Sgrehan		sizeof(hv_vm_packet_descriptor));
795250199Sgrehan	if (ret != 0)
796250199Sgrehan		return (0);
797250199Sgrehan
798250199Sgrehan	packet_len = desc.length8 << 3;
799250199Sgrehan	user_len = packet_len - (desc.data_offset8 << 3);
800250199Sgrehan
801250199Sgrehan	*buffer_actual_len = user_len;
802250199Sgrehan
803250199Sgrehan	if (user_len > buffer_len)
804250199Sgrehan		return (EINVAL);
805250199Sgrehan
806250199Sgrehan	*request_id = desc.transaction_id;
807250199Sgrehan
808250199Sgrehan	/* Copy over the packet to the user buffer */
809250199Sgrehan	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
810250199Sgrehan		(desc.data_offset8 << 3));
811250199Sgrehan
812250199Sgrehan	return (0);
813250199Sgrehan}
814250199Sgrehan
815250199Sgrehan/**
816250199Sgrehan * @brief Retrieve the raw packet on the specified channel
817250199Sgrehan */
818250199Sgrehanint
819250199Sgrehanhv_vmbus_channel_recv_packet_raw(
820250199Sgrehan	hv_vmbus_channel*	channel,
821250199Sgrehan	void*			buffer,
822250199Sgrehan	uint32_t		buffer_len,
823250199Sgrehan	uint32_t*		buffer_actual_len,
824250199Sgrehan	uint64_t*		request_id)
825250199Sgrehan{
826250199Sgrehan	int		ret;
827250199Sgrehan	uint32_t	packetLen;
828250199Sgrehan	hv_vm_packet_descriptor	desc;
829250199Sgrehan
830250199Sgrehan	*buffer_actual_len = 0;
831250199Sgrehan	*request_id = 0;
832250199Sgrehan
833250199Sgrehan	ret = hv_ring_buffer_peek(
834250199Sgrehan		&channel->inbound, &desc,
835250199Sgrehan		sizeof(hv_vm_packet_descriptor));
836250199Sgrehan
837250199Sgrehan	if (ret != 0)
838250199Sgrehan	    return (0);
839250199Sgrehan
840250199Sgrehan	packetLen = desc.length8 << 3;
841250199Sgrehan	*buffer_actual_len = packetLen;
842250199Sgrehan
843250199Sgrehan	if (packetLen > buffer_len)
844250199Sgrehan	    return (ENOBUFS);
845250199Sgrehan
846250199Sgrehan	*request_id = desc.transaction_id;
847250199Sgrehan
848250199Sgrehan	/* Copy over the entire packet to the user buffer */
849250199Sgrehan	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
850250199Sgrehan
851250199Sgrehan	return (0);
852250199Sgrehan}
853294886Ssephe
854294886Ssephestatic void
855302713Ssephevmbus_chan_task(void *xchan, int pending __unused)
856294886Ssephe{
857302713Ssephe	struct hv_vmbus_channel *chan = xchan;
858302713Ssephe	void (*callback)(void *);
859302713Ssephe	void *arg;
860294886Ssephe
861302713Ssephe	arg = chan->channel_callback_context;
862302713Ssephe	callback = chan->on_channel_callback;
863302709Ssephe
864302710Ssephe	/*
865302710Ssephe	 * Optimize host to guest signaling by ensuring:
866302710Ssephe	 * 1. While reading the channel, we disable interrupts from
867302710Ssephe	 *    host.
868302710Ssephe	 * 2. Ensure that we process all posted messages from the host
869302710Ssephe	 *    before returning from this callback.
870302710Ssephe	 * 3. Once we return, enable signaling from the host. Once this
871302710Ssephe	 *    state is set we check to see if additional packets are
872302710Ssephe	 *    available to read. In this case we repeat the process.
873302713Ssephe	 *
874302713Ssephe	 * NOTE: Interrupt has been disabled in the ISR.
875302710Ssephe	 */
876302713Ssephe	for (;;) {
877302713Ssephe		uint32_t left;
878294886Ssephe
879302713Ssephe		callback(arg);
880294886Ssephe
881302713Ssephe		left = hv_ring_buffer_read_end(&chan->inbound);
882302713Ssephe		if (left == 0) {
883302713Ssephe			/* No more data in RX bufring; done */
884302713Ssephe			break;
885302713Ssephe		}
886302713Ssephe		hv_ring_buffer_read_begin(&chan->inbound);
887302713Ssephe	}
888294886Ssephe}
889302692Ssephe
890302713Ssephestatic void
891302713Ssephevmbus_chan_task_nobatch(void *xchan, int pending __unused)
892302713Ssephe{
893302713Ssephe	struct hv_vmbus_channel *chan = xchan;
894302713Ssephe
895302713Ssephe	chan->on_channel_callback(chan->channel_callback_context);
896302713Ssephe}
897302713Ssephe
898302692Ssephestatic __inline void
899302692Ssephevmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
900302692Ssephe    int flag_cnt)
901302692Ssephe{
902302692Ssephe	int f;
903302692Ssephe
904302692Ssephe	for (f = 0; f < flag_cnt; ++f) {
905302806Ssephe		uint32_t chid_base;
906302692Ssephe		u_long flags;
907302806Ssephe		int chid_ofs;
908302692Ssephe
909302692Ssephe		if (event_flags[f] == 0)
910302692Ssephe			continue;
911302692Ssephe
912302692Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
913302806Ssephe		chid_base = f << VMBUS_EVTFLAG_SHIFT;
914302692Ssephe
915302806Ssephe		while ((chid_ofs = ffsl(flags)) != 0) {
916302692Ssephe			struct hv_vmbus_channel *channel;
917302692Ssephe
918302806Ssephe			--chid_ofs; /* NOTE: ffsl is 1-based */
919302806Ssephe			flags &= ~(1UL << chid_ofs);
920302692Ssephe
921302806Ssephe			channel = sc->vmbus_chmap[chid_base + chid_ofs];
922302692Ssephe
923302692Ssephe			/* if channel is closed or closing */
924302692Ssephe			if (channel == NULL || channel->rxq == NULL)
925302692Ssephe				continue;
926302692Ssephe
927302709Ssephe			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
928302692Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
929302692Ssephe			taskqueue_enqueue(channel->rxq, &channel->channel_task);
930302692Ssephe		}
931302692Ssephe	}
932302692Ssephe}
933302692Ssephe
934302692Ssephevoid
935302692Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
936302692Ssephe{
937302692Ssephe	struct vmbus_evtflags *eventf;
938302692Ssephe
939302692Ssephe	/*
940302692Ssephe	 * On Host with Win8 or above, the event page can be checked directly
941302692Ssephe	 * to get the id of the channel that has the pending interrupt.
942302692Ssephe	 */
943302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
944302692Ssephe	vmbus_event_flags_proc(sc, eventf->evt_flags,
945302692Ssephe	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
946302692Ssephe}
947302692Ssephe
948302692Ssephevoid
949302692Ssephevmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
950302692Ssephe{
951302692Ssephe	struct vmbus_evtflags *eventf;
952302692Ssephe
953302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
954302692Ssephe	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
955302692Ssephe		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
956302692Ssephe		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
957302692Ssephe	}
958302692Ssephe}
959302692Ssephe
960302692Ssephestatic void
961302692Ssephevmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
962302692Ssephe    const struct hv_vmbus_channel *chan)
963302692Ssephe{
964302692Ssephe	volatile int *flag_cnt_ptr;
965302692Ssephe	int flag_cnt;
966302692Ssephe
967302693Ssephe	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
968302692Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->target_cpu);
969302692Ssephe
970302692Ssephe	for (;;) {
971302692Ssephe		int old_flag_cnt;
972302692Ssephe
973302692Ssephe		old_flag_cnt = *flag_cnt_ptr;
974302692Ssephe		if (old_flag_cnt >= flag_cnt)
975302692Ssephe			break;
976302692Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
977302692Ssephe			if (bootverbose) {
978302692Ssephe				device_printf(sc->vmbus_dev,
979302692Ssephe				    "channel%u update cpu%d flag_cnt to %d\n",
980302693Ssephe				    chan->ch_id,
981302692Ssephe				    chan->target_cpu, flag_cnt);
982302692Ssephe			}
983302692Ssephe			break;
984302692Ssephe		}
985302692Ssephe	}
986302692Ssephe}
987