vmbus_chan.c revision 303020
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29256276Sdim#include <sys/cdefs.h>
30256276Sdim__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 303020 2016-07-19 05:46:15Z sephe $");
31256276Sdim
32250199Sgrehan#include <sys/param.h>
33296028Ssephe#include <sys/kernel.h>
34250199Sgrehan#include <sys/malloc.h>
35250199Sgrehan#include <sys/systm.h>
36250199Sgrehan#include <sys/mbuf.h>
37250199Sgrehan#include <sys/lock.h>
38250199Sgrehan#include <sys/mutex.h>
39296181Ssephe#include <sys/sysctl.h>
40301588Ssephe
41301588Ssephe#include <machine/atomic.h>
42250199Sgrehan#include <machine/bus.h>
43301588Ssephe
44250199Sgrehan#include <vm/vm.h>
45250199Sgrehan#include <vm/vm_param.h>
46250199Sgrehan#include <vm/pmap.h>
47250199Sgrehan
48302872Ssephe#include <dev/hyperv/include/hyperv_busdma.h>
49300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
50302619Ssephe#include <dev/hyperv/vmbus/hyperv_var.h>
51301588Ssephe#include <dev/hyperv/vmbus/vmbus_reg.h>
52300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
53250199Sgrehan
54302731Ssephestatic void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
55302692Ssephestatic void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
56302692Ssephe		    const struct hv_vmbus_channel *);
57302864Ssephe
58302713Ssephestatic void	vmbus_chan_task(void *, int);
59302713Ssephestatic void	vmbus_chan_task_nobatch(void *, int);
60302864Ssephestatic void	vmbus_chan_detach_task(void *, int);
61250199Sgrehan
62302864Ssephestatic void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
63302864Ssephe		    const struct vmbus_message *);
64302864Ssephestatic void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
65302864Ssephe		    const struct vmbus_message *);
66302864Ssephe
67302864Ssephe/*
68302864Ssephe * Vmbus channel message processing.
69302864Ssephe */
70302864Ssephestatic const vmbus_chanmsg_proc_t
71302864Ssephevmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
72302864Ssephe	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
73302864Ssephe	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),
74302864Ssephe
75302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
76302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
77302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
78302864Ssephe};
79302864Ssephe
80250199Sgrehan/**
81250199Sgrehan *  @brief Trigger an event notification on the specified channel
82250199Sgrehan */
83250199Sgrehanstatic void
84302731Ssephevmbus_chan_send_event(hv_vmbus_channel *channel)
85250199Sgrehan{
86302618Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
87302693Ssephe	uint32_t chanid = channel->ch_id;
88302618Ssephe
89302618Ssephe	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
90302618Ssephe	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
91302618Ssephe
92302695Ssephe	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
93302731Ssephe		atomic_set_int(
94302731Ssephe		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
95302731Ssephe		channel->ch_montrig_mask);
96250199Sgrehan	} else {
97302726Ssephe		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
98250199Sgrehan	}
99250199Sgrehan}
100250199Sgrehan
101296289Ssephestatic int
102302892Ssephevmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS)
103296289Ssephe{
104296289Ssephe	struct hv_vmbus_channel *chan = arg1;
105302892Ssephe	int mnf = 0;
106296289Ssephe
107302695Ssephe	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
108302892Ssephe		mnf = 1;
109302892Ssephe	return sysctl_handle_int(oidp, &mnf, 0, req);
110296289Ssephe}
111296289Ssephe
112296181Ssephestatic void
113302892Ssephevmbus_chan_sysctl_create(struct hv_vmbus_channel *chan)
114296181Ssephe{
115302892Ssephe	struct sysctl_oid *ch_tree, *chid_tree, *br_tree;
116296181Ssephe	struct sysctl_ctx_list *ctx;
117296181Ssephe	uint32_t ch_id;
118296181Ssephe	char name[16];
119296181Ssephe
120302892Ssephe	/*
121302892Ssephe	 * Add sysctl nodes related to this channel to this
122302892Ssephe	 * channel's sysctl ctx, so that they can be destroyed
123302892Ssephe	 * independently upon close of this channel, which can
124302892Ssephe	 * happen even if the device is not detached.
125302892Ssephe	 */
126302892Ssephe	ctx = &chan->ch_sysctl_ctx;
127302633Ssephe	sysctl_ctx_init(ctx);
128302892Ssephe
129302892Ssephe	/*
130302892Ssephe	 * Create dev.NAME.UNIT.channel tree.
131302892Ssephe	 */
132302892Ssephe	ch_tree = SYSCTL_ADD_NODE(ctx,
133302892Ssephe	    SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)),
134302892Ssephe	    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
135302892Ssephe	if (ch_tree == NULL)
136302892Ssephe		return;
137302892Ssephe
138302892Ssephe	/*
139302892Ssephe	 * Create dev.NAME.UNIT.channel.CHANID tree.
140302892Ssephe	 */
141302892Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan))
142302892Ssephe		ch_id = chan->ch_id;
143302892Ssephe	else
144302892Ssephe		ch_id = chan->ch_prichan->ch_id;
145296181Ssephe	snprintf(name, sizeof(name), "%d", ch_id);
146302892Ssephe	chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
147302892Ssephe	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
148302892Ssephe	if (chid_tree == NULL)
149302892Ssephe		return;
150296181Ssephe
151302892Ssephe	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
152302892Ssephe		/*
153302892Ssephe		 * Create dev.NAME.UNIT.channel.CHANID.sub tree.
154302892Ssephe		 */
155302892Ssephe		ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree),
156302892Ssephe		    OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
157302892Ssephe		if (ch_tree == NULL)
158302892Ssephe			return;
159296188Ssephe
160302892Ssephe		/*
161302892Ssephe		 * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree.
162302892Ssephe		 *
163302892Ssephe		 * NOTE:
164302892Ssephe		 * chid_tree is changed to this new sysctl tree.
165302892Ssephe		 */
166302892Ssephe		snprintf(name, sizeof(name), "%d", chan->ch_subidx);
167302892Ssephe		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
168302892Ssephe		    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
169302892Ssephe		if (chid_tree == NULL)
170302892Ssephe			return;
171302892Ssephe
172302892Ssephe		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
173302892Ssephe		    "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id");
174296181Ssephe	}
175296188Ssephe
176302892Ssephe	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
177302892Ssephe	    "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id");
178302892Ssephe	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
179302892Ssephe	    "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
180302892Ssephe	    chan, 0, vmbus_chan_sysctl_mnf, "I",
181302892Ssephe	    "has monitor notification facilities");
182302892Ssephe
183302892Ssephe	/*
184302892Ssephe	 * Create sysctl tree for RX bufring.
185302892Ssephe	 */
186302892Ssephe	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
187302892Ssephe	    "in", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
188302892Ssephe	if (br_tree != NULL) {
189302892Ssephe		hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(br_tree),
190302892Ssephe		    &chan->inbound, "inbound ring buffer stats");
191302892Ssephe	}
192302892Ssephe
193302892Ssephe	/*
194302892Ssephe	 * Create sysctl tree for TX bufring.
195302892Ssephe	 */
196302892Ssephe	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
197302892Ssephe	    "out", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
198302892Ssephe	if (br_tree != NULL) {
199302892Ssephe		hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(br_tree),
200302892Ssephe		    &chan->outbound, "outbound ring buffer stats");
201302892Ssephe	}
202296181Ssephe}
203296290Ssephe
204250199Sgrehanint
205302986Ssephehv_vmbus_channel_open(struct hv_vmbus_channel *chan,
206302986Ssephe    int txbr_size, int rxbr_size, const void *udata, int udlen,
207302986Ssephe    vmbus_chan_callback_t cb, void *cbarg)
208250199Sgrehan{
209302986Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
210302607Ssephe	const struct vmbus_chanmsg_chopen_resp *resp;
211302607Ssephe	const struct vmbus_message *msg;
212302607Ssephe	struct vmbus_chanmsg_chopen *req;
213302607Ssephe	struct vmbus_msghc *mh;
214302607Ssephe	uint32_t status;
215302986Ssephe	int error;
216302872Ssephe	uint8_t *br;
217250199Sgrehan
218302986Ssephe	if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
219302607Ssephe		device_printf(sc->vmbus_dev,
220302986Ssephe		    "invalid udata len %d for chan%u\n", udlen, chan->ch_id);
221302607Ssephe		return EINVAL;
222302607Ssephe	}
223302986Ssephe	KASSERT((txbr_size & PAGE_MASK) == 0,
224302872Ssephe	    ("send bufring size is not multiple page"));
225302986Ssephe	KASSERT((rxbr_size & PAGE_MASK) == 0,
226302872Ssephe	    ("recv bufring size is not multiple page"));
227302607Ssephe
228302986Ssephe	if (atomic_testandset_int(&chan->ch_stflags,
229302812Ssephe	    VMBUS_CHAN_ST_OPENED_SHIFT))
230302986Ssephe		panic("double-open chan%u", chan->ch_id);
231282212Swhu
232302986Ssephe	chan->ch_cb = cb;
233302986Ssephe	chan->ch_cbarg = cbarg;
234250199Sgrehan
235302986Ssephe	vmbus_chan_update_evtflagcnt(sc, chan);
236300102Ssephe
237302986Ssephe	chan->ch_tq = VMBUS_PCPU_GET(chan->vmbus_sc, event_tq, chan->ch_cpuid);
238302986Ssephe	if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
239302986Ssephe		TASK_INIT(&chan->ch_task, 0, vmbus_chan_task, chan);
240302986Ssephe	else
241302986Ssephe		TASK_INIT(&chan->ch_task, 0, vmbus_chan_task_nobatch, chan);
242294886Ssephe
243302872Ssephe	/*
244302872Ssephe	 * Allocate the TX+RX bufrings.
245302872Ssephe	 * XXX should use ch_dev dtag
246302872Ssephe	 */
247302872Ssephe	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
248302986Ssephe	    PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma,
249302986Ssephe	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
250302872Ssephe	if (br == NULL) {
251302872Ssephe		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
252302986Ssephe		error = ENOMEM;
253302812Ssephe		goto failed;
254302812Ssephe	}
255302986Ssephe	chan->ch_bufring = br;
256250199Sgrehan
257302872Ssephe	/* TX bufring comes first */
258302986Ssephe	hv_vmbus_ring_buffer_init(&chan->outbound, br, txbr_size);
259302872Ssephe	/* RX bufring immediately follows TX bufring */
260302986Ssephe	hv_vmbus_ring_buffer_init(&chan->inbound, br + txbr_size, rxbr_size);
261250199Sgrehan
262296290Ssephe	/* Create sysctl tree for this channel */
263302986Ssephe	vmbus_chan_sysctl_create(chan);
264296181Ssephe
265302872Ssephe	/*
266302872Ssephe	 * Connect the bufrings, both RX and TX, to this channel.
267250199Sgrehan	 */
268302986Ssephe	error = vmbus_chan_gpadl_connect(chan, chan->ch_bufring_dma.hv_paddr,
269302986Ssephe	    txbr_size + rxbr_size, &chan->ch_bufring_gpadl);
270302986Ssephe	if (error) {
271302872Ssephe		device_printf(sc->vmbus_dev,
272302986Ssephe		    "failed to connect bufring GPADL to chan%u\n", chan->ch_id);
273302872Ssephe		goto failed;
274302872Ssephe	}
275250199Sgrehan
276302607Ssephe	/*
277302607Ssephe	 * Open channel w/ the bufring GPADL on the target CPU.
278250199Sgrehan	 */
279302607Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
280302607Ssephe	if (mh == NULL) {
281302607Ssephe		device_printf(sc->vmbus_dev,
282302607Ssephe		    "can not get msg hypercall for chopen(chan%u)\n",
283302986Ssephe		    chan->ch_id);
284302986Ssephe		error = ENXIO;
285302812Ssephe		goto failed;
286302607Ssephe	}
287250199Sgrehan
288302607Ssephe	req = vmbus_msghc_dataptr(mh);
289302607Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
290302986Ssephe	req->chm_chanid = chan->ch_id;
291302986Ssephe	req->chm_openid = chan->ch_id;
292302986Ssephe	req->chm_gpadl = chan->ch_bufring_gpadl;
293302986Ssephe	req->chm_vcpuid = chan->ch_vcpuid;
294302986Ssephe	req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT;
295302986Ssephe	if (udlen > 0)
296302986Ssephe		memcpy(req->chm_udata, udata, udlen);
297250199Sgrehan
298302986Ssephe	error = vmbus_msghc_exec(sc, mh);
299302986Ssephe	if (error) {
300302607Ssephe		device_printf(sc->vmbus_dev,
301302607Ssephe		    "chopen(chan%u) msg hypercall exec failed: %d\n",
302302986Ssephe		    chan->ch_id, error);
303302607Ssephe		vmbus_msghc_put(sc, mh);
304302812Ssephe		goto failed;
305302607Ssephe	}
306250199Sgrehan
307302607Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
308302607Ssephe	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
309302607Ssephe	status = resp->chm_status;
310250199Sgrehan
311302607Ssephe	vmbus_msghc_put(sc, mh);
312250199Sgrehan
313302607Ssephe	if (status == 0) {
314302607Ssephe		if (bootverbose) {
315302607Ssephe			device_printf(sc->vmbus_dev, "chan%u opened\n",
316302986Ssephe			    chan->ch_id);
317302607Ssephe		}
318302812Ssephe		return 0;
319250199Sgrehan	}
320302812Ssephe
321302986Ssephe	device_printf(sc->vmbus_dev, "failed to open chan%u\n", chan->ch_id);
322302986Ssephe	error = ENXIO;
323302812Ssephe
324302812Ssephefailed:
325302986Ssephe	if (chan->ch_bufring_gpadl) {
326302986Ssephe		vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl);
327302986Ssephe		chan->ch_bufring_gpadl = 0;
328302872Ssephe	}
329302986Ssephe	if (chan->ch_bufring != NULL) {
330302986Ssephe		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
331302986Ssephe		chan->ch_bufring = NULL;
332302872Ssephe	}
333302986Ssephe	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);
334302986Ssephe	return error;
335250199Sgrehan}
336250199Sgrehan
337302609Ssepheint
338302871Ssephevmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
339302871Ssephe    int size, uint32_t *gpadl0)
340302871Ssephe{
341302871Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
342302609Ssephe	struct vmbus_msghc *mh;
343302609Ssephe	struct vmbus_chanmsg_gpadl_conn *req;
344302609Ssephe	const struct vmbus_message *msg;
345302609Ssephe	size_t reqsz;
346302609Ssephe	uint32_t gpadl, status;
347302609Ssephe	int page_count, range_len, i, cnt, error;
348302871Ssephe	uint64_t page_id;
349250199Sgrehan
350302609Ssephe	/*
351302609Ssephe	 * Preliminary checks.
352302609Ssephe	 */
353250199Sgrehan
354302609Ssephe	KASSERT((size & PAGE_MASK) == 0,
355302871Ssephe	    ("invalid GPA size %d, not multiple page size", size));
356250199Sgrehan	page_count = size >> PAGE_SHIFT;
357250199Sgrehan
358302609Ssephe	KASSERT((paddr & PAGE_MASK) == 0,
359302609Ssephe	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
360302609Ssephe	page_id = paddr >> PAGE_SHIFT;
361250199Sgrehan
362302609Ssephe	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
363302609Ssephe	/*
364302609Ssephe	 * We don't support multiple GPA ranges.
365302609Ssephe	 */
366302609Ssephe	if (range_len > UINT16_MAX) {
367302609Ssephe		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
368302609Ssephe		    page_count);
369302609Ssephe		return EOPNOTSUPP;
370250199Sgrehan	}
371250199Sgrehan
372302609Ssephe	/*
373302609Ssephe	 * Allocate GPADL id.
374302609Ssephe	 */
375302630Ssephe	gpadl = vmbus_gpadl_alloc(sc);
376302609Ssephe	*gpadl0 = gpadl;
377250199Sgrehan
378302609Ssephe	/*
379302609Ssephe	 * Connect this GPADL to the target channel.
380302609Ssephe	 *
381302609Ssephe	 * NOTE:
382302609Ssephe	 * Since each message can only hold small set of page
383302609Ssephe	 * addresses, several messages may be required to
384302609Ssephe	 * complete the connection.
385302609Ssephe	 */
386302609Ssephe	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
387302609Ssephe		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
388302609Ssephe	else
389302609Ssephe		cnt = page_count;
390302609Ssephe	page_count -= cnt;
391250199Sgrehan
392302609Ssephe	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
393302609Ssephe	    chm_range.gpa_page[cnt]);
394302609Ssephe	mh = vmbus_msghc_get(sc, reqsz);
395302609Ssephe	if (mh == NULL) {
396302609Ssephe		device_printf(sc->vmbus_dev,
397302609Ssephe		    "can not get msg hypercall for gpadl->chan%u\n",
398302871Ssephe		    chan->ch_id);
399302609Ssephe		return EIO;
400250199Sgrehan	}
401250199Sgrehan
402302609Ssephe	req = vmbus_msghc_dataptr(mh);
403302609Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
404302871Ssephe	req->chm_chanid = chan->ch_id;
405302609Ssephe	req->chm_gpadl = gpadl;
406302609Ssephe	req->chm_range_len = range_len;
407302609Ssephe	req->chm_range_cnt = 1;
408302609Ssephe	req->chm_range.gpa_len = size;
409302609Ssephe	req->chm_range.gpa_ofs = 0;
410302609Ssephe	for (i = 0; i < cnt; ++i)
411302609Ssephe		req->chm_range.gpa_page[i] = page_id++;
412250199Sgrehan
413302609Ssephe	error = vmbus_msghc_exec(sc, mh);
414302609Ssephe	if (error) {
415302609Ssephe		device_printf(sc->vmbus_dev,
416302609Ssephe		    "gpadl->chan%u msg hypercall exec failed: %d\n",
417302871Ssephe		    chan->ch_id, error);
418302609Ssephe		vmbus_msghc_put(sc, mh);
419302609Ssephe		return error;
420302609Ssephe	}
421250199Sgrehan
422302609Ssephe	while (page_count > 0) {
423302609Ssephe		struct vmbus_chanmsg_gpadl_subconn *subreq;
424250199Sgrehan
425302609Ssephe		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
426302609Ssephe			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
427302609Ssephe		else
428302609Ssephe			cnt = page_count;
429302609Ssephe		page_count -= cnt;
430250199Sgrehan
431302609Ssephe		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
432302609Ssephe		    chm_gpa_page[cnt]);
433302609Ssephe		vmbus_msghc_reset(mh, reqsz);
434250199Sgrehan
435302609Ssephe		subreq = vmbus_msghc_dataptr(mh);
436302609Ssephe		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
437302609Ssephe		subreq->chm_gpadl = gpadl;
438302609Ssephe		for (i = 0; i < cnt; ++i)
439302609Ssephe			subreq->chm_gpa_page[i] = page_id++;
440250199Sgrehan
441302609Ssephe		vmbus_msghc_exec_noresult(mh);
442250199Sgrehan	}
443302609Ssephe	KASSERT(page_count == 0, ("invalid page count %d", page_count));
444250199Sgrehan
445302609Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
446302609Ssephe	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
447302609Ssephe	    msg->msg_data)->chm_status;
448250199Sgrehan
449302609Ssephe	vmbus_msghc_put(sc, mh);
450250199Sgrehan
451302609Ssephe	if (status != 0) {
452302609Ssephe		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
453302871Ssephe		    "status %u\n", chan->ch_id, status);
454302609Ssephe		return EIO;
455302632Ssephe	} else {
456302632Ssephe		if (bootverbose) {
457302632Ssephe			device_printf(sc->vmbus_dev, "gpadl->chan%u "
458302871Ssephe			    "succeeded\n", chan->ch_id);
459302632Ssephe		}
460302609Ssephe	}
461302609Ssephe	return 0;
462250199Sgrehan}
463250199Sgrehan
464302611Ssephe/*
465302611Ssephe * Disconnect the GPA from the target channel
466250199Sgrehan */
467250199Sgrehanint
468302890Ssephevmbus_chan_gpadl_disconnect(struct hv_vmbus_channel *chan, uint32_t gpadl)
469250199Sgrehan{
470302611Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
471302611Ssephe	struct vmbus_msghc *mh;
472302611Ssephe	struct vmbus_chanmsg_gpadl_disconn *req;
473302611Ssephe	int error;
474250199Sgrehan
475302611Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
476302611Ssephe	if (mh == NULL) {
477302611Ssephe		device_printf(sc->vmbus_dev,
478302611Ssephe		    "can not get msg hypercall for gpa x->chan%u\n",
479302693Ssephe		    chan->ch_id);
480302611Ssephe		return EBUSY;
481250199Sgrehan	}
482250199Sgrehan
483302611Ssephe	req = vmbus_msghc_dataptr(mh);
484302611Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
485302693Ssephe	req->chm_chanid = chan->ch_id;
486302611Ssephe	req->chm_gpadl = gpadl;
487250199Sgrehan
488302611Ssephe	error = vmbus_msghc_exec(sc, mh);
489302611Ssephe	if (error) {
490302611Ssephe		device_printf(sc->vmbus_dev,
491302611Ssephe		    "gpa x->chan%u msg hypercall exec failed: %d\n",
492302693Ssephe		    chan->ch_id, error);
493302611Ssephe		vmbus_msghc_put(sc, mh);
494302611Ssephe		return error;
495302611Ssephe	}
496250199Sgrehan
497302611Ssephe	vmbus_msghc_wait_result(sc, mh);
498302611Ssephe	/* Discard result; no useful information */
499302611Ssephe	vmbus_msghc_put(sc, mh);
500250199Sgrehan
501302611Ssephe	return 0;
502250199Sgrehan}
503250199Sgrehan
504282212Swhustatic void
505302891Ssephevmbus_chan_close_internal(struct hv_vmbus_channel *chan)
506250199Sgrehan{
507302891Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
508302610Ssephe	struct vmbus_msghc *mh;
509302610Ssephe	struct vmbus_chanmsg_chclose *req;
510302891Ssephe	struct taskqueue *tq = chan->ch_tq;
511302610Ssephe	int error;
512250199Sgrehan
513302812Ssephe	/* TODO: stringent check */
514302891Ssephe	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);
515302812Ssephe
516302891Ssephe	/*
517302891Ssephe	 * Free this channel's sysctl tree attached to its device's
518302891Ssephe	 * sysctl tree.
519302891Ssephe	 */
520302891Ssephe	sysctl_ctx_free(&chan->ch_sysctl_ctx);
521282212Swhu
522282212Swhu	/*
523302891Ssephe	 * Set ch_tq to NULL to avoid more requests be scheduled.
524302891Ssephe	 * XXX pretty broken; need rework.
525294886Ssephe	 */
526302891Ssephe	chan->ch_tq = NULL;
527302891Ssephe	taskqueue_drain(tq, &chan->ch_task);
528302891Ssephe	chan->ch_cb = NULL;
529250199Sgrehan
530302891Ssephe	/*
531302891Ssephe	 * Close this channel.
532250199Sgrehan	 */
533302610Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
534302610Ssephe	if (mh == NULL) {
535302610Ssephe		device_printf(sc->vmbus_dev,
536302610Ssephe		    "can not get msg hypercall for chclose(chan%u)\n",
537302891Ssephe		    chan->ch_id);
538302610Ssephe		return;
539302610Ssephe	}
540250199Sgrehan
541302610Ssephe	req = vmbus_msghc_dataptr(mh);
542302610Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
543302891Ssephe	req->chm_chanid = chan->ch_id;
544250199Sgrehan
545302610Ssephe	error = vmbus_msghc_exec_noresult(mh);
546302610Ssephe	vmbus_msghc_put(sc, mh);
547302610Ssephe
548302610Ssephe	if (error) {
549302610Ssephe		device_printf(sc->vmbus_dev,
550302610Ssephe		    "chclose(chan%u) msg hypercall exec failed: %d\n",
551302891Ssephe		    chan->ch_id, error);
552302610Ssephe		return;
553302610Ssephe	} else if (bootverbose) {
554302891Ssephe		device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id);
555302610Ssephe	}
556302610Ssephe
557302891Ssephe	/*
558302891Ssephe	 * Disconnect the TX+RX bufrings from this channel.
559302891Ssephe	 */
560302891Ssephe	if (chan->ch_bufring_gpadl) {
561302891Ssephe		vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl);
562302891Ssephe		chan->ch_bufring_gpadl = 0;
563250199Sgrehan	}
564250199Sgrehan
565302891Ssephe	/*
566302891Ssephe	 * Destroy the TX+RX bufrings.
567302891Ssephe	 */
568302891Ssephe	hv_ring_buffer_cleanup(&chan->outbound);
569302891Ssephe	hv_ring_buffer_cleanup(&chan->inbound);
570302891Ssephe	if (chan->ch_bufring != NULL) {
571302891Ssephe		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
572302891Ssephe		chan->ch_bufring = NULL;
573302872Ssephe	}
574282212Swhu}
575250199Sgrehan
576302818Ssephe/*
577302818Ssephe * Caller should make sure that all sub-channels have
578302818Ssephe * been added to 'chan' and all to-be-closed channels
579302818Ssephe * are not being opened.
580282212Swhu */
581282212Swhuvoid
582302818Ssephehv_vmbus_channel_close(struct hv_vmbus_channel *chan)
583282212Swhu{
584302818Ssephe	int subchan_cnt;
585282212Swhu
586302818Ssephe	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
587282212Swhu		/*
588302818Ssephe		 * Sub-channel is closed when its primary channel
589302818Ssephe		 * is closed; done.
590282212Swhu		 */
591282212Swhu		return;
592282212Swhu	}
593282212Swhu
594250199Sgrehan	/*
595302818Ssephe	 * Close all sub-channels, if any.
596250199Sgrehan	 */
597302819Ssephe	subchan_cnt = chan->ch_subchan_cnt;
598302818Ssephe	if (subchan_cnt > 0) {
599302818Ssephe		struct hv_vmbus_channel **subchan;
600302818Ssephe		int i;
601302818Ssephe
602302890Ssephe		subchan = vmbus_subchan_get(chan, subchan_cnt);
603302818Ssephe		for (i = 0; i < subchan_cnt; ++i)
604302891Ssephe			vmbus_chan_close_internal(subchan[i]);
605302890Ssephe		vmbus_subchan_rel(subchan, subchan_cnt);
606250199Sgrehan	}
607302818Ssephe
608302818Ssephe	/* Then close the primary channel. */
609302891Ssephe	vmbus_chan_close_internal(chan);
610250199Sgrehan}
611250199Sgrehan
612250199Sgrehanint
613302882Ssephevmbus_chan_send(struct hv_vmbus_channel *chan, uint16_t type, uint16_t flags,
614302882Ssephe    void *data, int dlen, uint64_t xactid)
615250199Sgrehan{
616302875Ssephe	struct vmbus_chanpkt pkt;
617302881Ssephe	int pktlen, pad_pktlen, hlen, error;
618302881Ssephe	uint64_t pad = 0;
619302881Ssephe	struct iovec iov[3];
620302881Ssephe	boolean_t send_evt;
621250199Sgrehan
622302881Ssephe	hlen = sizeof(pkt);
623302881Ssephe	pktlen = hlen + dlen;
624302884Ssephe	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
625250199Sgrehan
626302875Ssephe	pkt.cp_hdr.cph_type = type;
627302875Ssephe	pkt.cp_hdr.cph_flags = flags;
628302884Ssephe	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
629302884Ssephe	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
630302881Ssephe	pkt.cp_hdr.cph_xactid = xactid;
631250199Sgrehan
632302875Ssephe	iov[0].iov_base = &pkt;
633302881Ssephe	iov[0].iov_len = hlen;
634302881Ssephe	iov[1].iov_base = data;
635302881Ssephe	iov[1].iov_len = dlen;
636302881Ssephe	iov[2].iov_base = &pad;
637302881Ssephe	iov[2].iov_len = pad_pktlen - pktlen;
638250199Sgrehan
639302881Ssephe	error = hv_ring_buffer_write(&chan->outbound, iov, 3, &send_evt);
640302881Ssephe	if (!error && send_evt)
641302881Ssephe		vmbus_chan_send_event(chan);
642302881Ssephe	return error;
643250199Sgrehan}
644250199Sgrehan
645250199Sgrehanint
646302876Ssephevmbus_chan_send_sglist(struct hv_vmbus_channel *chan,
647302876Ssephe    struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid)
648250199Sgrehan{
649302876Ssephe	struct vmbus_chanpkt_sglist pkt;
650302876Ssephe	int pktlen, pad_pktlen, hlen, error;
651302876Ssephe	struct iovec iov[4];
652302876Ssephe	boolean_t send_evt;
653302876Ssephe	uint64_t pad = 0;
654250199Sgrehan
655302876Ssephe	KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX,
656302876Ssephe	    ("invalid sglist len %d", sglen));
657250199Sgrehan
658302876Ssephe	hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]);
659302876Ssephe	pktlen = hlen + dlen;
660302884Ssephe	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
661250199Sgrehan
662302880Ssephe	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
663302879Ssephe	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
664302884Ssephe	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
665302884Ssephe	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
666302876Ssephe	pkt.cp_hdr.cph_xactid = xactid;
667302876Ssephe	pkt.cp_rsvd = 0;
668302876Ssephe	pkt.cp_gpa_cnt = sglen;
669250199Sgrehan
670302876Ssephe	iov[0].iov_base = &pkt;
671302876Ssephe	iov[0].iov_len = sizeof(pkt);
672302876Ssephe	iov[1].iov_base = sg;
673302876Ssephe	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
674302876Ssephe	iov[2].iov_base = data;
675302876Ssephe	iov[2].iov_len = dlen;
676302876Ssephe	iov[3].iov_base = &pad;
677302876Ssephe	iov[3].iov_len = pad_pktlen - pktlen;
678250199Sgrehan
679302876Ssephe	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
680302876Ssephe	if (!error && send_evt)
681302876Ssephe		vmbus_chan_send_event(chan);
682302876Ssephe	return error;
683250199Sgrehan}
684250199Sgrehan
685250199Sgrehanint
686302878Ssephevmbus_chan_send_prplist(struct hv_vmbus_channel *chan,
687302878Ssephe    struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen,
688302878Ssephe    uint64_t xactid)
689250199Sgrehan{
690302878Ssephe	struct vmbus_chanpkt_prplist pkt;
691302878Ssephe	int pktlen, pad_pktlen, hlen, error;
692302878Ssephe	struct iovec iov[4];
693302878Ssephe	boolean_t send_evt;
694302878Ssephe	uint64_t pad = 0;
695250199Sgrehan
696302878Ssephe	KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX,
697302878Ssephe	    ("invalid prplist entry count %d", prp_cnt));
698250199Sgrehan
699302878Ssephe	hlen = __offsetof(struct vmbus_chanpkt_prplist,
700302878Ssephe	    cp_range[0].gpa_page[prp_cnt]);
701302878Ssephe	pktlen = hlen + dlen;
702302884Ssephe	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
703250199Sgrehan
704302880Ssephe	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
705302879Ssephe	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
706302884Ssephe	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
707302884Ssephe	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
708302878Ssephe	pkt.cp_hdr.cph_xactid = xactid;
709302878Ssephe	pkt.cp_rsvd = 0;
710302878Ssephe	pkt.cp_range_cnt = 1;
711250199Sgrehan
712302878Ssephe	iov[0].iov_base = &pkt;
713302878Ssephe	iov[0].iov_len = sizeof(pkt);
714302878Ssephe	iov[1].iov_base = prp;
715302878Ssephe	iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]);
716302878Ssephe	iov[2].iov_base = data;
717302878Ssephe	iov[2].iov_len = dlen;
718302878Ssephe	iov[3].iov_base = &pad;
719302878Ssephe	iov[3].iov_len = pad_pktlen - pktlen;
720250199Sgrehan
721302878Ssephe	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
722302878Ssephe	if (!error && send_evt)
723302878Ssephe		vmbus_chan_send_event(chan);
724302878Ssephe	return error;
725250199Sgrehan}
726250199Sgrehan
727250199Sgrehanint
728302885Ssephevmbus_chan_recv(struct hv_vmbus_channel *chan, void *data, int *dlen0,
729302885Ssephe    uint64_t *xactid)
730250199Sgrehan{
731302885Ssephe	struct vmbus_chanpkt_hdr pkt;
732302885Ssephe	int error, dlen, hlen;
733250199Sgrehan
734302885Ssephe	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
735302885Ssephe	if (error)
736302885Ssephe		return error;
737250199Sgrehan
738302885Ssephe	hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen);
739302885Ssephe	dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen;
740250199Sgrehan
741302885Ssephe	if (*dlen0 < dlen) {
742302886Ssephe		/* Return the size of this packet's data. */
743302885Ssephe		*dlen0 = dlen;
744302885Ssephe		return ENOBUFS;
745302885Ssephe	}
746250199Sgrehan
747302885Ssephe	*xactid = pkt.cph_xactid;
748302885Ssephe	*dlen0 = dlen;
749250199Sgrehan
750302886Ssephe	/* Skip packet header */
751302885Ssephe	error = hv_ring_buffer_read(&chan->inbound, data, dlen, hlen);
752302885Ssephe	KASSERT(!error, ("hv_ring_buffer_read failed"));
753250199Sgrehan
754302885Ssephe	return 0;
755250199Sgrehan}
756250199Sgrehan
757250199Sgrehanint
758302886Ssephevmbus_chan_recv_pkt(struct hv_vmbus_channel *chan,
759302886Ssephe    struct vmbus_chanpkt_hdr *pkt0, int *pktlen0)
760250199Sgrehan{
761302886Ssephe	struct vmbus_chanpkt_hdr pkt;
762302886Ssephe	int error, pktlen;
763250199Sgrehan
764302886Ssephe	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
765302886Ssephe	if (error)
766302886Ssephe		return error;
767250199Sgrehan
768302886Ssephe	pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen);
769302886Ssephe	if (*pktlen0 < pktlen) {
770302886Ssephe		/* Return the size of this packet. */
771302886Ssephe		*pktlen0 = pktlen;
772302886Ssephe		return ENOBUFS;
773302886Ssephe	}
774302886Ssephe	*pktlen0 = pktlen;
775250199Sgrehan
776302886Ssephe	/* Include packet header */
777302886Ssephe	error = hv_ring_buffer_read(&chan->inbound, pkt0, pktlen, 0);
778302886Ssephe	KASSERT(!error, ("hv_ring_buffer_read failed"));
779250199Sgrehan
780302886Ssephe	return 0;
781250199Sgrehan}
782294886Ssephe
783294886Ssephestatic void
784302713Ssephevmbus_chan_task(void *xchan, int pending __unused)
785294886Ssephe{
786302713Ssephe	struct hv_vmbus_channel *chan = xchan;
787302874Ssephe	vmbus_chan_callback_t cb = chan->ch_cb;
788302874Ssephe	void *cbarg = chan->ch_cbarg;
789294886Ssephe
790302710Ssephe	/*
791302710Ssephe	 * Optimize host to guest signaling by ensuring:
792302710Ssephe	 * 1. While reading the channel, we disable interrupts from
793302710Ssephe	 *    host.
794302710Ssephe	 * 2. Ensure that we process all posted messages from the host
795302710Ssephe	 *    before returning from this callback.
796302710Ssephe	 * 3. Once we return, enable signaling from the host. Once this
797302710Ssephe	 *    state is set we check to see if additional packets are
798302710Ssephe	 *    available to read. In this case we repeat the process.
799302713Ssephe	 *
800302713Ssephe	 * NOTE: Interrupt has been disabled in the ISR.
801302710Ssephe	 */
802302713Ssephe	for (;;) {
803302713Ssephe		uint32_t left;
804294886Ssephe
805302874Ssephe		cb(cbarg);
806294886Ssephe
807302713Ssephe		left = hv_ring_buffer_read_end(&chan->inbound);
808302713Ssephe		if (left == 0) {
809302713Ssephe			/* No more data in RX bufring; done */
810302713Ssephe			break;
811302713Ssephe		}
812302713Ssephe		hv_ring_buffer_read_begin(&chan->inbound);
813302713Ssephe	}
814294886Ssephe}
815302692Ssephe
816302713Ssephestatic void
817302713Ssephevmbus_chan_task_nobatch(void *xchan, int pending __unused)
818302713Ssephe{
819302713Ssephe	struct hv_vmbus_channel *chan = xchan;
820302713Ssephe
821302874Ssephe	chan->ch_cb(chan->ch_cbarg);
822302713Ssephe}
823302713Ssephe
824302692Ssephestatic __inline void
825302692Ssephevmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
826302692Ssephe    int flag_cnt)
827302692Ssephe{
828302692Ssephe	int f;
829302692Ssephe
830302692Ssephe	for (f = 0; f < flag_cnt; ++f) {
831302806Ssephe		uint32_t chid_base;
832302692Ssephe		u_long flags;
833302806Ssephe		int chid_ofs;
834302692Ssephe
835302692Ssephe		if (event_flags[f] == 0)
836302692Ssephe			continue;
837302692Ssephe
838302692Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
839302806Ssephe		chid_base = f << VMBUS_EVTFLAG_SHIFT;
840302692Ssephe
841302806Ssephe		while ((chid_ofs = ffsl(flags)) != 0) {
842302692Ssephe			struct hv_vmbus_channel *channel;
843302692Ssephe
844302806Ssephe			--chid_ofs; /* NOTE: ffsl is 1-based */
845302806Ssephe			flags &= ~(1UL << chid_ofs);
846302692Ssephe
847302806Ssephe			channel = sc->vmbus_chmap[chid_base + chid_ofs];
848302692Ssephe
849302692Ssephe			/* if channel is closed or closing */
850302874Ssephe			if (channel == NULL || channel->ch_tq == NULL)
851302692Ssephe				continue;
852302692Ssephe
853302709Ssephe			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
854302692Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
855302874Ssephe			taskqueue_enqueue(channel->ch_tq, &channel->ch_task);
856302692Ssephe		}
857302692Ssephe	}
858302692Ssephe}
859302692Ssephe
860302692Ssephevoid
861302692Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
862302692Ssephe{
863302692Ssephe	struct vmbus_evtflags *eventf;
864302692Ssephe
865302692Ssephe	/*
866302692Ssephe	 * On Host with Win8 or above, the event page can be checked directly
867302692Ssephe	 * to get the id of the channel that has the pending interrupt.
868302692Ssephe	 */
869302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
870302692Ssephe	vmbus_event_flags_proc(sc, eventf->evt_flags,
871302692Ssephe	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
872302692Ssephe}
873302692Ssephe
874302692Ssephevoid
875302692Ssephevmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
876302692Ssephe{
877302692Ssephe	struct vmbus_evtflags *eventf;
878302692Ssephe
879302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
880302692Ssephe	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
881302692Ssephe		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
882302692Ssephe		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
883302692Ssephe	}
884302692Ssephe}
885302692Ssephe
886302692Ssephestatic void
887302692Ssephevmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
888302692Ssephe    const struct hv_vmbus_channel *chan)
889302692Ssephe{
890302692Ssephe	volatile int *flag_cnt_ptr;
891302692Ssephe	int flag_cnt;
892302692Ssephe
893302693Ssephe	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
894302873Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);
895302692Ssephe
896302692Ssephe	for (;;) {
897302692Ssephe		int old_flag_cnt;
898302692Ssephe
899302692Ssephe		old_flag_cnt = *flag_cnt_ptr;
900302692Ssephe		if (old_flag_cnt >= flag_cnt)
901302692Ssephe			break;
902302692Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
903302692Ssephe			if (bootverbose) {
904302692Ssephe				device_printf(sc->vmbus_dev,
905302692Ssephe				    "channel%u update cpu%d flag_cnt to %d\n",
906302873Ssephe				    chan->ch_id, chan->ch_cpuid, flag_cnt);
907302692Ssephe			}
908302692Ssephe			break;
909302692Ssephe		}
910302692Ssephe	}
911302692Ssephe}
912302864Ssephe
913302864Ssephestatic struct hv_vmbus_channel *
914302864Ssephevmbus_chan_alloc(struct vmbus_softc *sc)
915302864Ssephe{
916302864Ssephe	struct hv_vmbus_channel *chan;
917302864Ssephe
918302864Ssephe	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
919302864Ssephe
920302864Ssephe	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
921302864Ssephe	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
922302864Ssephe	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
923302864Ssephe	if (chan->ch_monprm == NULL) {
924302864Ssephe		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
925302864Ssephe		free(chan, M_DEVBUF);
926302864Ssephe		return NULL;
927302864Ssephe	}
928302864Ssephe
929302864Ssephe	chan->vmbus_sc = sc;
930302864Ssephe	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
931302864Ssephe	TAILQ_INIT(&chan->ch_subchans);
932302864Ssephe	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
933302864Ssephe
934302864Ssephe	return chan;
935302864Ssephe}
936302864Ssephe
937302864Ssephestatic void
938302864Ssephevmbus_chan_free(struct hv_vmbus_channel *chan)
939302864Ssephe{
940302864Ssephe	/* TODO: assert sub-channel list is empty */
941302864Ssephe	/* TODO: asset no longer on the primary channel's sub-channel list */
942302864Ssephe	/* TODO: asset no longer on the vmbus channel list */
943302864Ssephe	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
944302864Ssephe	mtx_destroy(&chan->ch_subchan_lock);
945302864Ssephe	free(chan, M_DEVBUF);
946302864Ssephe}
947302864Ssephe
948302864Ssephestatic int
949302864Ssephevmbus_chan_add(struct hv_vmbus_channel *newchan)
950302864Ssephe{
951302864Ssephe	struct vmbus_softc *sc = newchan->vmbus_sc;
952302864Ssephe	struct hv_vmbus_channel *prichan;
953302864Ssephe
954302864Ssephe	if (newchan->ch_id == 0) {
955302864Ssephe		/*
956302864Ssephe		 * XXX
957302864Ssephe		 * Chan0 will neither be processed nor should be offered;
958302864Ssephe		 * skip it.
959302864Ssephe		 */
960302864Ssephe		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
961302864Ssephe		return EINVAL;
962302864Ssephe	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
963302864Ssephe		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
964302864Ssephe		    newchan->ch_id);
965302864Ssephe		return EINVAL;
966302864Ssephe	}
967302864Ssephe	sc->vmbus_chmap[newchan->ch_id] = newchan;
968302864Ssephe
969302864Ssephe	if (bootverbose) {
970302864Ssephe		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
971302864Ssephe		    newchan->ch_id, newchan->ch_subidx);
972302864Ssephe	}
973302864Ssephe
974302864Ssephe	mtx_lock(&sc->vmbus_prichan_lock);
975302864Ssephe	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
976302864Ssephe		/*
977302864Ssephe		 * Sub-channel will have the same type GUID and instance
978302864Ssephe		 * GUID as its primary channel.
979302864Ssephe		 */
980302864Ssephe		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
981302864Ssephe		    sizeof(struct hyperv_guid)) == 0 &&
982302864Ssephe		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
983302864Ssephe		    sizeof(struct hyperv_guid)) == 0)
984302864Ssephe			break;
985302864Ssephe	}
986302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
987302864Ssephe		if (prichan == NULL) {
988302864Ssephe			/* Install the new primary channel */
989302864Ssephe			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
990302864Ssephe			    ch_prilink);
991302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
992302864Ssephe			return 0;
993302864Ssephe		} else {
994302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
995302864Ssephe			device_printf(sc->vmbus_dev, "duplicated primary "
996302864Ssephe			    "chan%u\n", newchan->ch_id);
997302864Ssephe			return EINVAL;
998302864Ssephe		}
999302864Ssephe	} else { /* Sub-channel */
1000302864Ssephe		if (prichan == NULL) {
1001302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1002302864Ssephe			device_printf(sc->vmbus_dev, "no primary chan for "
1003302864Ssephe			    "chan%u\n", newchan->ch_id);
1004302864Ssephe			return EINVAL;
1005302864Ssephe		}
1006302864Ssephe		/*
1007302864Ssephe		 * Found the primary channel for this sub-channel and
1008302864Ssephe		 * move on.
1009302864Ssephe		 *
1010302864Ssephe		 * XXX refcnt prichan
1011302864Ssephe		 */
1012302864Ssephe	}
1013302864Ssephe	mtx_unlock(&sc->vmbus_prichan_lock);
1014302864Ssephe
1015302864Ssephe	/*
1016302864Ssephe	 * This is a sub-channel; link it with the primary channel.
1017302864Ssephe	 */
1018302864Ssephe	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
1019302864Ssephe	    ("new channel is not sub-channel"));
1020302864Ssephe	KASSERT(prichan != NULL, ("no primary channel"));
1021302864Ssephe
1022302864Ssephe	newchan->ch_prichan = prichan;
1023302864Ssephe	newchan->ch_dev = prichan->ch_dev;
1024302864Ssephe
1025302864Ssephe	mtx_lock(&prichan->ch_subchan_lock);
1026302864Ssephe	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
1027302864Ssephe	/*
1028302864Ssephe	 * Bump up sub-channel count and notify anyone that is
1029302864Ssephe	 * interested in this sub-channel, after this sub-channel
1030302864Ssephe	 * is setup.
1031302864Ssephe	 */
1032302864Ssephe	prichan->ch_subchan_cnt++;
1033302864Ssephe	mtx_unlock(&prichan->ch_subchan_lock);
1034302864Ssephe	wakeup(prichan);
1035302864Ssephe
1036302864Ssephe	return 0;
1037302864Ssephe}
1038302864Ssephe
1039302864Ssephevoid
1040302890Ssephevmbus_chan_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1041302864Ssephe{
1042302864Ssephe	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1043302864Ssephe
1044302864Ssephe	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1045302864Ssephe	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1046302864Ssephe		/* Only cpu0 is supported */
1047302864Ssephe		cpu = 0;
1048302864Ssephe	}
1049302864Ssephe
1050302873Ssephe	chan->ch_cpuid = cpu;
1051302873Ssephe	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1052302864Ssephe
1053302864Ssephe	if (bootverbose) {
1054302864Ssephe		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1055302873Ssephe		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
1056302864Ssephe	}
1057302864Ssephe}
1058302864Ssephe
1059302864Ssephevoid
1060302890Ssephevmbus_chan_cpu_rr(struct hv_vmbus_channel *chan)
1061302864Ssephe{
1062302864Ssephe	static uint32_t vmbus_chan_nextcpu;
1063302864Ssephe	int cpu;
1064302864Ssephe
1065302864Ssephe	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1066302890Ssephe	vmbus_chan_cpu_set(chan, cpu);
1067302864Ssephe}
1068302864Ssephe
/*
 * Apply the default cpu binding to 'chan'.
 */
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{
	const int default_cpu = 0;

	/*
	 * Channels are pinned to cpu0 unless told otherwise.  A device
	 * with special channel-cpu mapping requirement is expected to
	 * call vmbus_chan_cpu_{set,rr}() itself.
	 */
	vmbus_chan_cpu_set(chan, default_cpu);
}
1079302864Ssephe
1080302864Ssephestatic void
1081302864Ssephevmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
1082302864Ssephe    const struct vmbus_message *msg)
1083302864Ssephe{
1084302864Ssephe	const struct vmbus_chanmsg_choffer *offer;
1085302864Ssephe	struct hv_vmbus_channel *chan;
1086302864Ssephe	int error;
1087302864Ssephe
1088302864Ssephe	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;
1089302864Ssephe
1090302864Ssephe	chan = vmbus_chan_alloc(sc);
1091302864Ssephe	if (chan == NULL) {
1092302864Ssephe		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
1093302864Ssephe		    offer->chm_chanid);
1094302864Ssephe		return;
1095302864Ssephe	}
1096302864Ssephe
1097302864Ssephe	chan->ch_id = offer->chm_chanid;
1098302864Ssephe	chan->ch_subidx = offer->chm_subidx;
1099302864Ssephe	chan->ch_guid_type = offer->chm_chtype;
1100302864Ssephe	chan->ch_guid_inst = offer->chm_chinst;
1101302864Ssephe
1102302864Ssephe	/* Batch reading is on by default */
1103302864Ssephe	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
1104302864Ssephe
1105302864Ssephe	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
1106302864Ssephe	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
1107302864Ssephe		chan->ch_monprm->mp_connid = offer->chm_connid;
1108302864Ssephe
1109302864Ssephe	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
1110302864Ssephe		/*
1111302864Ssephe		 * Setup MNF stuffs.
1112302864Ssephe		 */
1113302864Ssephe		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
1114302864Ssephe		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
1115302864Ssephe		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
1116302864Ssephe			panic("invalid monitor trigger %u", offer->chm_montrig);
1117302864Ssephe		chan->ch_montrig_mask =
1118302864Ssephe		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
1119302864Ssephe	}
1120302864Ssephe
1121302864Ssephe	/* Select default cpu for this channel. */
1122302864Ssephe	vmbus_chan_cpu_default(chan);
1123302864Ssephe
1124302864Ssephe	error = vmbus_chan_add(chan);
1125302864Ssephe	if (error) {
1126302864Ssephe		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
1127302864Ssephe		    chan->ch_id, error);
1128302864Ssephe		vmbus_chan_free(chan);
1129302864Ssephe		return;
1130302864Ssephe	}
1131302864Ssephe
1132302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1133302864Ssephe		/*
1134302864Ssephe		 * Add device for this primary channel.
1135302864Ssephe		 *
1136302864Ssephe		 * NOTE:
1137302864Ssephe		 * Error is ignored here; don't have much to do if error
1138302864Ssephe		 * really happens.
1139302864Ssephe		 */
1140302868Ssephe		vmbus_add_child(chan);
1141302864Ssephe	}
1142302864Ssephe}
1143302864Ssephe
1144302864Ssephe/*
1145302864Ssephe * XXX pretty broken; need rework.
1146302864Ssephe */
1147302864Ssephestatic void
1148302864Ssephevmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1149302864Ssephe    const struct vmbus_message *msg)
1150302864Ssephe{
1151302864Ssephe	const struct vmbus_chanmsg_chrescind *note;
1152302864Ssephe	struct hv_vmbus_channel *chan;
1153302864Ssephe
1154302864Ssephe	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1155302864Ssephe	if (note->chm_chanid > VMBUS_CHAN_MAX) {
1156302864Ssephe		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1157302864Ssephe		    note->chm_chanid);
1158302864Ssephe		return;
1159302864Ssephe	}
1160302864Ssephe
1161302864Ssephe	if (bootverbose) {
1162302864Ssephe		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1163302864Ssephe		    note->chm_chanid);
1164302864Ssephe	}
1165302864Ssephe
1166302864Ssephe	chan = sc->vmbus_chmap[note->chm_chanid];
1167302864Ssephe	if (chan == NULL)
1168302864Ssephe		return;
1169302864Ssephe	sc->vmbus_chmap[note->chm_chanid] = NULL;
1170302864Ssephe
1171302864Ssephe	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1172302864Ssephe}
1173302864Ssephe
1174302864Ssephestatic void
1175302864Ssephevmbus_chan_detach_task(void *xchan, int pending __unused)
1176302864Ssephe{
1177302864Ssephe	struct hv_vmbus_channel *chan = xchan;
1178302864Ssephe
1179302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1180302864Ssephe		/* Only primary channel owns the device */
1181302868Ssephe		vmbus_delete_child(chan);
1182302864Ssephe		/* NOTE: DO NOT free primary channel for now */
1183302864Ssephe	} else {
1184302864Ssephe		struct vmbus_softc *sc = chan->vmbus_sc;
1185302864Ssephe		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1186302864Ssephe		struct vmbus_chanmsg_chfree *req;
1187302864Ssephe		struct vmbus_msghc *mh;
1188302864Ssephe		int error;
1189302864Ssephe
1190302864Ssephe		mh = vmbus_msghc_get(sc, sizeof(*req));
1191302864Ssephe		if (mh == NULL) {
1192302864Ssephe			device_printf(sc->vmbus_dev,
1193302864Ssephe			    "can not get msg hypercall for chfree(chan%u)\n",
1194302864Ssephe			    chan->ch_id);
1195302864Ssephe			goto remove;
1196302864Ssephe		}
1197302864Ssephe
1198302864Ssephe		req = vmbus_msghc_dataptr(mh);
1199302864Ssephe		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1200302864Ssephe		req->chm_chanid = chan->ch_id;
1201302864Ssephe
1202302864Ssephe		error = vmbus_msghc_exec_noresult(mh);
1203302864Ssephe		vmbus_msghc_put(sc, mh);
1204302864Ssephe
1205302864Ssephe		if (error) {
1206302864Ssephe			device_printf(sc->vmbus_dev,
1207302864Ssephe			    "chfree(chan%u) failed: %d",
1208302864Ssephe			    chan->ch_id, error);
1209302864Ssephe			/* NOTE: Move on! */
1210302864Ssephe		} else {
1211302864Ssephe			if (bootverbose) {
1212302864Ssephe				device_printf(sc->vmbus_dev, "chan%u freed\n",
1213302864Ssephe				    chan->ch_id);
1214302864Ssephe			}
1215302864Ssephe		}
1216302864Ssepheremove:
1217302864Ssephe		mtx_lock(&pri_chan->ch_subchan_lock);
1218302864Ssephe		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1219302864Ssephe		KASSERT(pri_chan->ch_subchan_cnt > 0,
1220302864Ssephe		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1221302864Ssephe		pri_chan->ch_subchan_cnt--;
1222302864Ssephe		mtx_unlock(&pri_chan->ch_subchan_lock);
1223302864Ssephe		wakeup(pri_chan);
1224302864Ssephe
1225302864Ssephe		vmbus_chan_free(chan);
1226302864Ssephe	}
1227302864Ssephe}
1228302864Ssephe
1229302864Ssephe/*
1230302864Ssephe * Detach all devices and destroy the corresponding primary channels.
1231302864Ssephe */
1232302864Ssephevoid
1233302864Ssephevmbus_chan_destroy_all(struct vmbus_softc *sc)
1234302864Ssephe{
1235302864Ssephe	struct hv_vmbus_channel *chan;
1236302864Ssephe
1237302864Ssephe	mtx_lock(&sc->vmbus_prichan_lock);
1238302864Ssephe	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1239302864Ssephe		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1240302864Ssephe		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1241302864Ssephe		mtx_unlock(&sc->vmbus_prichan_lock);
1242302864Ssephe
1243302868Ssephe		vmbus_delete_child(chan);
1244302864Ssephe		vmbus_chan_free(chan);
1245302864Ssephe
1246302864Ssephe		mtx_lock(&sc->vmbus_prichan_lock);
1247302864Ssephe	}
1248302864Ssephe	bzero(sc->vmbus_chmap,
1249302864Ssephe	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1250302864Ssephe	mtx_unlock(&sc->vmbus_prichan_lock);
1251302864Ssephe}
1252302864Ssephe
1253303020Ssephe/*
1254302864Ssephe * The channel whose vcpu binding is closest to the currect vcpu will
1255302864Ssephe * be selected.
1256303020Ssephe * If no multi-channel, always select primary channel.
1257302864Ssephe */
1258302864Ssephestruct hv_vmbus_channel *
1259303020Ssephevmbus_chan_cpu2chan(struct hv_vmbus_channel *prichan, int cpu)
1260302864Ssephe{
1261303020Ssephe	struct hv_vmbus_channel *sel, *chan;
1262303020Ssephe	uint32_t vcpu, sel_dist;
1263302864Ssephe
1264303020Ssephe	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpuid %d", cpu));
1265303020Ssephe	if (TAILQ_EMPTY(&prichan->ch_subchans))
1266303020Ssephe		return prichan;
1267302864Ssephe
1268303020Ssephe	vcpu = VMBUS_PCPU_GET(prichan->vmbus_sc, vcpuid, cpu);
1269302864Ssephe
1270303020Ssephe#define CHAN_VCPU_DIST(ch, vcpu)		\
1271303020Ssephe	(((ch)->ch_vcpuid > (vcpu)) ?		\
1272303020Ssephe	 ((ch)->ch_vcpuid - (vcpu)) : ((vcpu) - (ch)->ch_vcpuid))
1273302864Ssephe
1274303020Ssephe#define CHAN_SELECT(ch)				\
1275303020Ssephedo {						\
1276303020Ssephe	sel = ch;				\
1277303020Ssephe	sel_dist = CHAN_VCPU_DIST(ch, vcpu);	\
1278303020Ssephe} while (0)
1279302864Ssephe
1280303020Ssephe	CHAN_SELECT(prichan);
1281302864Ssephe
1282303020Ssephe	mtx_lock(&prichan->ch_subchan_lock);
1283303020Ssephe	TAILQ_FOREACH(chan, &prichan->ch_subchans, ch_sublink) {
1284303020Ssephe		uint32_t dist;
1285302864Ssephe
1286303020Ssephe		KASSERT(chan->ch_stflags & VMBUS_CHAN_ST_OPENED,
1287303020Ssephe		    ("chan%u is not opened", chan->ch_id));
1288303020Ssephe
1289303020Ssephe		if (chan->ch_vcpuid == vcpu) {
1290303020Ssephe			/* Exact match; done */
1291303020Ssephe			CHAN_SELECT(chan);
1292303020Ssephe			break;
1293303020Ssephe		}
1294303020Ssephe
1295303020Ssephe		dist = CHAN_VCPU_DIST(chan, vcpu);
1296303020Ssephe		if (sel_dist <= dist) {
1297303020Ssephe			/* Far or same distance; skip */
1298302864Ssephe			continue;
1299302864Ssephe		}
1300302864Ssephe
1301303020Ssephe		/* Select the closer channel. */
1302303020Ssephe		CHAN_SELECT(chan);
1303302864Ssephe	}
1304303020Ssephe	mtx_unlock(&prichan->ch_subchan_lock);
1305302864Ssephe
1306303020Ssephe#undef CHAN_SELECT
1307303020Ssephe#undef CHAN_VCPU_DIST
1308303020Ssephe
1309303020Ssephe	return sel;
1310302864Ssephe}
1311302864Ssephe
1312302864Ssephestruct hv_vmbus_channel **
1313302890Ssephevmbus_subchan_get(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1314302864Ssephe{
1315302864Ssephe	struct hv_vmbus_channel **ret, *chan;
1316302864Ssephe	int i;
1317302864Ssephe
1318302864Ssephe	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1319302864Ssephe	    M_WAITOK);
1320302864Ssephe
1321302864Ssephe	mtx_lock(&pri_chan->ch_subchan_lock);
1322302864Ssephe
1323302864Ssephe	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1324302864Ssephe		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1325302864Ssephe
1326302864Ssephe	i = 0;
1327302864Ssephe	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1328302864Ssephe		/* TODO: refcnt chan */
1329302864Ssephe		ret[i] = chan;
1330302864Ssephe
1331302864Ssephe		++i;
1332302864Ssephe		if (i == subchan_cnt)
1333302864Ssephe			break;
1334302864Ssephe	}
1335302864Ssephe	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1336302864Ssephe	    pri_chan->ch_subchan_cnt, subchan_cnt));
1337302864Ssephe
1338302864Ssephe	mtx_unlock(&pri_chan->ch_subchan_lock);
1339302864Ssephe
1340302864Ssephe	return ret;
1341302864Ssephe}
1342302864Ssephe
1343302864Ssephevoid
1344302890Ssephevmbus_subchan_rel(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
1345302864Ssephe{
1346302864Ssephe
1347302864Ssephe	free(subchan, M_TEMP);
1348302864Ssephe}
1349302864Ssephe
1350302864Ssephevoid
1351302890Ssephevmbus_subchan_drain(struct hv_vmbus_channel *pri_chan)
1352302864Ssephe{
1353302864Ssephe	mtx_lock(&pri_chan->ch_subchan_lock);
1354302864Ssephe	while (pri_chan->ch_subchan_cnt > 0)
1355302864Ssephe		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
1356302864Ssephe	mtx_unlock(&pri_chan->ch_subchan_lock);
1357302864Ssephe}
1358302864Ssephe
1359302864Ssephevoid
1360302864Ssephevmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1361302864Ssephe{
1362302864Ssephe	vmbus_chanmsg_proc_t msg_proc;
1363302864Ssephe	uint32_t msg_type;
1364302864Ssephe
1365302864Ssephe	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1366302864Ssephe	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1367302864Ssephe	    ("invalid message type %u", msg_type));
1368302864Ssephe
1369302864Ssephe	msg_proc = vmbus_chan_msgprocs[msg_type];
1370302864Ssephe	if (msg_proc != NULL)
1371302864Ssephe		msg_proc(sc, msg);
1372302864Ssephe}
1373