vmbus_chan.c revision 302870
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29256276Sdim#include <sys/cdefs.h>
30256276Sdim__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302870 2016-07-15 05:29:04Z sephe $");
31256276Sdim
32250199Sgrehan#include <sys/param.h>
33296028Ssephe#include <sys/kernel.h>
34250199Sgrehan#include <sys/malloc.h>
35250199Sgrehan#include <sys/systm.h>
36250199Sgrehan#include <sys/mbuf.h>
37250199Sgrehan#include <sys/lock.h>
38250199Sgrehan#include <sys/mutex.h>
39296181Ssephe#include <sys/sysctl.h>
40301588Ssephe
41301588Ssephe#include <machine/atomic.h>
42250199Sgrehan#include <machine/bus.h>
43301588Ssephe
44250199Sgrehan#include <vm/vm.h>
45250199Sgrehan#include <vm/vm_param.h>
46250199Sgrehan#include <vm/pmap.h>
47250199Sgrehan
48300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
49302619Ssephe#include <dev/hyperv/vmbus/hyperv_var.h>
50301588Ssephe#include <dev/hyperv/vmbus/vmbus_reg.h>
51300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
52250199Sgrehan
53302731Ssephestatic void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
54302692Ssephestatic void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
55302692Ssephe		    const struct hv_vmbus_channel *);
56302864Ssephe
57302713Ssephestatic void	vmbus_chan_task(void *, int);
58302713Ssephestatic void	vmbus_chan_task_nobatch(void *, int);
59302864Ssephestatic void	vmbus_chan_detach_task(void *, int);
60250199Sgrehan
61302864Ssephestatic void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
62302864Ssephe		    const struct vmbus_message *);
63302864Ssephestatic void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
64302864Ssephe		    const struct vmbus_message *);
65302864Ssephe
66302864Ssephe/*
67302864Ssephe * Vmbus channel message processing.
68302864Ssephe */
69302864Ssephestatic const vmbus_chanmsg_proc_t
70302864Ssephevmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
71302864Ssephe	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
72302864Ssephe	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),
73302864Ssephe
74302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
75302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
76302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
77302864Ssephe};
78302864Ssephe
79250199Sgrehan/**
80250199Sgrehan *  @brief Trigger an event notification on the specified channel
81250199Sgrehan */
82250199Sgrehanstatic void
83302731Ssephevmbus_chan_send_event(hv_vmbus_channel *channel)
84250199Sgrehan{
85302618Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
86302693Ssephe	uint32_t chanid = channel->ch_id;
87302618Ssephe
88302618Ssephe	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
89302618Ssephe	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
90302618Ssephe
91302695Ssephe	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
92302731Ssephe		atomic_set_int(
93302731Ssephe		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
94302731Ssephe		channel->ch_montrig_mask);
95250199Sgrehan	} else {
96302726Ssephe		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
97250199Sgrehan	}
98250199Sgrehan}
99250199Sgrehan
100296289Ssephestatic int
101296289Ssephevmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
102296289Ssephe{
103296289Ssephe	struct hv_vmbus_channel *chan = arg1;
104296289Ssephe	int alloc = 0;
105296289Ssephe
106302695Ssephe	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
107296289Ssephe		alloc = 1;
108296289Ssephe	return sysctl_handle_int(oidp, &alloc, 0, req);
109296289Ssephe}
110296289Ssephe
111296181Ssephestatic void
112296290Ssephevmbus_channel_sysctl_create(hv_vmbus_channel* channel)
113296181Ssephe{
114296181Ssephe	device_t dev;
115296181Ssephe	struct sysctl_oid *devch_sysctl;
116296181Ssephe	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
117296181Ssephe	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
118296181Ssephe	struct sysctl_ctx_list *ctx;
119296181Ssephe	uint32_t ch_id;
120296181Ssephe	uint16_t sub_ch_id;
121296181Ssephe	char name[16];
122296181Ssephe
123302819Ssephe	hv_vmbus_channel* primary_ch = channel->ch_prichan;
124296181Ssephe
125296181Ssephe	if (primary_ch == NULL) {
126302706Ssephe		dev = channel->ch_dev;
127302693Ssephe		ch_id = channel->ch_id;
128296181Ssephe	} else {
129302706Ssephe		dev = primary_ch->ch_dev;
130302693Ssephe		ch_id = primary_ch->ch_id;
131302694Ssephe		sub_ch_id = channel->ch_subidx;
132296181Ssephe	}
133302633Ssephe	ctx = &channel->ch_sysctl_ctx;
134302633Ssephe	sysctl_ctx_init(ctx);
135296181Ssephe	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
136296181Ssephe	devch_sysctl = SYSCTL_ADD_NODE(ctx,
137296181Ssephe		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
138298693Ssephe		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
139296181Ssephe	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
140296181Ssephe	snprintf(name, sizeof(name), "%d", ch_id);
141296181Ssephe	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
142296181Ssephe	    	    SYSCTL_CHILDREN(devch_sysctl),
143298693Ssephe	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
144296181Ssephe
145296181Ssephe	if (primary_ch != NULL) {
146296181Ssephe		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
147296181Ssephe			SYSCTL_CHILDREN(devch_id_sysctl),
148298693Ssephe			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
149296181Ssephe		snprintf(name, sizeof(name), "%d", sub_ch_id);
150296181Ssephe		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
151296181Ssephe			SYSCTL_CHILDREN(devch_sub_sysctl),
152298693Ssephe			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
153296188Ssephe
154296188Ssephe		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
155296188Ssephe		    OID_AUTO, "chanid", CTLFLAG_RD,
156302693Ssephe		    &channel->ch_id, 0, "channel id");
157296181Ssephe	}
158296188Ssephe	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
159296188Ssephe	    "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
160296289Ssephe	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
161298693Ssephe	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
162298693Ssephe	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
163296289Ssephe	    "is monitor allocated to this channel");
164296188Ssephe
165296181Ssephe	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
166296181Ssephe                    SYSCTL_CHILDREN(devch_id_sysctl),
167296181Ssephe                    OID_AUTO,
168296181Ssephe		    "in",
169298693Ssephe		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
170296181Ssephe	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
171296181Ssephe                    SYSCTL_CHILDREN(devch_id_sysctl),
172296181Ssephe                    OID_AUTO,
173296181Ssephe		    "out",
174298693Ssephe		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
175296181Ssephe	hv_ring_buffer_stat(ctx,
176296181Ssephe		SYSCTL_CHILDREN(devch_id_in_sysctl),
177296181Ssephe		&(channel->inbound),
178296181Ssephe		"inbound ring buffer stats");
179296181Ssephe	hv_ring_buffer_stat(ctx,
180296181Ssephe		SYSCTL_CHILDREN(devch_id_out_sysctl),
181296181Ssephe		&(channel->outbound),
182296181Ssephe		"outbound ring buffer stats");
183296181Ssephe}
184296290Ssephe
185250199Sgrehan/**
186250199Sgrehan * @brief Open the specified channel
187250199Sgrehan */
188250199Sgrehanint
189250199Sgrehanhv_vmbus_channel_open(
190250199Sgrehan	hv_vmbus_channel*		new_channel,
191250199Sgrehan	uint32_t			send_ring_buffer_size,
192250199Sgrehan	uint32_t			recv_ring_buffer_size,
193250199Sgrehan	void*				user_data,
194250199Sgrehan	uint32_t			user_data_len,
195250199Sgrehan	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
196250199Sgrehan	void* 				context)
197250199Sgrehan{
198302607Ssephe	struct vmbus_softc *sc = new_channel->vmbus_sc;
199302607Ssephe	const struct vmbus_chanmsg_chopen_resp *resp;
200302607Ssephe	const struct vmbus_message *msg;
201302607Ssephe	struct vmbus_chanmsg_chopen *req;
202302607Ssephe	struct vmbus_msghc *mh;
203302607Ssephe	uint32_t status;
204250199Sgrehan	int ret = 0;
205250199Sgrehan	void *in, *out;
206250199Sgrehan
207302607Ssephe	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
208302607Ssephe		device_printf(sc->vmbus_dev,
209302607Ssephe		    "invalid udata len %u for chan%u\n",
210302693Ssephe		    user_data_len, new_channel->ch_id);
211302607Ssephe		return EINVAL;
212302607Ssephe	}
213302607Ssephe
214302812Ssephe	if (atomic_testandset_int(&new_channel->ch_stflags,
215302812Ssephe	    VMBUS_CHAN_ST_OPENED_SHIFT))
216302812Ssephe		panic("double-open chan%u", new_channel->ch_id);
217282212Swhu
218250199Sgrehan	new_channel->on_channel_callback = pfn_on_channel_callback;
219250199Sgrehan	new_channel->channel_callback_context = context;
220250199Sgrehan
221302692Ssephe	vmbus_chan_update_evtflagcnt(sc, new_channel);
222300102Ssephe
223302557Ssephe	new_channel->rxq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
224300646Ssephe	    new_channel->target_cpu);
225302713Ssephe	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
226302713Ssephe		TASK_INIT(&new_channel->channel_task, 0,
227302713Ssephe		    vmbus_chan_task, new_channel);
228302713Ssephe	} else {
229302713Ssephe		TASK_INIT(&new_channel->channel_task, 0,
230302713Ssephe		    vmbus_chan_task_nobatch, new_channel);
231302713Ssephe	}
232294886Ssephe
233250199Sgrehan	/* Allocate the ring buffer */
234250199Sgrehan	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
235256350Sgrehan	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
236250199Sgrehan	KASSERT(out != NULL,
237250199Sgrehan	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
238302812Ssephe	if (out == NULL) {
239302812Ssephe		ret = ENOMEM;
240302812Ssephe		goto failed;
241302812Ssephe	}
242250199Sgrehan
243250199Sgrehan	in = ((uint8_t *) out + send_ring_buffer_size);
244250199Sgrehan
245250199Sgrehan	new_channel->ring_buffer_pages = out;
246256350Sgrehan	new_channel->ring_buffer_page_count = (send_ring_buffer_size +
247256350Sgrehan	    recv_ring_buffer_size) >> PAGE_SHIFT;
248256350Sgrehan	new_channel->ring_buffer_size = send_ring_buffer_size +
249256350Sgrehan	    recv_ring_buffer_size;
250250199Sgrehan
251250199Sgrehan	hv_vmbus_ring_buffer_init(
252250199Sgrehan		&new_channel->outbound,
253250199Sgrehan		out,
254250199Sgrehan		send_ring_buffer_size);
255250199Sgrehan
256250199Sgrehan	hv_vmbus_ring_buffer_init(
257250199Sgrehan		&new_channel->inbound,
258250199Sgrehan		in,
259250199Sgrehan		recv_ring_buffer_size);
260250199Sgrehan
261296290Ssephe	/* Create sysctl tree for this channel */
262296290Ssephe	vmbus_channel_sysctl_create(new_channel);
263296181Ssephe
264250199Sgrehan	/**
265250199Sgrehan	 * Establish the gpadl for the ring buffer
266250199Sgrehan	 */
267250199Sgrehan	new_channel->ring_buffer_gpadl_handle = 0;
268250199Sgrehan
269250199Sgrehan	ret = hv_vmbus_channel_establish_gpadl(new_channel,
270250199Sgrehan		new_channel->outbound.ring_buffer,
271250199Sgrehan		send_ring_buffer_size + recv_ring_buffer_size,
272250199Sgrehan		&new_channel->ring_buffer_gpadl_handle);
273250199Sgrehan
274302607Ssephe	/*
275302607Ssephe	 * Open channel w/ the bufring GPADL on the target CPU.
276250199Sgrehan	 */
277302607Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
278302607Ssephe	if (mh == NULL) {
279302607Ssephe		device_printf(sc->vmbus_dev,
280302607Ssephe		    "can not get msg hypercall for chopen(chan%u)\n",
281302693Ssephe		    new_channel->ch_id);
282302812Ssephe		ret = ENXIO;
283302812Ssephe		goto failed;
284302607Ssephe	}
285250199Sgrehan
286302607Ssephe	req = vmbus_msghc_dataptr(mh);
287302607Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
288302693Ssephe	req->chm_chanid = new_channel->ch_id;
289302693Ssephe	req->chm_openid = new_channel->ch_id;
290302607Ssephe	req->chm_gpadl = new_channel->ring_buffer_gpadl_handle;
291302607Ssephe	req->chm_vcpuid = new_channel->target_vcpu;
292302607Ssephe	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
293250199Sgrehan	if (user_data_len)
294302607Ssephe		memcpy(req->chm_udata, user_data, user_data_len);
295250199Sgrehan
296302607Ssephe	ret = vmbus_msghc_exec(sc, mh);
297302607Ssephe	if (ret != 0) {
298302607Ssephe		device_printf(sc->vmbus_dev,
299302607Ssephe		    "chopen(chan%u) msg hypercall exec failed: %d\n",
300302693Ssephe		    new_channel->ch_id, ret);
301302607Ssephe		vmbus_msghc_put(sc, mh);
302302812Ssephe		goto failed;
303302607Ssephe	}
304250199Sgrehan
305302607Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
306302607Ssephe	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
307302607Ssephe	status = resp->chm_status;
308250199Sgrehan
309302607Ssephe	vmbus_msghc_put(sc, mh);
310250199Sgrehan
311302607Ssephe	if (status == 0) {
312302607Ssephe		if (bootverbose) {
313302607Ssephe			device_printf(sc->vmbus_dev, "chan%u opened\n",
314302693Ssephe			    new_channel->ch_id);
315302607Ssephe		}
316302812Ssephe		return 0;
317250199Sgrehan	}
318302812Ssephe
319302812Ssephe	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
320302812Ssephe	    new_channel->ch_id);
321302812Ssephe	ret = ENXIO;
322302812Ssephe
323302812Ssephefailed:
324302812Ssephe	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
325302812Ssephe	return ret;
326250199Sgrehan}
327250199Sgrehan
328250199Sgrehan/**
329302609Ssephe * @brief Establish a GPADL for the specified buffer
330250199Sgrehan */
331302609Ssepheint
332302609Ssephehv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
333302609Ssephe    void *contig_buffer, uint32_t size, uint32_t *gpadl0)
334250199Sgrehan{
335302609Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
336302609Ssephe	struct vmbus_msghc *mh;
337302609Ssephe	struct vmbus_chanmsg_gpadl_conn *req;
338302609Ssephe	const struct vmbus_message *msg;
339302609Ssephe	size_t reqsz;
340302609Ssephe	uint32_t gpadl, status;
341302609Ssephe	int page_count, range_len, i, cnt, error;
342302609Ssephe	uint64_t page_id, paddr;
343250199Sgrehan
344302609Ssephe	/*
345302609Ssephe	 * Preliminary checks.
346302609Ssephe	 */
347250199Sgrehan
348302609Ssephe	KASSERT((size & PAGE_MASK) == 0,
349302609Ssephe	    ("invalid GPA size %u, not multiple page size", size));
350250199Sgrehan	page_count = size >> PAGE_SHIFT;
351250199Sgrehan
352302609Ssephe	paddr = hv_get_phys_addr(contig_buffer);
353302609Ssephe	KASSERT((paddr & PAGE_MASK) == 0,
354302609Ssephe	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
355302609Ssephe	page_id = paddr >> PAGE_SHIFT;
356250199Sgrehan
357302609Ssephe	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
358302609Ssephe	/*
359302609Ssephe	 * We don't support multiple GPA ranges.
360302609Ssephe	 */
361302609Ssephe	if (range_len > UINT16_MAX) {
362302609Ssephe		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
363302609Ssephe		    page_count);
364302609Ssephe		return EOPNOTSUPP;
365250199Sgrehan	}
366250199Sgrehan
367302609Ssephe	/*
368302609Ssephe	 * Allocate GPADL id.
369302609Ssephe	 */
370302630Ssephe	gpadl = vmbus_gpadl_alloc(sc);
371302609Ssephe	*gpadl0 = gpadl;
372250199Sgrehan
373302609Ssephe	/*
374302609Ssephe	 * Connect this GPADL to the target channel.
375302609Ssephe	 *
376302609Ssephe	 * NOTE:
377302609Ssephe	 * Since each message can only hold small set of page
378302609Ssephe	 * addresses, several messages may be required to
379302609Ssephe	 * complete the connection.
380302609Ssephe	 */
381302609Ssephe	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
382302609Ssephe		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
383302609Ssephe	else
384302609Ssephe		cnt = page_count;
385302609Ssephe	page_count -= cnt;
386250199Sgrehan
387302609Ssephe	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
388302609Ssephe	    chm_range.gpa_page[cnt]);
389302609Ssephe	mh = vmbus_msghc_get(sc, reqsz);
390302609Ssephe	if (mh == NULL) {
391302609Ssephe		device_printf(sc->vmbus_dev,
392302609Ssephe		    "can not get msg hypercall for gpadl->chan%u\n",
393302693Ssephe		    channel->ch_id);
394302609Ssephe		return EIO;
395250199Sgrehan	}
396250199Sgrehan
397302609Ssephe	req = vmbus_msghc_dataptr(mh);
398302609Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
399302693Ssephe	req->chm_chanid = channel->ch_id;
400302609Ssephe	req->chm_gpadl = gpadl;
401302609Ssephe	req->chm_range_len = range_len;
402302609Ssephe	req->chm_range_cnt = 1;
403302609Ssephe	req->chm_range.gpa_len = size;
404302609Ssephe	req->chm_range.gpa_ofs = 0;
405302609Ssephe	for (i = 0; i < cnt; ++i)
406302609Ssephe		req->chm_range.gpa_page[i] = page_id++;
407250199Sgrehan
408302609Ssephe	error = vmbus_msghc_exec(sc, mh);
409302609Ssephe	if (error) {
410302609Ssephe		device_printf(sc->vmbus_dev,
411302609Ssephe		    "gpadl->chan%u msg hypercall exec failed: %d\n",
412302693Ssephe		    channel->ch_id, error);
413302609Ssephe		vmbus_msghc_put(sc, mh);
414302609Ssephe		return error;
415302609Ssephe	}
416250199Sgrehan
417302609Ssephe	while (page_count > 0) {
418302609Ssephe		struct vmbus_chanmsg_gpadl_subconn *subreq;
419250199Sgrehan
420302609Ssephe		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
421302609Ssephe			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
422302609Ssephe		else
423302609Ssephe			cnt = page_count;
424302609Ssephe		page_count -= cnt;
425250199Sgrehan
426302609Ssephe		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
427302609Ssephe		    chm_gpa_page[cnt]);
428302609Ssephe		vmbus_msghc_reset(mh, reqsz);
429250199Sgrehan
430302609Ssephe		subreq = vmbus_msghc_dataptr(mh);
431302609Ssephe		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
432302609Ssephe		subreq->chm_gpadl = gpadl;
433302609Ssephe		for (i = 0; i < cnt; ++i)
434302609Ssephe			subreq->chm_gpa_page[i] = page_id++;
435250199Sgrehan
436302609Ssephe		vmbus_msghc_exec_noresult(mh);
437250199Sgrehan	}
438302609Ssephe	KASSERT(page_count == 0, ("invalid page count %d", page_count));
439250199Sgrehan
440302609Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
441302609Ssephe	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
442302609Ssephe	    msg->msg_data)->chm_status;
443250199Sgrehan
444302609Ssephe	vmbus_msghc_put(sc, mh);
445250199Sgrehan
446302609Ssephe	if (status != 0) {
447302609Ssephe		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
448302693Ssephe		    "status %u\n", channel->ch_id, status);
449302609Ssephe		return EIO;
450302632Ssephe	} else {
451302632Ssephe		if (bootverbose) {
452302632Ssephe			device_printf(sc->vmbus_dev, "gpadl->chan%u "
453302693Ssephe			    "succeeded\n", channel->ch_id);
454302632Ssephe		}
455302609Ssephe	}
456302609Ssephe	return 0;
457250199Sgrehan}
458250199Sgrehan
459302611Ssephe/*
460302611Ssephe * Disconnect the GPA from the target channel
461250199Sgrehan */
462250199Sgrehanint
463302611Ssephehv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
464250199Sgrehan{
465302611Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
466302611Ssephe	struct vmbus_msghc *mh;
467302611Ssephe	struct vmbus_chanmsg_gpadl_disconn *req;
468302611Ssephe	int error;
469250199Sgrehan
470302611Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
471302611Ssephe	if (mh == NULL) {
472302611Ssephe		device_printf(sc->vmbus_dev,
473302611Ssephe		    "can not get msg hypercall for gpa x->chan%u\n",
474302693Ssephe		    chan->ch_id);
475302611Ssephe		return EBUSY;
476250199Sgrehan	}
477250199Sgrehan
478302611Ssephe	req = vmbus_msghc_dataptr(mh);
479302611Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
480302693Ssephe	req->chm_chanid = chan->ch_id;
481302611Ssephe	req->chm_gpadl = gpadl;
482250199Sgrehan
483302611Ssephe	error = vmbus_msghc_exec(sc, mh);
484302611Ssephe	if (error) {
485302611Ssephe		device_printf(sc->vmbus_dev,
486302611Ssephe		    "gpa x->chan%u msg hypercall exec failed: %d\n",
487302693Ssephe		    chan->ch_id, error);
488302611Ssephe		vmbus_msghc_put(sc, mh);
489302611Ssephe		return error;
490302611Ssephe	}
491250199Sgrehan
492302611Ssephe	vmbus_msghc_wait_result(sc, mh);
493302611Ssephe	/* Discard result; no useful information */
494302611Ssephe	vmbus_msghc_put(sc, mh);
495250199Sgrehan
496302611Ssephe	return 0;
497250199Sgrehan}
498250199Sgrehan
499282212Swhustatic void
500282212Swhuhv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
501250199Sgrehan{
502302610Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
503302610Ssephe	struct vmbus_msghc *mh;
504302610Ssephe	struct vmbus_chanmsg_chclose *req;
505294886Ssephe	struct taskqueue *rxq = channel->rxq;
506302610Ssephe	int error;
507250199Sgrehan
508302812Ssephe	/* TODO: stringent check */
509302812Ssephe	atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
510302812Ssephe
511302633Ssephe	sysctl_ctx_free(&channel->ch_sysctl_ctx);
512282212Swhu
513282212Swhu	/*
514294886Ssephe	 * set rxq to NULL to avoid more requests be scheduled
515294886Ssephe	 */
516294886Ssephe	channel->rxq = NULL;
517294886Ssephe	taskqueue_drain(rxq, &channel->channel_task);
518250199Sgrehan	channel->on_channel_callback = NULL;
519250199Sgrehan
520250199Sgrehan	/**
521250199Sgrehan	 * Send a closing message
522250199Sgrehan	 */
523250199Sgrehan
524302610Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
525302610Ssephe	if (mh == NULL) {
526302610Ssephe		device_printf(sc->vmbus_dev,
527302610Ssephe		    "can not get msg hypercall for chclose(chan%u)\n",
528302693Ssephe		    channel->ch_id);
529302610Ssephe		return;
530302610Ssephe	}
531250199Sgrehan
532302610Ssephe	req = vmbus_msghc_dataptr(mh);
533302610Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
534302693Ssephe	req->chm_chanid = channel->ch_id;
535250199Sgrehan
536302610Ssephe	error = vmbus_msghc_exec_noresult(mh);
537302610Ssephe	vmbus_msghc_put(sc, mh);
538302610Ssephe
539302610Ssephe	if (error) {
540302610Ssephe		device_printf(sc->vmbus_dev,
541302610Ssephe		    "chclose(chan%u) msg hypercall exec failed: %d\n",
542302693Ssephe		    channel->ch_id, error);
543302610Ssephe		return;
544302610Ssephe	} else if (bootverbose) {
545302610Ssephe		device_printf(sc->vmbus_dev, "close chan%u\n",
546302693Ssephe		    channel->ch_id);
547302610Ssephe	}
548302610Ssephe
549250199Sgrehan	/* Tear down the gpadl for the channel's ring buffer */
550250199Sgrehan	if (channel->ring_buffer_gpadl_handle) {
551250199Sgrehan		hv_vmbus_channel_teardown_gpdal(channel,
552250199Sgrehan			channel->ring_buffer_gpadl_handle);
553250199Sgrehan	}
554250199Sgrehan
555250199Sgrehan	/* TODO: Send a msg to release the childRelId */
556250199Sgrehan
557250199Sgrehan	/* cleanup the ring buffers for this channel */
558250199Sgrehan	hv_ring_buffer_cleanup(&channel->outbound);
559250199Sgrehan	hv_ring_buffer_cleanup(&channel->inbound);
560250199Sgrehan
561256350Sgrehan	contigfree(channel->ring_buffer_pages, channel->ring_buffer_size,
562256350Sgrehan	    M_DEVBUF);
563282212Swhu}
564250199Sgrehan
565302818Ssephe/*
566302818Ssephe * Caller should make sure that all sub-channels have
567302818Ssephe * been added to 'chan' and all to-be-closed channels
568302818Ssephe * are not being opened.
569282212Swhu */
570282212Swhuvoid
571302818Ssephehv_vmbus_channel_close(struct hv_vmbus_channel *chan)
572282212Swhu{
573302818Ssephe	int subchan_cnt;
574282212Swhu
575302818Ssephe	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
576282212Swhu		/*
577302818Ssephe		 * Sub-channel is closed when its primary channel
578302818Ssephe		 * is closed; done.
579282212Swhu		 */
580282212Swhu		return;
581282212Swhu	}
582282212Swhu
583250199Sgrehan	/*
584302818Ssephe	 * Close all sub-channels, if any.
585250199Sgrehan	 */
586302819Ssephe	subchan_cnt = chan->ch_subchan_cnt;
587302818Ssephe	if (subchan_cnt > 0) {
588302818Ssephe		struct hv_vmbus_channel **subchan;
589302818Ssephe		int i;
590302818Ssephe
591302818Ssephe		subchan = vmbus_get_subchan(chan, subchan_cnt);
592302818Ssephe		for (i = 0; i < subchan_cnt; ++i)
593302818Ssephe			hv_vmbus_channel_close_internal(subchan[i]);
594302818Ssephe		vmbus_rel_subchan(subchan, subchan_cnt);
595250199Sgrehan	}
596302818Ssephe
597302818Ssephe	/* Then close the primary channel. */
598302818Ssephe	hv_vmbus_channel_close_internal(chan);
599250199Sgrehan}
600250199Sgrehan
601250199Sgrehan/**
602250199Sgrehan * @brief Send the specified buffer on the given channel
603250199Sgrehan */
604250199Sgrehanint
605250199Sgrehanhv_vmbus_channel_send_packet(
606250199Sgrehan	hv_vmbus_channel*	channel,
607250199Sgrehan	void*			buffer,
608250199Sgrehan	uint32_t		buffer_len,
609250199Sgrehan	uint64_t		request_id,
610250199Sgrehan	hv_vmbus_packet_type	type,
611250199Sgrehan	uint32_t		flags)
612250199Sgrehan{
613250199Sgrehan	int			ret = 0;
614250199Sgrehan	hv_vm_packet_descriptor	desc;
615250199Sgrehan	uint32_t		packet_len;
616250199Sgrehan	uint64_t		aligned_data;
617250199Sgrehan	uint32_t		packet_len_aligned;
618282212Swhu	boolean_t		need_sig;
619302870Ssephe	struct iovec		iov[3];
620250199Sgrehan
621250199Sgrehan	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
622250199Sgrehan	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
623250199Sgrehan	aligned_data = 0;
624250199Sgrehan
625250199Sgrehan	/* Setup the descriptor */
626250199Sgrehan	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
627250199Sgrehan	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
628250199Sgrehan			    /* in 8-bytes granularity */
629250199Sgrehan	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
630250199Sgrehan	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
631250199Sgrehan	desc.transaction_id = request_id;
632250199Sgrehan
633302870Ssephe	iov[0].iov_base = &desc;
634302870Ssephe	iov[0].iov_len = sizeof(hv_vm_packet_descriptor);
635250199Sgrehan
636302870Ssephe	iov[1].iov_base = buffer;
637302870Ssephe	iov[1].iov_len = buffer_len;
638250199Sgrehan
639302870Ssephe	iov[2].iov_base = &aligned_data;
640302870Ssephe	iov[2].iov_len = packet_len_aligned - packet_len;
641250199Sgrehan
642302870Ssephe	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
643250199Sgrehan
644250199Sgrehan	/* TODO: We should determine if this is optional */
645302731Ssephe	if (ret == 0 && need_sig)
646302731Ssephe		vmbus_chan_send_event(channel);
647250199Sgrehan
648250199Sgrehan	return (ret);
649250199Sgrehan}
650250199Sgrehan
651250199Sgrehan/**
652250199Sgrehan * @brief Send a range of single-page buffer packets using
653250199Sgrehan * a GPADL Direct packet type
654250199Sgrehan */
655250199Sgrehanint
656250199Sgrehanhv_vmbus_channel_send_packet_pagebuffer(
657250199Sgrehan	hv_vmbus_channel*	channel,
658250199Sgrehan	hv_vmbus_page_buffer	page_buffers[],
659250199Sgrehan	uint32_t		page_count,
660250199Sgrehan	void*			buffer,
661250199Sgrehan	uint32_t		buffer_len,
662250199Sgrehan	uint64_t		request_id)
663250199Sgrehan{
664250199Sgrehan
665250199Sgrehan	int					ret = 0;
666282212Swhu	boolean_t				need_sig;
667250199Sgrehan	uint32_t				packet_len;
668294705Ssephe	uint32_t				page_buflen;
669250199Sgrehan	uint32_t				packetLen_aligned;
670302870Ssephe	struct iovec				iov[4];
671250199Sgrehan	hv_vmbus_channel_packet_page_buffer	desc;
672250199Sgrehan	uint32_t				descSize;
673250199Sgrehan	uint64_t				alignedData = 0;
674250199Sgrehan
675250199Sgrehan	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
676250199Sgrehan		return (EINVAL);
677250199Sgrehan
678250199Sgrehan	/*
679250199Sgrehan	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
680250199Sgrehan	 *  is the largest size we support
681250199Sgrehan	 */
682294705Ssephe	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
683294705Ssephe	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
684294705Ssephe	packet_len = descSize + page_buflen + buffer_len;
685250199Sgrehan	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
686250199Sgrehan
687250199Sgrehan	/* Setup the descriptor */
688250199Sgrehan	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
689250199Sgrehan	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
690294705Ssephe	/* in 8-bytes granularity */
691294705Ssephe	desc.data_offset8 = (descSize + page_buflen) >> 3;
692250199Sgrehan	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
693250199Sgrehan	desc.transaction_id = request_id;
694250199Sgrehan	desc.range_count = page_count;
695250199Sgrehan
696302870Ssephe	iov[0].iov_base = &desc;
697302870Ssephe	iov[0].iov_len = descSize;
698250199Sgrehan
699302870Ssephe	iov[1].iov_base = page_buffers;
700302870Ssephe	iov[1].iov_len = page_buflen;
701250199Sgrehan
702302870Ssephe	iov[2].iov_base = buffer;
703302870Ssephe	iov[2].iov_len = buffer_len;
704250199Sgrehan
705302870Ssephe	iov[3].iov_base = &alignedData;
706302870Ssephe	iov[3].iov_len = packetLen_aligned - packet_len;
707294705Ssephe
708302870Ssephe	ret = hv_ring_buffer_write(&channel->outbound, iov, 4, &need_sig);
709250199Sgrehan
710250199Sgrehan	/* TODO: We should determine if this is optional */
711302731Ssephe	if (ret == 0 && need_sig)
712302731Ssephe		vmbus_chan_send_event(channel);
713250199Sgrehan
714250199Sgrehan	return (ret);
715250199Sgrehan}
716250199Sgrehan
717250199Sgrehan/**
718250199Sgrehan * @brief Send a multi-page buffer packet using a GPADL Direct packet type
719250199Sgrehan */
720250199Sgrehanint
721250199Sgrehanhv_vmbus_channel_send_packet_multipagebuffer(
722250199Sgrehan	hv_vmbus_channel*		channel,
723250199Sgrehan	hv_vmbus_multipage_buffer*	multi_page_buffer,
724250199Sgrehan	void*				buffer,
725250199Sgrehan	uint32_t			buffer_len,
726250199Sgrehan	uint64_t			request_id)
727250199Sgrehan{
728250199Sgrehan
729250199Sgrehan	int			ret = 0;
730250199Sgrehan	uint32_t		desc_size;
731282212Swhu	boolean_t		need_sig;
732250199Sgrehan	uint32_t		packet_len;
733250199Sgrehan	uint32_t		packet_len_aligned;
734250199Sgrehan	uint32_t		pfn_count;
735250199Sgrehan	uint64_t		aligned_data = 0;
736302870Ssephe	struct iovec		iov[3];
737250199Sgrehan	hv_vmbus_channel_packet_multipage_buffer desc;
738250199Sgrehan
739250199Sgrehan	pfn_count =
740250199Sgrehan	    HV_NUM_PAGES_SPANNED(
741250199Sgrehan		    multi_page_buffer->offset,
742250199Sgrehan		    multi_page_buffer->length);
743250199Sgrehan
744250199Sgrehan	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
745250199Sgrehan	    return (EINVAL);
746250199Sgrehan	/*
747250199Sgrehan	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
748250199Sgrehan	 * is the largest size we support
749250199Sgrehan	 */
750250199Sgrehan	desc_size =
751250199Sgrehan	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
752250199Sgrehan		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
753250199Sgrehan			sizeof(uint64_t));
754250199Sgrehan	packet_len = desc_size + buffer_len;
755250199Sgrehan	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
756250199Sgrehan
757250199Sgrehan	/*
758250199Sgrehan	 * Setup the descriptor
759250199Sgrehan	 */
760250199Sgrehan	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
761250199Sgrehan	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
762250199Sgrehan	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
763250199Sgrehan	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
764250199Sgrehan	desc.transaction_id = request_id;
765250199Sgrehan	desc.range_count = 1;
766250199Sgrehan
767250199Sgrehan	desc.range.length = multi_page_buffer->length;
768250199Sgrehan	desc.range.offset = multi_page_buffer->offset;
769250199Sgrehan
770250199Sgrehan	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
771250199Sgrehan		pfn_count * sizeof(uint64_t));
772250199Sgrehan
773302870Ssephe	iov[0].iov_base = &desc;
774302870Ssephe	iov[0].iov_len = desc_size;
775250199Sgrehan
776302870Ssephe	iov[1].iov_base = buffer;
777302870Ssephe	iov[1].iov_len = buffer_len;
778250199Sgrehan
779302870Ssephe	iov[2].iov_base = &aligned_data;
780302870Ssephe	iov[2].iov_len = packet_len_aligned - packet_len;
781250199Sgrehan
782302870Ssephe	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
783250199Sgrehan
784250199Sgrehan	/* TODO: We should determine if this is optional */
785302731Ssephe	if (ret == 0 && need_sig)
786302731Ssephe		vmbus_chan_send_event(channel);
787250199Sgrehan
788250199Sgrehan	return (ret);
789250199Sgrehan}
790250199Sgrehan
791250199Sgrehan/**
792250199Sgrehan * @brief Retrieve the user packet on the specified channel
793250199Sgrehan */
794250199Sgrehanint
795250199Sgrehanhv_vmbus_channel_recv_packet(
796250199Sgrehan	hv_vmbus_channel*	channel,
797250199Sgrehan	void*			Buffer,
798250199Sgrehan	uint32_t		buffer_len,
799250199Sgrehan	uint32_t*		buffer_actual_len,
800250199Sgrehan	uint64_t*		request_id)
801250199Sgrehan{
802250199Sgrehan	int			ret;
803250199Sgrehan	uint32_t		user_len;
804250199Sgrehan	uint32_t		packet_len;
805250199Sgrehan	hv_vm_packet_descriptor	desc;
806250199Sgrehan
807250199Sgrehan	*buffer_actual_len = 0;
808250199Sgrehan	*request_id = 0;
809250199Sgrehan
810250199Sgrehan	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
811250199Sgrehan		sizeof(hv_vm_packet_descriptor));
812250199Sgrehan	if (ret != 0)
813250199Sgrehan		return (0);
814250199Sgrehan
815250199Sgrehan	packet_len = desc.length8 << 3;
816250199Sgrehan	user_len = packet_len - (desc.data_offset8 << 3);
817250199Sgrehan
818250199Sgrehan	*buffer_actual_len = user_len;
819250199Sgrehan
820250199Sgrehan	if (user_len > buffer_len)
821250199Sgrehan		return (EINVAL);
822250199Sgrehan
823250199Sgrehan	*request_id = desc.transaction_id;
824250199Sgrehan
825250199Sgrehan	/* Copy over the packet to the user buffer */
826250199Sgrehan	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
827250199Sgrehan		(desc.data_offset8 << 3));
828250199Sgrehan
829250199Sgrehan	return (0);
830250199Sgrehan}
831250199Sgrehan
832250199Sgrehan/**
833250199Sgrehan * @brief Retrieve the raw packet on the specified channel
834250199Sgrehan */
835250199Sgrehanint
836250199Sgrehanhv_vmbus_channel_recv_packet_raw(
837250199Sgrehan	hv_vmbus_channel*	channel,
838250199Sgrehan	void*			buffer,
839250199Sgrehan	uint32_t		buffer_len,
840250199Sgrehan	uint32_t*		buffer_actual_len,
841250199Sgrehan	uint64_t*		request_id)
842250199Sgrehan{
843250199Sgrehan	int		ret;
844250199Sgrehan	uint32_t	packetLen;
845250199Sgrehan	hv_vm_packet_descriptor	desc;
846250199Sgrehan
847250199Sgrehan	*buffer_actual_len = 0;
848250199Sgrehan	*request_id = 0;
849250199Sgrehan
850250199Sgrehan	ret = hv_ring_buffer_peek(
851250199Sgrehan		&channel->inbound, &desc,
852250199Sgrehan		sizeof(hv_vm_packet_descriptor));
853250199Sgrehan
854250199Sgrehan	if (ret != 0)
855250199Sgrehan	    return (0);
856250199Sgrehan
857250199Sgrehan	packetLen = desc.length8 << 3;
858250199Sgrehan	*buffer_actual_len = packetLen;
859250199Sgrehan
860250199Sgrehan	if (packetLen > buffer_len)
861250199Sgrehan	    return (ENOBUFS);
862250199Sgrehan
863250199Sgrehan	*request_id = desc.transaction_id;
864250199Sgrehan
865250199Sgrehan	/* Copy over the entire packet to the user buffer */
866250199Sgrehan	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
867250199Sgrehan
868250199Sgrehan	return (0);
869250199Sgrehan}
870294886Ssephe
871294886Ssephestatic void
872302713Ssephevmbus_chan_task(void *xchan, int pending __unused)
873294886Ssephe{
874302713Ssephe	struct hv_vmbus_channel *chan = xchan;
875302713Ssephe	void (*callback)(void *);
876302713Ssephe	void *arg;
877294886Ssephe
878302713Ssephe	arg = chan->channel_callback_context;
879302713Ssephe	callback = chan->on_channel_callback;
880302709Ssephe
881302710Ssephe	/*
882302710Ssephe	 * Optimize host to guest signaling by ensuring:
883302710Ssephe	 * 1. While reading the channel, we disable interrupts from
884302710Ssephe	 *    host.
885302710Ssephe	 * 2. Ensure that we process all posted messages from the host
886302710Ssephe	 *    before returning from this callback.
887302710Ssephe	 * 3. Once we return, enable signaling from the host. Once this
888302710Ssephe	 *    state is set we check to see if additional packets are
889302710Ssephe	 *    available to read. In this case we repeat the process.
890302713Ssephe	 *
891302713Ssephe	 * NOTE: Interrupt has been disabled in the ISR.
892302710Ssephe	 */
893302713Ssephe	for (;;) {
894302713Ssephe		uint32_t left;
895294886Ssephe
896302713Ssephe		callback(arg);
897294886Ssephe
898302713Ssephe		left = hv_ring_buffer_read_end(&chan->inbound);
899302713Ssephe		if (left == 0) {
900302713Ssephe			/* No more data in RX bufring; done */
901302713Ssephe			break;
902302713Ssephe		}
903302713Ssephe		hv_ring_buffer_read_begin(&chan->inbound);
904302713Ssephe	}
905294886Ssephe}
906302692Ssephe
907302713Ssephestatic void
908302713Ssephevmbus_chan_task_nobatch(void *xchan, int pending __unused)
909302713Ssephe{
910302713Ssephe	struct hv_vmbus_channel *chan = xchan;
911302713Ssephe
912302713Ssephe	chan->on_channel_callback(chan->channel_callback_context);
913302713Ssephe}
914302713Ssephe
915302692Ssephestatic __inline void
916302692Ssephevmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
917302692Ssephe    int flag_cnt)
918302692Ssephe{
919302692Ssephe	int f;
920302692Ssephe
921302692Ssephe	for (f = 0; f < flag_cnt; ++f) {
922302806Ssephe		uint32_t chid_base;
923302692Ssephe		u_long flags;
924302806Ssephe		int chid_ofs;
925302692Ssephe
926302692Ssephe		if (event_flags[f] == 0)
927302692Ssephe			continue;
928302692Ssephe
929302692Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
930302806Ssephe		chid_base = f << VMBUS_EVTFLAG_SHIFT;
931302692Ssephe
932302806Ssephe		while ((chid_ofs = ffsl(flags)) != 0) {
933302692Ssephe			struct hv_vmbus_channel *channel;
934302692Ssephe
935302806Ssephe			--chid_ofs; /* NOTE: ffsl is 1-based */
936302806Ssephe			flags &= ~(1UL << chid_ofs);
937302692Ssephe
938302806Ssephe			channel = sc->vmbus_chmap[chid_base + chid_ofs];
939302692Ssephe
940302692Ssephe			/* if channel is closed or closing */
941302692Ssephe			if (channel == NULL || channel->rxq == NULL)
942302692Ssephe				continue;
943302692Ssephe
944302709Ssephe			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
945302692Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
946302692Ssephe			taskqueue_enqueue(channel->rxq, &channel->channel_task);
947302692Ssephe		}
948302692Ssephe	}
949302692Ssephe}
950302692Ssephe
951302692Ssephevoid
952302692Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
953302692Ssephe{
954302692Ssephe	struct vmbus_evtflags *eventf;
955302692Ssephe
956302692Ssephe	/*
957302692Ssephe	 * On Host with Win8 or above, the event page can be checked directly
958302692Ssephe	 * to get the id of the channel that has the pending interrupt.
959302692Ssephe	 */
960302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
961302692Ssephe	vmbus_event_flags_proc(sc, eventf->evt_flags,
962302692Ssephe	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
963302692Ssephe}
964302692Ssephe
965302692Ssephevoid
966302692Ssephevmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
967302692Ssephe{
968302692Ssephe	struct vmbus_evtflags *eventf;
969302692Ssephe
970302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
971302692Ssephe	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
972302692Ssephe		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
973302692Ssephe		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
974302692Ssephe	}
975302692Ssephe}
976302692Ssephe
977302692Ssephestatic void
978302692Ssephevmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
979302692Ssephe    const struct hv_vmbus_channel *chan)
980302692Ssephe{
981302692Ssephe	volatile int *flag_cnt_ptr;
982302692Ssephe	int flag_cnt;
983302692Ssephe
984302693Ssephe	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
985302692Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->target_cpu);
986302692Ssephe
987302692Ssephe	for (;;) {
988302692Ssephe		int old_flag_cnt;
989302692Ssephe
990302692Ssephe		old_flag_cnt = *flag_cnt_ptr;
991302692Ssephe		if (old_flag_cnt >= flag_cnt)
992302692Ssephe			break;
993302692Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
994302692Ssephe			if (bootverbose) {
995302692Ssephe				device_printf(sc->vmbus_dev,
996302692Ssephe				    "channel%u update cpu%d flag_cnt to %d\n",
997302693Ssephe				    chan->ch_id,
998302692Ssephe				    chan->target_cpu, flag_cnt);
999302692Ssephe			}
1000302692Ssephe			break;
1001302692Ssephe		}
1002302692Ssephe	}
1003302692Ssephe}
1004302864Ssephe
1005302864Ssephestatic struct hv_vmbus_channel *
1006302864Ssephevmbus_chan_alloc(struct vmbus_softc *sc)
1007302864Ssephe{
1008302864Ssephe	struct hv_vmbus_channel *chan;
1009302864Ssephe
1010302864Ssephe	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
1011302864Ssephe
1012302864Ssephe	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
1013302864Ssephe	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
1014302864Ssephe	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
1015302864Ssephe	if (chan->ch_monprm == NULL) {
1016302864Ssephe		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
1017302864Ssephe		free(chan, M_DEVBUF);
1018302864Ssephe		return NULL;
1019302864Ssephe	}
1020302864Ssephe
1021302864Ssephe	chan->vmbus_sc = sc;
1022302864Ssephe	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
1023302864Ssephe	TAILQ_INIT(&chan->ch_subchans);
1024302864Ssephe	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
1025302864Ssephe
1026302864Ssephe	return chan;
1027302864Ssephe}
1028302864Ssephe
1029302864Ssephestatic void
1030302864Ssephevmbus_chan_free(struct hv_vmbus_channel *chan)
1031302864Ssephe{
1032302864Ssephe	/* TODO: assert sub-channel list is empty */
1033302864Ssephe	/* TODO: asset no longer on the primary channel's sub-channel list */
1034302864Ssephe	/* TODO: asset no longer on the vmbus channel list */
1035302864Ssephe	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
1036302864Ssephe	mtx_destroy(&chan->ch_subchan_lock);
1037302864Ssephe	free(chan, M_DEVBUF);
1038302864Ssephe}
1039302864Ssephe
1040302864Ssephestatic int
1041302864Ssephevmbus_chan_add(struct hv_vmbus_channel *newchan)
1042302864Ssephe{
1043302864Ssephe	struct vmbus_softc *sc = newchan->vmbus_sc;
1044302864Ssephe	struct hv_vmbus_channel *prichan;
1045302864Ssephe
1046302864Ssephe	if (newchan->ch_id == 0) {
1047302864Ssephe		/*
1048302864Ssephe		 * XXX
1049302864Ssephe		 * Chan0 will neither be processed nor should be offered;
1050302864Ssephe		 * skip it.
1051302864Ssephe		 */
1052302864Ssephe		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
1053302864Ssephe		return EINVAL;
1054302864Ssephe	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
1055302864Ssephe		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
1056302864Ssephe		    newchan->ch_id);
1057302864Ssephe		return EINVAL;
1058302864Ssephe	}
1059302864Ssephe	sc->vmbus_chmap[newchan->ch_id] = newchan;
1060302864Ssephe
1061302864Ssephe	if (bootverbose) {
1062302864Ssephe		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
1063302864Ssephe		    newchan->ch_id, newchan->ch_subidx);
1064302864Ssephe	}
1065302864Ssephe
1066302864Ssephe	mtx_lock(&sc->vmbus_prichan_lock);
1067302864Ssephe	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
1068302864Ssephe		/*
1069302864Ssephe		 * Sub-channel will have the same type GUID and instance
1070302864Ssephe		 * GUID as its primary channel.
1071302864Ssephe		 */
1072302864Ssephe		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
1073302864Ssephe		    sizeof(struct hyperv_guid)) == 0 &&
1074302864Ssephe		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
1075302864Ssephe		    sizeof(struct hyperv_guid)) == 0)
1076302864Ssephe			break;
1077302864Ssephe	}
1078302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
1079302864Ssephe		if (prichan == NULL) {
1080302864Ssephe			/* Install the new primary channel */
1081302864Ssephe			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
1082302864Ssephe			    ch_prilink);
1083302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1084302864Ssephe			return 0;
1085302864Ssephe		} else {
1086302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1087302864Ssephe			device_printf(sc->vmbus_dev, "duplicated primary "
1088302864Ssephe			    "chan%u\n", newchan->ch_id);
1089302864Ssephe			return EINVAL;
1090302864Ssephe		}
1091302864Ssephe	} else { /* Sub-channel */
1092302864Ssephe		if (prichan == NULL) {
1093302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1094302864Ssephe			device_printf(sc->vmbus_dev, "no primary chan for "
1095302864Ssephe			    "chan%u\n", newchan->ch_id);
1096302864Ssephe			return EINVAL;
1097302864Ssephe		}
1098302864Ssephe		/*
1099302864Ssephe		 * Found the primary channel for this sub-channel and
1100302864Ssephe		 * move on.
1101302864Ssephe		 *
1102302864Ssephe		 * XXX refcnt prichan
1103302864Ssephe		 */
1104302864Ssephe	}
1105302864Ssephe	mtx_unlock(&sc->vmbus_prichan_lock);
1106302864Ssephe
1107302864Ssephe	/*
1108302864Ssephe	 * This is a sub-channel; link it with the primary channel.
1109302864Ssephe	 */
1110302864Ssephe	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
1111302864Ssephe	    ("new channel is not sub-channel"));
1112302864Ssephe	KASSERT(prichan != NULL, ("no primary channel"));
1113302864Ssephe
1114302864Ssephe	newchan->ch_prichan = prichan;
1115302864Ssephe	newchan->ch_dev = prichan->ch_dev;
1116302864Ssephe
1117302864Ssephe	mtx_lock(&prichan->ch_subchan_lock);
1118302864Ssephe	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
1119302864Ssephe	/*
1120302864Ssephe	 * Bump up sub-channel count and notify anyone that is
1121302864Ssephe	 * interested in this sub-channel, after this sub-channel
1122302864Ssephe	 * is setup.
1123302864Ssephe	 */
1124302864Ssephe	prichan->ch_subchan_cnt++;
1125302864Ssephe	mtx_unlock(&prichan->ch_subchan_lock);
1126302864Ssephe	wakeup(prichan);
1127302864Ssephe
1128302864Ssephe	return 0;
1129302864Ssephe}
1130302864Ssephe
1131302864Ssephevoid
1132302864Ssephevmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1133302864Ssephe{
1134302864Ssephe	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1135302864Ssephe
1136302864Ssephe	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1137302864Ssephe	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1138302864Ssephe		/* Only cpu0 is supported */
1139302864Ssephe		cpu = 0;
1140302864Ssephe	}
1141302864Ssephe
1142302864Ssephe	chan->target_cpu = cpu;
1143302864Ssephe	chan->target_vcpu = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1144302864Ssephe
1145302864Ssephe	if (bootverbose) {
1146302864Ssephe		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1147302864Ssephe		    chan->ch_id,
1148302864Ssephe		    chan->target_cpu, chan->target_vcpu);
1149302864Ssephe	}
1150302864Ssephe}
1151302864Ssephe
1152302864Ssephevoid
1153302864Ssephevmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
1154302864Ssephe{
1155302864Ssephe	static uint32_t vmbus_chan_nextcpu;
1156302864Ssephe	int cpu;
1157302864Ssephe
1158302864Ssephe	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1159302864Ssephe	vmbus_channel_cpu_set(chan, cpu);
1160302864Ssephe}
1161302864Ssephe
/*
 * Default channel to cpu binding, applied to every offered channel.
 */
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{

	/*
	 * Channels are pinned to cpu0 unless told otherwise; a device
	 * with special channel-cpu mapping requirement should call
	 * vmbus_channel_cpu_{set,rr}() itself.
	 */
	vmbus_channel_cpu_set(chan, 0);
}
1172302864Ssephe
1173302864Ssephestatic void
1174302864Ssephevmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
1175302864Ssephe    const struct vmbus_message *msg)
1176302864Ssephe{
1177302864Ssephe	const struct vmbus_chanmsg_choffer *offer;
1178302864Ssephe	struct hv_vmbus_channel *chan;
1179302864Ssephe	int error;
1180302864Ssephe
1181302864Ssephe	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;
1182302864Ssephe
1183302864Ssephe	chan = vmbus_chan_alloc(sc);
1184302864Ssephe	if (chan == NULL) {
1185302864Ssephe		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
1186302864Ssephe		    offer->chm_chanid);
1187302864Ssephe		return;
1188302864Ssephe	}
1189302864Ssephe
1190302864Ssephe	chan->ch_id = offer->chm_chanid;
1191302864Ssephe	chan->ch_subidx = offer->chm_subidx;
1192302864Ssephe	chan->ch_guid_type = offer->chm_chtype;
1193302864Ssephe	chan->ch_guid_inst = offer->chm_chinst;
1194302864Ssephe
1195302864Ssephe	/* Batch reading is on by default */
1196302864Ssephe	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
1197302864Ssephe
1198302864Ssephe	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
1199302864Ssephe	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
1200302864Ssephe		chan->ch_monprm->mp_connid = offer->chm_connid;
1201302864Ssephe
1202302864Ssephe	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
1203302864Ssephe		/*
1204302864Ssephe		 * Setup MNF stuffs.
1205302864Ssephe		 */
1206302864Ssephe		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
1207302864Ssephe		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
1208302864Ssephe		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
1209302864Ssephe			panic("invalid monitor trigger %u", offer->chm_montrig);
1210302864Ssephe		chan->ch_montrig_mask =
1211302864Ssephe		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
1212302864Ssephe	}
1213302864Ssephe
1214302864Ssephe	/* Select default cpu for this channel. */
1215302864Ssephe	vmbus_chan_cpu_default(chan);
1216302864Ssephe
1217302864Ssephe	error = vmbus_chan_add(chan);
1218302864Ssephe	if (error) {
1219302864Ssephe		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
1220302864Ssephe		    chan->ch_id, error);
1221302864Ssephe		vmbus_chan_free(chan);
1222302864Ssephe		return;
1223302864Ssephe	}
1224302864Ssephe
1225302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1226302864Ssephe		/*
1227302864Ssephe		 * Add device for this primary channel.
1228302864Ssephe		 *
1229302864Ssephe		 * NOTE:
1230302864Ssephe		 * Error is ignored here; don't have much to do if error
1231302864Ssephe		 * really happens.
1232302864Ssephe		 */
1233302868Ssephe		vmbus_add_child(chan);
1234302864Ssephe	}
1235302864Ssephe}
1236302864Ssephe
1237302864Ssephe/*
1238302864Ssephe * XXX pretty broken; need rework.
1239302864Ssephe */
1240302864Ssephestatic void
1241302864Ssephevmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1242302864Ssephe    const struct vmbus_message *msg)
1243302864Ssephe{
1244302864Ssephe	const struct vmbus_chanmsg_chrescind *note;
1245302864Ssephe	struct hv_vmbus_channel *chan;
1246302864Ssephe
1247302864Ssephe	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1248302864Ssephe	if (note->chm_chanid > VMBUS_CHAN_MAX) {
1249302864Ssephe		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1250302864Ssephe		    note->chm_chanid);
1251302864Ssephe		return;
1252302864Ssephe	}
1253302864Ssephe
1254302864Ssephe	if (bootverbose) {
1255302864Ssephe		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1256302864Ssephe		    note->chm_chanid);
1257302864Ssephe	}
1258302864Ssephe
1259302864Ssephe	chan = sc->vmbus_chmap[note->chm_chanid];
1260302864Ssephe	if (chan == NULL)
1261302864Ssephe		return;
1262302864Ssephe	sc->vmbus_chmap[note->chm_chanid] = NULL;
1263302864Ssephe
1264302864Ssephe	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1265302864Ssephe}
1266302864Ssephe
1267302864Ssephestatic void
1268302864Ssephevmbus_chan_detach_task(void *xchan, int pending __unused)
1269302864Ssephe{
1270302864Ssephe	struct hv_vmbus_channel *chan = xchan;
1271302864Ssephe
1272302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1273302864Ssephe		/* Only primary channel owns the device */
1274302868Ssephe		vmbus_delete_child(chan);
1275302864Ssephe		/* NOTE: DO NOT free primary channel for now */
1276302864Ssephe	} else {
1277302864Ssephe		struct vmbus_softc *sc = chan->vmbus_sc;
1278302864Ssephe		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1279302864Ssephe		struct vmbus_chanmsg_chfree *req;
1280302864Ssephe		struct vmbus_msghc *mh;
1281302864Ssephe		int error;
1282302864Ssephe
1283302864Ssephe		mh = vmbus_msghc_get(sc, sizeof(*req));
1284302864Ssephe		if (mh == NULL) {
1285302864Ssephe			device_printf(sc->vmbus_dev,
1286302864Ssephe			    "can not get msg hypercall for chfree(chan%u)\n",
1287302864Ssephe			    chan->ch_id);
1288302864Ssephe			goto remove;
1289302864Ssephe		}
1290302864Ssephe
1291302864Ssephe		req = vmbus_msghc_dataptr(mh);
1292302864Ssephe		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1293302864Ssephe		req->chm_chanid = chan->ch_id;
1294302864Ssephe
1295302864Ssephe		error = vmbus_msghc_exec_noresult(mh);
1296302864Ssephe		vmbus_msghc_put(sc, mh);
1297302864Ssephe
1298302864Ssephe		if (error) {
1299302864Ssephe			device_printf(sc->vmbus_dev,
1300302864Ssephe			    "chfree(chan%u) failed: %d",
1301302864Ssephe			    chan->ch_id, error);
1302302864Ssephe			/* NOTE: Move on! */
1303302864Ssephe		} else {
1304302864Ssephe			if (bootverbose) {
1305302864Ssephe				device_printf(sc->vmbus_dev, "chan%u freed\n",
1306302864Ssephe				    chan->ch_id);
1307302864Ssephe			}
1308302864Ssephe		}
1309302864Ssepheremove:
1310302864Ssephe		mtx_lock(&pri_chan->ch_subchan_lock);
1311302864Ssephe		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1312302864Ssephe		KASSERT(pri_chan->ch_subchan_cnt > 0,
1313302864Ssephe		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1314302864Ssephe		pri_chan->ch_subchan_cnt--;
1315302864Ssephe		mtx_unlock(&pri_chan->ch_subchan_lock);
1316302864Ssephe		wakeup(pri_chan);
1317302864Ssephe
1318302864Ssephe		vmbus_chan_free(chan);
1319302864Ssephe	}
1320302864Ssephe}
1321302864Ssephe
1322302864Ssephe/*
1323302864Ssephe * Detach all devices and destroy the corresponding primary channels.
1324302864Ssephe */
1325302864Ssephevoid
1326302864Ssephevmbus_chan_destroy_all(struct vmbus_softc *sc)
1327302864Ssephe{
1328302864Ssephe	struct hv_vmbus_channel *chan;
1329302864Ssephe
1330302864Ssephe	mtx_lock(&sc->vmbus_prichan_lock);
1331302864Ssephe	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1332302864Ssephe		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1333302864Ssephe		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1334302864Ssephe		mtx_unlock(&sc->vmbus_prichan_lock);
1335302864Ssephe
1336302868Ssephe		vmbus_delete_child(chan);
1337302864Ssephe		vmbus_chan_free(chan);
1338302864Ssephe
1339302864Ssephe		mtx_lock(&sc->vmbus_prichan_lock);
1340302864Ssephe	}
1341302864Ssephe	bzero(sc->vmbus_chmap,
1342302864Ssephe	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1343302864Ssephe	mtx_unlock(&sc->vmbus_prichan_lock);
1344302864Ssephe}
1345302864Ssephe
1346302864Ssephe/**
1347302864Ssephe * @brief Select the best outgoing channel
1348302864Ssephe *
1349302864Ssephe * The channel whose vcpu binding is closest to the currect vcpu will
1350302864Ssephe * be selected.
1351302864Ssephe * If no multi-channel, always select primary channel
1352302864Ssephe *
1353302864Ssephe * @param primary - primary channel
1354302864Ssephe */
1355302864Ssephestruct hv_vmbus_channel *
1356302864Ssephevmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
1357302864Ssephe{
1358302864Ssephe	hv_vmbus_channel *new_channel = NULL;
1359302864Ssephe	hv_vmbus_channel *outgoing_channel = primary;
1360302864Ssephe	int old_cpu_distance = 0;
1361302864Ssephe	int new_cpu_distance = 0;
1362302864Ssephe	int cur_vcpu = 0;
1363302864Ssephe	int smp_pro_id = PCPU_GET(cpuid);
1364302864Ssephe
1365302864Ssephe	if (TAILQ_EMPTY(&primary->ch_subchans)) {
1366302864Ssephe		return outgoing_channel;
1367302864Ssephe	}
1368302864Ssephe
1369302864Ssephe	if (smp_pro_id >= MAXCPU) {
1370302864Ssephe		return outgoing_channel;
1371302864Ssephe	}
1372302864Ssephe
1373302864Ssephe	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);
1374302864Ssephe
1375302864Ssephe	/* XXX need lock */
1376302864Ssephe	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
1377302864Ssephe		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
1378302864Ssephe			continue;
1379302864Ssephe		}
1380302864Ssephe
1381302864Ssephe		if (new_channel->target_vcpu == cur_vcpu){
1382302864Ssephe			return new_channel;
1383302864Ssephe		}
1384302864Ssephe
1385302864Ssephe		old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
1386302864Ssephe		    (outgoing_channel->target_vcpu - cur_vcpu) :
1387302864Ssephe		    (cur_vcpu - outgoing_channel->target_vcpu));
1388302864Ssephe
1389302864Ssephe		new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
1390302864Ssephe		    (new_channel->target_vcpu - cur_vcpu) :
1391302864Ssephe		    (cur_vcpu - new_channel->target_vcpu));
1392302864Ssephe
1393302864Ssephe		if (old_cpu_distance < new_cpu_distance) {
1394302864Ssephe			continue;
1395302864Ssephe		}
1396302864Ssephe
1397302864Ssephe		outgoing_channel = new_channel;
1398302864Ssephe	}
1399302864Ssephe
1400302864Ssephe	return(outgoing_channel);
1401302864Ssephe}
1402302864Ssephe
1403302864Ssephestruct hv_vmbus_channel **
1404302864Ssephevmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1405302864Ssephe{
1406302864Ssephe	struct hv_vmbus_channel **ret, *chan;
1407302864Ssephe	int i;
1408302864Ssephe
1409302864Ssephe	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1410302864Ssephe	    M_WAITOK);
1411302864Ssephe
1412302864Ssephe	mtx_lock(&pri_chan->ch_subchan_lock);
1413302864Ssephe
1414302864Ssephe	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1415302864Ssephe		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1416302864Ssephe
1417302864Ssephe	i = 0;
1418302864Ssephe	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1419302864Ssephe		/* TODO: refcnt chan */
1420302864Ssephe		ret[i] = chan;
1421302864Ssephe
1422302864Ssephe		++i;
1423302864Ssephe		if (i == subchan_cnt)
1424302864Ssephe			break;
1425302864Ssephe	}
1426302864Ssephe	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1427302864Ssephe	    pri_chan->ch_subchan_cnt, subchan_cnt));
1428302864Ssephe
1429302864Ssephe	mtx_unlock(&pri_chan->ch_subchan_lock);
1430302864Ssephe
1431302864Ssephe	return ret;
1432302864Ssephe}
1433302864Ssephe
1434302864Ssephevoid
1435302864Ssephevmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
1436302864Ssephe{
1437302864Ssephe
1438302864Ssephe	free(subchan, M_TEMP);
1439302864Ssephe}
1440302864Ssephe
1441302864Ssephevoid
1442302864Ssephevmbus_drain_subchan(struct hv_vmbus_channel *pri_chan)
1443302864Ssephe{
1444302864Ssephe	mtx_lock(&pri_chan->ch_subchan_lock);
1445302864Ssephe	while (pri_chan->ch_subchan_cnt > 0)
1446302864Ssephe		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
1447302864Ssephe	mtx_unlock(&pri_chan->ch_subchan_lock);
1448302864Ssephe}
1449302864Ssephe
1450302864Ssephevoid
1451302864Ssephevmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1452302864Ssephe{
1453302864Ssephe	vmbus_chanmsg_proc_t msg_proc;
1454302864Ssephe	uint32_t msg_type;
1455302864Ssephe
1456302864Ssephe	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1457302864Ssephe	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1458302864Ssephe	    ("invalid message type %u", msg_type));
1459302864Ssephe
1460302864Ssephe	msg_proc = vmbus_chan_msgprocs[msg_type];
1461302864Ssephe	if (msg_proc != NULL)
1462302864Ssephe		msg_proc(sc, msg);
1463302864Ssephe}
1464