vmbus_chan.c revision 302874
1250199Sgrehan/*-
2298446Ssephe * Copyright (c) 2009-2012,2016 Microsoft Corp.
3250199Sgrehan * Copyright (c) 2012 NetApp Inc.
4250199Sgrehan * Copyright (c) 2012 Citrix Inc.
5250199Sgrehan * All rights reserved.
6250199Sgrehan *
7250199Sgrehan * Redistribution and use in source and binary forms, with or without
8250199Sgrehan * modification, are permitted provided that the following conditions
9250199Sgrehan * are met:
10250199Sgrehan * 1. Redistributions of source code must retain the above copyright
11250199Sgrehan *    notice unmodified, this list of conditions, and the following
12250199Sgrehan *    disclaimer.
13250199Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
14250199Sgrehan *    notice, this list of conditions and the following disclaimer in the
15250199Sgrehan *    documentation and/or other materials provided with the distribution.
16250199Sgrehan *
17250199Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18250199Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19250199Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20250199Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21250199Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22250199Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23250199Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24250199Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25250199Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26250199Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27250199Sgrehan */
28250199Sgrehan
29256276Sdim#include <sys/cdefs.h>
30256276Sdim__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302874 2016-07-15 06:08:48Z sephe $");
31256276Sdim
32250199Sgrehan#include <sys/param.h>
33296028Ssephe#include <sys/kernel.h>
34250199Sgrehan#include <sys/malloc.h>
35250199Sgrehan#include <sys/systm.h>
36250199Sgrehan#include <sys/mbuf.h>
37250199Sgrehan#include <sys/lock.h>
38250199Sgrehan#include <sys/mutex.h>
39296181Ssephe#include <sys/sysctl.h>
40301588Ssephe
41301588Ssephe#include <machine/atomic.h>
42250199Sgrehan#include <machine/bus.h>
43301588Ssephe
44250199Sgrehan#include <vm/vm.h>
45250199Sgrehan#include <vm/vm_param.h>
46250199Sgrehan#include <vm/pmap.h>
47250199Sgrehan
48302872Ssephe#include <dev/hyperv/include/hyperv_busdma.h>
49300102Ssephe#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
50302619Ssephe#include <dev/hyperv/vmbus/hyperv_var.h>
51301588Ssephe#include <dev/hyperv/vmbus/vmbus_reg.h>
52300102Ssephe#include <dev/hyperv/vmbus/vmbus_var.h>
53250199Sgrehan
54302731Ssephestatic void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
55302692Ssephestatic void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
56302692Ssephe		    const struct hv_vmbus_channel *);
57302864Ssephe
58302713Ssephestatic void	vmbus_chan_task(void *, int);
59302713Ssephestatic void	vmbus_chan_task_nobatch(void *, int);
60302864Ssephestatic void	vmbus_chan_detach_task(void *, int);
61250199Sgrehan
62302864Ssephestatic void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
63302864Ssephe		    const struct vmbus_message *);
64302864Ssephestatic void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
65302864Ssephe		    const struct vmbus_message *);
66302864Ssephe
67302864Ssephe/*
68302864Ssephe * Vmbus channel message processing.
69302864Ssephe */
70302864Ssephestatic const vmbus_chanmsg_proc_t
71302864Ssephevmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
72302864Ssephe	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
73302864Ssephe	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),
74302864Ssephe
75302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
76302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
77302864Ssephe	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
78302864Ssephe};
79302864Ssephe
80250199Sgrehan/**
81250199Sgrehan *  @brief Trigger an event notification on the specified channel
82250199Sgrehan */
83250199Sgrehanstatic void
84302731Ssephevmbus_chan_send_event(hv_vmbus_channel *channel)
85250199Sgrehan{
86302618Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
87302693Ssephe	uint32_t chanid = channel->ch_id;
88302618Ssephe
89302618Ssephe	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
90302618Ssephe	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
91302618Ssephe
92302695Ssephe	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
93302731Ssephe		atomic_set_int(
94302731Ssephe		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
95302731Ssephe		channel->ch_montrig_mask);
96250199Sgrehan	} else {
97302726Ssephe		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
98250199Sgrehan	}
99250199Sgrehan}
100250199Sgrehan
101296289Ssephestatic int
102296289Ssephevmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
103296289Ssephe{
104296289Ssephe	struct hv_vmbus_channel *chan = arg1;
105296289Ssephe	int alloc = 0;
106296289Ssephe
107302695Ssephe	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
108296289Ssephe		alloc = 1;
109296289Ssephe	return sysctl_handle_int(oidp, &alloc, 0, req);
110296289Ssephe}
111296289Ssephe
112296181Ssephestatic void
113296290Ssephevmbus_channel_sysctl_create(hv_vmbus_channel* channel)
114296181Ssephe{
115296181Ssephe	device_t dev;
116296181Ssephe	struct sysctl_oid *devch_sysctl;
117296181Ssephe	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
118296181Ssephe	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
119296181Ssephe	struct sysctl_ctx_list *ctx;
120296181Ssephe	uint32_t ch_id;
121296181Ssephe	uint16_t sub_ch_id;
122296181Ssephe	char name[16];
123296181Ssephe
124302819Ssephe	hv_vmbus_channel* primary_ch = channel->ch_prichan;
125296181Ssephe
126296181Ssephe	if (primary_ch == NULL) {
127302706Ssephe		dev = channel->ch_dev;
128302693Ssephe		ch_id = channel->ch_id;
129296181Ssephe	} else {
130302706Ssephe		dev = primary_ch->ch_dev;
131302693Ssephe		ch_id = primary_ch->ch_id;
132302694Ssephe		sub_ch_id = channel->ch_subidx;
133296181Ssephe	}
134302633Ssephe	ctx = &channel->ch_sysctl_ctx;
135302633Ssephe	sysctl_ctx_init(ctx);
136296181Ssephe	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
137296181Ssephe	devch_sysctl = SYSCTL_ADD_NODE(ctx,
138296181Ssephe		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
139298693Ssephe		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
140296181Ssephe	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
141296181Ssephe	snprintf(name, sizeof(name), "%d", ch_id);
142296181Ssephe	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
143296181Ssephe	    	    SYSCTL_CHILDREN(devch_sysctl),
144298693Ssephe	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
145296181Ssephe
146296181Ssephe	if (primary_ch != NULL) {
147296181Ssephe		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
148296181Ssephe			SYSCTL_CHILDREN(devch_id_sysctl),
149298693Ssephe			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
150296181Ssephe		snprintf(name, sizeof(name), "%d", sub_ch_id);
151296181Ssephe		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
152296181Ssephe			SYSCTL_CHILDREN(devch_sub_sysctl),
153298693Ssephe			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
154296188Ssephe
155296188Ssephe		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
156296188Ssephe		    OID_AUTO, "chanid", CTLFLAG_RD,
157302693Ssephe		    &channel->ch_id, 0, "channel id");
158296181Ssephe	}
159296188Ssephe	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
160302873Ssephe	    "cpu", CTLFLAG_RD, &channel->ch_cpuid, 0, "owner CPU id");
161296289Ssephe	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
162298693Ssephe	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
163298693Ssephe	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
164296289Ssephe	    "is monitor allocated to this channel");
165296188Ssephe
166296181Ssephe	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
167296181Ssephe                    SYSCTL_CHILDREN(devch_id_sysctl),
168296181Ssephe                    OID_AUTO,
169296181Ssephe		    "in",
170298693Ssephe		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
171296181Ssephe	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
172296181Ssephe                    SYSCTL_CHILDREN(devch_id_sysctl),
173296181Ssephe                    OID_AUTO,
174296181Ssephe		    "out",
175298693Ssephe		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
176296181Ssephe	hv_ring_buffer_stat(ctx,
177296181Ssephe		SYSCTL_CHILDREN(devch_id_in_sysctl),
178296181Ssephe		&(channel->inbound),
179296181Ssephe		"inbound ring buffer stats");
180296181Ssephe	hv_ring_buffer_stat(ctx,
181296181Ssephe		SYSCTL_CHILDREN(devch_id_out_sysctl),
182296181Ssephe		&(channel->outbound),
183296181Ssephe		"outbound ring buffer stats");
184296181Ssephe}
185296290Ssephe
186250199Sgrehan/**
187250199Sgrehan * @brief Open the specified channel
188250199Sgrehan */
189250199Sgrehanint
190250199Sgrehanhv_vmbus_channel_open(
191250199Sgrehan	hv_vmbus_channel*		new_channel,
192250199Sgrehan	uint32_t			send_ring_buffer_size,
193250199Sgrehan	uint32_t			recv_ring_buffer_size,
194250199Sgrehan	void*				user_data,
195250199Sgrehan	uint32_t			user_data_len,
196302874Ssephe	vmbus_chan_callback_t		cb,
197302874Ssephe	void				*cbarg)
198250199Sgrehan{
199302607Ssephe	struct vmbus_softc *sc = new_channel->vmbus_sc;
200302607Ssephe	const struct vmbus_chanmsg_chopen_resp *resp;
201302607Ssephe	const struct vmbus_message *msg;
202302607Ssephe	struct vmbus_chanmsg_chopen *req;
203302607Ssephe	struct vmbus_msghc *mh;
204302607Ssephe	uint32_t status;
205250199Sgrehan	int ret = 0;
206302872Ssephe	uint8_t *br;
207250199Sgrehan
208302607Ssephe	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
209302607Ssephe		device_printf(sc->vmbus_dev,
210302607Ssephe		    "invalid udata len %u for chan%u\n",
211302693Ssephe		    user_data_len, new_channel->ch_id);
212302607Ssephe		return EINVAL;
213302607Ssephe	}
214302872Ssephe	KASSERT((send_ring_buffer_size & PAGE_MASK) == 0,
215302872Ssephe	    ("send bufring size is not multiple page"));
216302872Ssephe	KASSERT((recv_ring_buffer_size & PAGE_MASK) == 0,
217302872Ssephe	    ("recv bufring size is not multiple page"));
218302607Ssephe
219302812Ssephe	if (atomic_testandset_int(&new_channel->ch_stflags,
220302812Ssephe	    VMBUS_CHAN_ST_OPENED_SHIFT))
221302812Ssephe		panic("double-open chan%u", new_channel->ch_id);
222282212Swhu
223302874Ssephe	new_channel->ch_cb = cb;
224302874Ssephe	new_channel->ch_cbarg = cbarg;
225250199Sgrehan
226302692Ssephe	vmbus_chan_update_evtflagcnt(sc, new_channel);
227300102Ssephe
228302874Ssephe	new_channel->ch_tq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
229302873Ssephe	    new_channel->ch_cpuid);
230302713Ssephe	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
231302874Ssephe		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task,
232302874Ssephe		    new_channel);
233302713Ssephe	} else {
234302874Ssephe		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task_nobatch,
235302874Ssephe		    new_channel);
236302713Ssephe	}
237294886Ssephe
238302872Ssephe	/*
239302872Ssephe	 * Allocate the TX+RX bufrings.
240302872Ssephe	 * XXX should use ch_dev dtag
241302872Ssephe	 */
242302872Ssephe	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
243302872Ssephe	    PAGE_SIZE, 0, send_ring_buffer_size + recv_ring_buffer_size,
244302872Ssephe	    &new_channel->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
245302872Ssephe	if (br == NULL) {
246302872Ssephe		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
247302812Ssephe		ret = ENOMEM;
248302812Ssephe		goto failed;
249302812Ssephe	}
250302872Ssephe	new_channel->ch_bufring = br;
251250199Sgrehan
252302872Ssephe	/* TX bufring comes first */
253302872Ssephe	hv_vmbus_ring_buffer_init(&new_channel->outbound,
254302872Ssephe	    br, send_ring_buffer_size);
255302872Ssephe	/* RX bufring immediately follows TX bufring */
256302872Ssephe	hv_vmbus_ring_buffer_init(&new_channel->inbound,
257302872Ssephe	    br + send_ring_buffer_size, recv_ring_buffer_size);
258250199Sgrehan
259296290Ssephe	/* Create sysctl tree for this channel */
260296290Ssephe	vmbus_channel_sysctl_create(new_channel);
261296181Ssephe
262302872Ssephe	/*
263302872Ssephe	 * Connect the bufrings, both RX and TX, to this channel.
264250199Sgrehan	 */
265302872Ssephe	ret = vmbus_chan_gpadl_connect(new_channel,
266302872Ssephe		new_channel->ch_bufring_dma.hv_paddr,
267250199Sgrehan		send_ring_buffer_size + recv_ring_buffer_size,
268302872Ssephe		&new_channel->ch_bufring_gpadl);
269302872Ssephe	if (ret != 0) {
270302872Ssephe		device_printf(sc->vmbus_dev,
271302872Ssephe		    "failed to connect bufring GPADL to chan%u\n",
272302872Ssephe		    new_channel->ch_id);
273302872Ssephe		goto failed;
274302872Ssephe	}
275250199Sgrehan
276302607Ssephe	/*
277302607Ssephe	 * Open channel w/ the bufring GPADL on the target CPU.
278250199Sgrehan	 */
279302607Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
280302607Ssephe	if (mh == NULL) {
281302607Ssephe		device_printf(sc->vmbus_dev,
282302607Ssephe		    "can not get msg hypercall for chopen(chan%u)\n",
283302693Ssephe		    new_channel->ch_id);
284302812Ssephe		ret = ENXIO;
285302812Ssephe		goto failed;
286302607Ssephe	}
287250199Sgrehan
288302607Ssephe	req = vmbus_msghc_dataptr(mh);
289302607Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
290302693Ssephe	req->chm_chanid = new_channel->ch_id;
291302693Ssephe	req->chm_openid = new_channel->ch_id;
292302872Ssephe	req->chm_gpadl = new_channel->ch_bufring_gpadl;
293302873Ssephe	req->chm_vcpuid = new_channel->ch_vcpuid;
294302607Ssephe	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
295250199Sgrehan	if (user_data_len)
296302607Ssephe		memcpy(req->chm_udata, user_data, user_data_len);
297250199Sgrehan
298302607Ssephe	ret = vmbus_msghc_exec(sc, mh);
299302607Ssephe	if (ret != 0) {
300302607Ssephe		device_printf(sc->vmbus_dev,
301302607Ssephe		    "chopen(chan%u) msg hypercall exec failed: %d\n",
302302693Ssephe		    new_channel->ch_id, ret);
303302607Ssephe		vmbus_msghc_put(sc, mh);
304302812Ssephe		goto failed;
305302607Ssephe	}
306250199Sgrehan
307302607Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
308302607Ssephe	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
309302607Ssephe	status = resp->chm_status;
310250199Sgrehan
311302607Ssephe	vmbus_msghc_put(sc, mh);
312250199Sgrehan
313302607Ssephe	if (status == 0) {
314302607Ssephe		if (bootverbose) {
315302607Ssephe			device_printf(sc->vmbus_dev, "chan%u opened\n",
316302693Ssephe			    new_channel->ch_id);
317302607Ssephe		}
318302812Ssephe		return 0;
319250199Sgrehan	}
320302812Ssephe
321302812Ssephe	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
322302812Ssephe	    new_channel->ch_id);
323302812Ssephe	ret = ENXIO;
324302812Ssephe
325302812Ssephefailed:
326302872Ssephe	if (new_channel->ch_bufring_gpadl) {
327302872Ssephe		hv_vmbus_channel_teardown_gpdal(new_channel,
328302872Ssephe		    new_channel->ch_bufring_gpadl);
329302872Ssephe		new_channel->ch_bufring_gpadl = 0;
330302872Ssephe	}
331302872Ssephe	if (new_channel->ch_bufring != NULL) {
332302872Ssephe		hyperv_dmamem_free(&new_channel->ch_bufring_dma,
333302872Ssephe		    new_channel->ch_bufring);
334302872Ssephe		new_channel->ch_bufring = NULL;
335302872Ssephe	}
336302812Ssephe	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
337302812Ssephe	return ret;
338250199Sgrehan}
339250199Sgrehan
340250199Sgrehan/**
341302609Ssephe * @brief Establish a GPADL for the specified buffer
342250199Sgrehan */
343302609Ssepheint
344302609Ssephehv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
345302871Ssephe    void *contig_buffer, uint32_t size, uint32_t *gpadl)
346250199Sgrehan{
347302871Ssephe	return vmbus_chan_gpadl_connect(channel,
348302871Ssephe	    hv_get_phys_addr(contig_buffer), size, gpadl);
349302871Ssephe}
350302871Ssephe
351302871Ssepheint
352302871Ssephevmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
353302871Ssephe    int size, uint32_t *gpadl0)
354302871Ssephe{
355302871Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
356302609Ssephe	struct vmbus_msghc *mh;
357302609Ssephe	struct vmbus_chanmsg_gpadl_conn *req;
358302609Ssephe	const struct vmbus_message *msg;
359302609Ssephe	size_t reqsz;
360302609Ssephe	uint32_t gpadl, status;
361302609Ssephe	int page_count, range_len, i, cnt, error;
362302871Ssephe	uint64_t page_id;
363250199Sgrehan
364302609Ssephe	/*
365302609Ssephe	 * Preliminary checks.
366302609Ssephe	 */
367250199Sgrehan
368302609Ssephe	KASSERT((size & PAGE_MASK) == 0,
369302871Ssephe	    ("invalid GPA size %d, not multiple page size", size));
370250199Sgrehan	page_count = size >> PAGE_SHIFT;
371250199Sgrehan
372302609Ssephe	KASSERT((paddr & PAGE_MASK) == 0,
373302609Ssephe	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
374302609Ssephe	page_id = paddr >> PAGE_SHIFT;
375250199Sgrehan
376302609Ssephe	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
377302609Ssephe	/*
378302609Ssephe	 * We don't support multiple GPA ranges.
379302609Ssephe	 */
380302609Ssephe	if (range_len > UINT16_MAX) {
381302609Ssephe		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
382302609Ssephe		    page_count);
383302609Ssephe		return EOPNOTSUPP;
384250199Sgrehan	}
385250199Sgrehan
386302609Ssephe	/*
387302609Ssephe	 * Allocate GPADL id.
388302609Ssephe	 */
389302630Ssephe	gpadl = vmbus_gpadl_alloc(sc);
390302609Ssephe	*gpadl0 = gpadl;
391250199Sgrehan
392302609Ssephe	/*
393302609Ssephe	 * Connect this GPADL to the target channel.
394302609Ssephe	 *
395302609Ssephe	 * NOTE:
396302609Ssephe	 * Since each message can only hold small set of page
397302609Ssephe	 * addresses, several messages may be required to
398302609Ssephe	 * complete the connection.
399302609Ssephe	 */
400302609Ssephe	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
401302609Ssephe		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
402302609Ssephe	else
403302609Ssephe		cnt = page_count;
404302609Ssephe	page_count -= cnt;
405250199Sgrehan
406302609Ssephe	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
407302609Ssephe	    chm_range.gpa_page[cnt]);
408302609Ssephe	mh = vmbus_msghc_get(sc, reqsz);
409302609Ssephe	if (mh == NULL) {
410302609Ssephe		device_printf(sc->vmbus_dev,
411302609Ssephe		    "can not get msg hypercall for gpadl->chan%u\n",
412302871Ssephe		    chan->ch_id);
413302609Ssephe		return EIO;
414250199Sgrehan	}
415250199Sgrehan
416302609Ssephe	req = vmbus_msghc_dataptr(mh);
417302609Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
418302871Ssephe	req->chm_chanid = chan->ch_id;
419302609Ssephe	req->chm_gpadl = gpadl;
420302609Ssephe	req->chm_range_len = range_len;
421302609Ssephe	req->chm_range_cnt = 1;
422302609Ssephe	req->chm_range.gpa_len = size;
423302609Ssephe	req->chm_range.gpa_ofs = 0;
424302609Ssephe	for (i = 0; i < cnt; ++i)
425302609Ssephe		req->chm_range.gpa_page[i] = page_id++;
426250199Sgrehan
427302609Ssephe	error = vmbus_msghc_exec(sc, mh);
428302609Ssephe	if (error) {
429302609Ssephe		device_printf(sc->vmbus_dev,
430302609Ssephe		    "gpadl->chan%u msg hypercall exec failed: %d\n",
431302871Ssephe		    chan->ch_id, error);
432302609Ssephe		vmbus_msghc_put(sc, mh);
433302609Ssephe		return error;
434302609Ssephe	}
435250199Sgrehan
436302609Ssephe	while (page_count > 0) {
437302609Ssephe		struct vmbus_chanmsg_gpadl_subconn *subreq;
438250199Sgrehan
439302609Ssephe		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
440302609Ssephe			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
441302609Ssephe		else
442302609Ssephe			cnt = page_count;
443302609Ssephe		page_count -= cnt;
444250199Sgrehan
445302609Ssephe		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
446302609Ssephe		    chm_gpa_page[cnt]);
447302609Ssephe		vmbus_msghc_reset(mh, reqsz);
448250199Sgrehan
449302609Ssephe		subreq = vmbus_msghc_dataptr(mh);
450302609Ssephe		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
451302609Ssephe		subreq->chm_gpadl = gpadl;
452302609Ssephe		for (i = 0; i < cnt; ++i)
453302609Ssephe			subreq->chm_gpa_page[i] = page_id++;
454250199Sgrehan
455302609Ssephe		vmbus_msghc_exec_noresult(mh);
456250199Sgrehan	}
457302609Ssephe	KASSERT(page_count == 0, ("invalid page count %d", page_count));
458250199Sgrehan
459302609Ssephe	msg = vmbus_msghc_wait_result(sc, mh);
460302609Ssephe	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
461302609Ssephe	    msg->msg_data)->chm_status;
462250199Sgrehan
463302609Ssephe	vmbus_msghc_put(sc, mh);
464250199Sgrehan
465302609Ssephe	if (status != 0) {
466302609Ssephe		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
467302871Ssephe		    "status %u\n", chan->ch_id, status);
468302609Ssephe		return EIO;
469302632Ssephe	} else {
470302632Ssephe		if (bootverbose) {
471302632Ssephe			device_printf(sc->vmbus_dev, "gpadl->chan%u "
472302871Ssephe			    "succeeded\n", chan->ch_id);
473302632Ssephe		}
474302609Ssephe	}
475302609Ssephe	return 0;
476250199Sgrehan}
477250199Sgrehan
478302611Ssephe/*
479302611Ssephe * Disconnect the GPA from the target channel
480250199Sgrehan */
481250199Sgrehanint
482302611Ssephehv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
483250199Sgrehan{
484302611Ssephe	struct vmbus_softc *sc = chan->vmbus_sc;
485302611Ssephe	struct vmbus_msghc *mh;
486302611Ssephe	struct vmbus_chanmsg_gpadl_disconn *req;
487302611Ssephe	int error;
488250199Sgrehan
489302611Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
490302611Ssephe	if (mh == NULL) {
491302611Ssephe		device_printf(sc->vmbus_dev,
492302611Ssephe		    "can not get msg hypercall for gpa x->chan%u\n",
493302693Ssephe		    chan->ch_id);
494302611Ssephe		return EBUSY;
495250199Sgrehan	}
496250199Sgrehan
497302611Ssephe	req = vmbus_msghc_dataptr(mh);
498302611Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
499302693Ssephe	req->chm_chanid = chan->ch_id;
500302611Ssephe	req->chm_gpadl = gpadl;
501250199Sgrehan
502302611Ssephe	error = vmbus_msghc_exec(sc, mh);
503302611Ssephe	if (error) {
504302611Ssephe		device_printf(sc->vmbus_dev,
505302611Ssephe		    "gpa x->chan%u msg hypercall exec failed: %d\n",
506302693Ssephe		    chan->ch_id, error);
507302611Ssephe		vmbus_msghc_put(sc, mh);
508302611Ssephe		return error;
509302611Ssephe	}
510250199Sgrehan
511302611Ssephe	vmbus_msghc_wait_result(sc, mh);
512302611Ssephe	/* Discard result; no useful information */
513302611Ssephe	vmbus_msghc_put(sc, mh);
514250199Sgrehan
515302611Ssephe	return 0;
516250199Sgrehan}
517250199Sgrehan
518282212Swhustatic void
519282212Swhuhv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
520250199Sgrehan{
521302610Ssephe	struct vmbus_softc *sc = channel->vmbus_sc;
522302610Ssephe	struct vmbus_msghc *mh;
523302610Ssephe	struct vmbus_chanmsg_chclose *req;
524302874Ssephe	struct taskqueue *tq = channel->ch_tq;
525302610Ssephe	int error;
526250199Sgrehan
527302812Ssephe	/* TODO: stringent check */
528302812Ssephe	atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
529302812Ssephe
530302633Ssephe	sysctl_ctx_free(&channel->ch_sysctl_ctx);
531282212Swhu
532282212Swhu	/*
533302874Ssephe	 * Set ch_tq to NULL to avoid more requests be scheduled
534294886Ssephe	 */
535302874Ssephe	channel->ch_tq = NULL;
536302874Ssephe	taskqueue_drain(tq, &channel->ch_task);
537302874Ssephe	channel->ch_cb = NULL;
538250199Sgrehan
539250199Sgrehan	/**
540250199Sgrehan	 * Send a closing message
541250199Sgrehan	 */
542250199Sgrehan
543302610Ssephe	mh = vmbus_msghc_get(sc, sizeof(*req));
544302610Ssephe	if (mh == NULL) {
545302610Ssephe		device_printf(sc->vmbus_dev,
546302610Ssephe		    "can not get msg hypercall for chclose(chan%u)\n",
547302693Ssephe		    channel->ch_id);
548302610Ssephe		return;
549302610Ssephe	}
550250199Sgrehan
551302610Ssephe	req = vmbus_msghc_dataptr(mh);
552302610Ssephe	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
553302693Ssephe	req->chm_chanid = channel->ch_id;
554250199Sgrehan
555302610Ssephe	error = vmbus_msghc_exec_noresult(mh);
556302610Ssephe	vmbus_msghc_put(sc, mh);
557302610Ssephe
558302610Ssephe	if (error) {
559302610Ssephe		device_printf(sc->vmbus_dev,
560302610Ssephe		    "chclose(chan%u) msg hypercall exec failed: %d\n",
561302693Ssephe		    channel->ch_id, error);
562302610Ssephe		return;
563302610Ssephe	} else if (bootverbose) {
564302610Ssephe		device_printf(sc->vmbus_dev, "close chan%u\n",
565302693Ssephe		    channel->ch_id);
566302610Ssephe	}
567302610Ssephe
568250199Sgrehan	/* Tear down the gpadl for the channel's ring buffer */
569302872Ssephe	if (channel->ch_bufring_gpadl) {
570250199Sgrehan		hv_vmbus_channel_teardown_gpdal(channel,
571302872Ssephe		    channel->ch_bufring_gpadl);
572302872Ssephe		channel->ch_bufring_gpadl = 0;
573250199Sgrehan	}
574250199Sgrehan
575250199Sgrehan	/* TODO: Send a msg to release the childRelId */
576250199Sgrehan
577250199Sgrehan	/* cleanup the ring buffers for this channel */
578250199Sgrehan	hv_ring_buffer_cleanup(&channel->outbound);
579250199Sgrehan	hv_ring_buffer_cleanup(&channel->inbound);
580250199Sgrehan
581302872Ssephe	if (channel->ch_bufring != NULL) {
582302872Ssephe		hyperv_dmamem_free(&channel->ch_bufring_dma,
583302872Ssephe		    channel->ch_bufring);
584302872Ssephe		channel->ch_bufring = NULL;
585302872Ssephe	}
586282212Swhu}
587250199Sgrehan
588302818Ssephe/*
589302818Ssephe * Caller should make sure that all sub-channels have
590302818Ssephe * been added to 'chan' and all to-be-closed channels
591302818Ssephe * are not being opened.
592282212Swhu */
593282212Swhuvoid
594302818Ssephehv_vmbus_channel_close(struct hv_vmbus_channel *chan)
595282212Swhu{
596302818Ssephe	int subchan_cnt;
597282212Swhu
598302818Ssephe	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
599282212Swhu		/*
600302818Ssephe		 * Sub-channel is closed when its primary channel
601302818Ssephe		 * is closed; done.
602282212Swhu		 */
603282212Swhu		return;
604282212Swhu	}
605282212Swhu
606250199Sgrehan	/*
607302818Ssephe	 * Close all sub-channels, if any.
608250199Sgrehan	 */
609302819Ssephe	subchan_cnt = chan->ch_subchan_cnt;
610302818Ssephe	if (subchan_cnt > 0) {
611302818Ssephe		struct hv_vmbus_channel **subchan;
612302818Ssephe		int i;
613302818Ssephe
614302818Ssephe		subchan = vmbus_get_subchan(chan, subchan_cnt);
615302818Ssephe		for (i = 0; i < subchan_cnt; ++i)
616302818Ssephe			hv_vmbus_channel_close_internal(subchan[i]);
617302818Ssephe		vmbus_rel_subchan(subchan, subchan_cnt);
618250199Sgrehan	}
619302818Ssephe
620302818Ssephe	/* Then close the primary channel. */
621302818Ssephe	hv_vmbus_channel_close_internal(chan);
622250199Sgrehan}
623250199Sgrehan
624250199Sgrehan/**
625250199Sgrehan * @brief Send the specified buffer on the given channel
626250199Sgrehan */
627250199Sgrehanint
628250199Sgrehanhv_vmbus_channel_send_packet(
629250199Sgrehan	hv_vmbus_channel*	channel,
630250199Sgrehan	void*			buffer,
631250199Sgrehan	uint32_t		buffer_len,
632250199Sgrehan	uint64_t		request_id,
633250199Sgrehan	hv_vmbus_packet_type	type,
634250199Sgrehan	uint32_t		flags)
635250199Sgrehan{
636250199Sgrehan	int			ret = 0;
637250199Sgrehan	hv_vm_packet_descriptor	desc;
638250199Sgrehan	uint32_t		packet_len;
639250199Sgrehan	uint64_t		aligned_data;
640250199Sgrehan	uint32_t		packet_len_aligned;
641282212Swhu	boolean_t		need_sig;
642302870Ssephe	struct iovec		iov[3];
643250199Sgrehan
644250199Sgrehan	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
645250199Sgrehan	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
646250199Sgrehan	aligned_data = 0;
647250199Sgrehan
648250199Sgrehan	/* Setup the descriptor */
649250199Sgrehan	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
650250199Sgrehan	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
651250199Sgrehan			    /* in 8-bytes granularity */
652250199Sgrehan	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
653250199Sgrehan	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
654250199Sgrehan	desc.transaction_id = request_id;
655250199Sgrehan
656302870Ssephe	iov[0].iov_base = &desc;
657302870Ssephe	iov[0].iov_len = sizeof(hv_vm_packet_descriptor);
658250199Sgrehan
659302870Ssephe	iov[1].iov_base = buffer;
660302870Ssephe	iov[1].iov_len = buffer_len;
661250199Sgrehan
662302870Ssephe	iov[2].iov_base = &aligned_data;
663302870Ssephe	iov[2].iov_len = packet_len_aligned - packet_len;
664250199Sgrehan
665302870Ssephe	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
666250199Sgrehan
667250199Sgrehan	/* TODO: We should determine if this is optional */
668302731Ssephe	if (ret == 0 && need_sig)
669302731Ssephe		vmbus_chan_send_event(channel);
670250199Sgrehan
671250199Sgrehan	return (ret);
672250199Sgrehan}
673250199Sgrehan
674250199Sgrehan/**
675250199Sgrehan * @brief Send a range of single-page buffer packets using
676250199Sgrehan * a GPADL Direct packet type
677250199Sgrehan */
678250199Sgrehanint
679250199Sgrehanhv_vmbus_channel_send_packet_pagebuffer(
680250199Sgrehan	hv_vmbus_channel*	channel,
681250199Sgrehan	hv_vmbus_page_buffer	page_buffers[],
682250199Sgrehan	uint32_t		page_count,
683250199Sgrehan	void*			buffer,
684250199Sgrehan	uint32_t		buffer_len,
685250199Sgrehan	uint64_t		request_id)
686250199Sgrehan{
687250199Sgrehan
688250199Sgrehan	int					ret = 0;
689282212Swhu	boolean_t				need_sig;
690250199Sgrehan	uint32_t				packet_len;
691294705Ssephe	uint32_t				page_buflen;
692250199Sgrehan	uint32_t				packetLen_aligned;
693302870Ssephe	struct iovec				iov[4];
694250199Sgrehan	hv_vmbus_channel_packet_page_buffer	desc;
695250199Sgrehan	uint32_t				descSize;
696250199Sgrehan	uint64_t				alignedData = 0;
697250199Sgrehan
698250199Sgrehan	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
699250199Sgrehan		return (EINVAL);
700250199Sgrehan
701250199Sgrehan	/*
702250199Sgrehan	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
703250199Sgrehan	 *  is the largest size we support
704250199Sgrehan	 */
705294705Ssephe	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
706294705Ssephe	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
707294705Ssephe	packet_len = descSize + page_buflen + buffer_len;
708250199Sgrehan	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
709250199Sgrehan
710250199Sgrehan	/* Setup the descriptor */
711250199Sgrehan	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
712250199Sgrehan	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
713294705Ssephe	/* in 8-bytes granularity */
714294705Ssephe	desc.data_offset8 = (descSize + page_buflen) >> 3;
715250199Sgrehan	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
716250199Sgrehan	desc.transaction_id = request_id;
717250199Sgrehan	desc.range_count = page_count;
718250199Sgrehan
719302870Ssephe	iov[0].iov_base = &desc;
720302870Ssephe	iov[0].iov_len = descSize;
721250199Sgrehan
722302870Ssephe	iov[1].iov_base = page_buffers;
723302870Ssephe	iov[1].iov_len = page_buflen;
724250199Sgrehan
725302870Ssephe	iov[2].iov_base = buffer;
726302870Ssephe	iov[2].iov_len = buffer_len;
727250199Sgrehan
728302870Ssephe	iov[3].iov_base = &alignedData;
729302870Ssephe	iov[3].iov_len = packetLen_aligned - packet_len;
730294705Ssephe
731302870Ssephe	ret = hv_ring_buffer_write(&channel->outbound, iov, 4, &need_sig);
732250199Sgrehan
733250199Sgrehan	/* TODO: We should determine if this is optional */
734302731Ssephe	if (ret == 0 && need_sig)
735302731Ssephe		vmbus_chan_send_event(channel);
736250199Sgrehan
737250199Sgrehan	return (ret);
738250199Sgrehan}
739250199Sgrehan
740250199Sgrehan/**
741250199Sgrehan * @brief Send a multi-page buffer packet using a GPADL Direct packet type
742250199Sgrehan */
743250199Sgrehanint
744250199Sgrehanhv_vmbus_channel_send_packet_multipagebuffer(
745250199Sgrehan	hv_vmbus_channel*		channel,
746250199Sgrehan	hv_vmbus_multipage_buffer*	multi_page_buffer,
747250199Sgrehan	void*				buffer,
748250199Sgrehan	uint32_t			buffer_len,
749250199Sgrehan	uint64_t			request_id)
750250199Sgrehan{
751250199Sgrehan
752250199Sgrehan	int			ret = 0;
753250199Sgrehan	uint32_t		desc_size;
754282212Swhu	boolean_t		need_sig;
755250199Sgrehan	uint32_t		packet_len;
756250199Sgrehan	uint32_t		packet_len_aligned;
757250199Sgrehan	uint32_t		pfn_count;
758250199Sgrehan	uint64_t		aligned_data = 0;
759302870Ssephe	struct iovec		iov[3];
760250199Sgrehan	hv_vmbus_channel_packet_multipage_buffer desc;
761250199Sgrehan
762250199Sgrehan	pfn_count =
763250199Sgrehan	    HV_NUM_PAGES_SPANNED(
764250199Sgrehan		    multi_page_buffer->offset,
765250199Sgrehan		    multi_page_buffer->length);
766250199Sgrehan
767250199Sgrehan	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
768250199Sgrehan	    return (EINVAL);
769250199Sgrehan	/*
770250199Sgrehan	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
771250199Sgrehan	 * is the largest size we support
772250199Sgrehan	 */
773250199Sgrehan	desc_size =
774250199Sgrehan	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
775250199Sgrehan		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
776250199Sgrehan			sizeof(uint64_t));
777250199Sgrehan	packet_len = desc_size + buffer_len;
778250199Sgrehan	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
779250199Sgrehan
780250199Sgrehan	/*
781250199Sgrehan	 * Setup the descriptor
782250199Sgrehan	 */
783250199Sgrehan	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
784250199Sgrehan	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
785250199Sgrehan	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
786250199Sgrehan	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
787250199Sgrehan	desc.transaction_id = request_id;
788250199Sgrehan	desc.range_count = 1;
789250199Sgrehan
790250199Sgrehan	desc.range.length = multi_page_buffer->length;
791250199Sgrehan	desc.range.offset = multi_page_buffer->offset;
792250199Sgrehan
793250199Sgrehan	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
794250199Sgrehan		pfn_count * sizeof(uint64_t));
795250199Sgrehan
796302870Ssephe	iov[0].iov_base = &desc;
797302870Ssephe	iov[0].iov_len = desc_size;
798250199Sgrehan
799302870Ssephe	iov[1].iov_base = buffer;
800302870Ssephe	iov[1].iov_len = buffer_len;
801250199Sgrehan
802302870Ssephe	iov[2].iov_base = &aligned_data;
803302870Ssephe	iov[2].iov_len = packet_len_aligned - packet_len;
804250199Sgrehan
805302870Ssephe	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
806250199Sgrehan
807250199Sgrehan	/* TODO: We should determine if this is optional */
808302731Ssephe	if (ret == 0 && need_sig)
809302731Ssephe		vmbus_chan_send_event(channel);
810250199Sgrehan
811250199Sgrehan	return (ret);
812250199Sgrehan}
813250199Sgrehan
814250199Sgrehan/**
815250199Sgrehan * @brief Retrieve the user packet on the specified channel
816250199Sgrehan */
817250199Sgrehanint
818250199Sgrehanhv_vmbus_channel_recv_packet(
819250199Sgrehan	hv_vmbus_channel*	channel,
820250199Sgrehan	void*			Buffer,
821250199Sgrehan	uint32_t		buffer_len,
822250199Sgrehan	uint32_t*		buffer_actual_len,
823250199Sgrehan	uint64_t*		request_id)
824250199Sgrehan{
825250199Sgrehan	int			ret;
826250199Sgrehan	uint32_t		user_len;
827250199Sgrehan	uint32_t		packet_len;
828250199Sgrehan	hv_vm_packet_descriptor	desc;
829250199Sgrehan
830250199Sgrehan	*buffer_actual_len = 0;
831250199Sgrehan	*request_id = 0;
832250199Sgrehan
833250199Sgrehan	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
834250199Sgrehan		sizeof(hv_vm_packet_descriptor));
835250199Sgrehan	if (ret != 0)
836250199Sgrehan		return (0);
837250199Sgrehan
838250199Sgrehan	packet_len = desc.length8 << 3;
839250199Sgrehan	user_len = packet_len - (desc.data_offset8 << 3);
840250199Sgrehan
841250199Sgrehan	*buffer_actual_len = user_len;
842250199Sgrehan
843250199Sgrehan	if (user_len > buffer_len)
844250199Sgrehan		return (EINVAL);
845250199Sgrehan
846250199Sgrehan	*request_id = desc.transaction_id;
847250199Sgrehan
848250199Sgrehan	/* Copy over the packet to the user buffer */
849250199Sgrehan	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
850250199Sgrehan		(desc.data_offset8 << 3));
851250199Sgrehan
852250199Sgrehan	return (0);
853250199Sgrehan}
854250199Sgrehan
855250199Sgrehan/**
856250199Sgrehan * @brief Retrieve the raw packet on the specified channel
857250199Sgrehan */
858250199Sgrehanint
859250199Sgrehanhv_vmbus_channel_recv_packet_raw(
860250199Sgrehan	hv_vmbus_channel*	channel,
861250199Sgrehan	void*			buffer,
862250199Sgrehan	uint32_t		buffer_len,
863250199Sgrehan	uint32_t*		buffer_actual_len,
864250199Sgrehan	uint64_t*		request_id)
865250199Sgrehan{
866250199Sgrehan	int		ret;
867250199Sgrehan	uint32_t	packetLen;
868250199Sgrehan	hv_vm_packet_descriptor	desc;
869250199Sgrehan
870250199Sgrehan	*buffer_actual_len = 0;
871250199Sgrehan	*request_id = 0;
872250199Sgrehan
873250199Sgrehan	ret = hv_ring_buffer_peek(
874250199Sgrehan		&channel->inbound, &desc,
875250199Sgrehan		sizeof(hv_vm_packet_descriptor));
876250199Sgrehan
877250199Sgrehan	if (ret != 0)
878250199Sgrehan	    return (0);
879250199Sgrehan
880250199Sgrehan	packetLen = desc.length8 << 3;
881250199Sgrehan	*buffer_actual_len = packetLen;
882250199Sgrehan
883250199Sgrehan	if (packetLen > buffer_len)
884250199Sgrehan	    return (ENOBUFS);
885250199Sgrehan
886250199Sgrehan	*request_id = desc.transaction_id;
887250199Sgrehan
888250199Sgrehan	/* Copy over the entire packet to the user buffer */
889250199Sgrehan	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
890250199Sgrehan
891250199Sgrehan	return (0);
892250199Sgrehan}
893294886Ssephe
894294886Ssephestatic void
895302713Ssephevmbus_chan_task(void *xchan, int pending __unused)
896294886Ssephe{
897302713Ssephe	struct hv_vmbus_channel *chan = xchan;
898302874Ssephe	vmbus_chan_callback_t cb = chan->ch_cb;
899302874Ssephe	void *cbarg = chan->ch_cbarg;
900294886Ssephe
901302710Ssephe	/*
902302710Ssephe	 * Optimize host to guest signaling by ensuring:
903302710Ssephe	 * 1. While reading the channel, we disable interrupts from
904302710Ssephe	 *    host.
905302710Ssephe	 * 2. Ensure that we process all posted messages from the host
906302710Ssephe	 *    before returning from this callback.
907302710Ssephe	 * 3. Once we return, enable signaling from the host. Once this
908302710Ssephe	 *    state is set we check to see if additional packets are
909302710Ssephe	 *    available to read. In this case we repeat the process.
910302713Ssephe	 *
911302713Ssephe	 * NOTE: Interrupt has been disabled in the ISR.
912302710Ssephe	 */
913302713Ssephe	for (;;) {
914302713Ssephe		uint32_t left;
915294886Ssephe
916302874Ssephe		cb(cbarg);
917294886Ssephe
918302713Ssephe		left = hv_ring_buffer_read_end(&chan->inbound);
919302713Ssephe		if (left == 0) {
920302713Ssephe			/* No more data in RX bufring; done */
921302713Ssephe			break;
922302713Ssephe		}
923302713Ssephe		hv_ring_buffer_read_begin(&chan->inbound);
924302713Ssephe	}
925294886Ssephe}
926302692Ssephe
927302713Ssephestatic void
928302713Ssephevmbus_chan_task_nobatch(void *xchan, int pending __unused)
929302713Ssephe{
930302713Ssephe	struct hv_vmbus_channel *chan = xchan;
931302713Ssephe
932302874Ssephe	chan->ch_cb(chan->ch_cbarg);
933302713Ssephe}
934302713Ssephe
935302692Ssephestatic __inline void
936302692Ssephevmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
937302692Ssephe    int flag_cnt)
938302692Ssephe{
939302692Ssephe	int f;
940302692Ssephe
941302692Ssephe	for (f = 0; f < flag_cnt; ++f) {
942302806Ssephe		uint32_t chid_base;
943302692Ssephe		u_long flags;
944302806Ssephe		int chid_ofs;
945302692Ssephe
946302692Ssephe		if (event_flags[f] == 0)
947302692Ssephe			continue;
948302692Ssephe
949302692Ssephe		flags = atomic_swap_long(&event_flags[f], 0);
950302806Ssephe		chid_base = f << VMBUS_EVTFLAG_SHIFT;
951302692Ssephe
952302806Ssephe		while ((chid_ofs = ffsl(flags)) != 0) {
953302692Ssephe			struct hv_vmbus_channel *channel;
954302692Ssephe
955302806Ssephe			--chid_ofs; /* NOTE: ffsl is 1-based */
956302806Ssephe			flags &= ~(1UL << chid_ofs);
957302692Ssephe
958302806Ssephe			channel = sc->vmbus_chmap[chid_base + chid_ofs];
959302692Ssephe
960302692Ssephe			/* if channel is closed or closing */
961302874Ssephe			if (channel == NULL || channel->ch_tq == NULL)
962302692Ssephe				continue;
963302692Ssephe
964302709Ssephe			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
965302692Ssephe				hv_ring_buffer_read_begin(&channel->inbound);
966302874Ssephe			taskqueue_enqueue(channel->ch_tq, &channel->ch_task);
967302692Ssephe		}
968302692Ssephe	}
969302692Ssephe}
970302692Ssephe
971302692Ssephevoid
972302692Ssephevmbus_event_proc(struct vmbus_softc *sc, int cpu)
973302692Ssephe{
974302692Ssephe	struct vmbus_evtflags *eventf;
975302692Ssephe
976302692Ssephe	/*
977302692Ssephe	 * On Host with Win8 or above, the event page can be checked directly
978302692Ssephe	 * to get the id of the channel that has the pending interrupt.
979302692Ssephe	 */
980302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
981302692Ssephe	vmbus_event_flags_proc(sc, eventf->evt_flags,
982302692Ssephe	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
983302692Ssephe}
984302692Ssephe
985302692Ssephevoid
986302692Ssephevmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
987302692Ssephe{
988302692Ssephe	struct vmbus_evtflags *eventf;
989302692Ssephe
990302692Ssephe	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
991302692Ssephe	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
992302692Ssephe		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
993302692Ssephe		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
994302692Ssephe	}
995302692Ssephe}
996302692Ssephe
997302692Ssephestatic void
998302692Ssephevmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
999302692Ssephe    const struct hv_vmbus_channel *chan)
1000302692Ssephe{
1001302692Ssephe	volatile int *flag_cnt_ptr;
1002302692Ssephe	int flag_cnt;
1003302692Ssephe
1004302693Ssephe	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
1005302873Ssephe	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);
1006302692Ssephe
1007302692Ssephe	for (;;) {
1008302692Ssephe		int old_flag_cnt;
1009302692Ssephe
1010302692Ssephe		old_flag_cnt = *flag_cnt_ptr;
1011302692Ssephe		if (old_flag_cnt >= flag_cnt)
1012302692Ssephe			break;
1013302692Ssephe		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
1014302692Ssephe			if (bootverbose) {
1015302692Ssephe				device_printf(sc->vmbus_dev,
1016302692Ssephe				    "channel%u update cpu%d flag_cnt to %d\n",
1017302873Ssephe				    chan->ch_id, chan->ch_cpuid, flag_cnt);
1018302692Ssephe			}
1019302692Ssephe			break;
1020302692Ssephe		}
1021302692Ssephe	}
1022302692Ssephe}
1023302864Ssephe
1024302864Ssephestatic struct hv_vmbus_channel *
1025302864Ssephevmbus_chan_alloc(struct vmbus_softc *sc)
1026302864Ssephe{
1027302864Ssephe	struct hv_vmbus_channel *chan;
1028302864Ssephe
1029302864Ssephe	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
1030302864Ssephe
1031302864Ssephe	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
1032302864Ssephe	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
1033302864Ssephe	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
1034302864Ssephe	if (chan->ch_monprm == NULL) {
1035302864Ssephe		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
1036302864Ssephe		free(chan, M_DEVBUF);
1037302864Ssephe		return NULL;
1038302864Ssephe	}
1039302864Ssephe
1040302864Ssephe	chan->vmbus_sc = sc;
1041302864Ssephe	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
1042302864Ssephe	TAILQ_INIT(&chan->ch_subchans);
1043302864Ssephe	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
1044302864Ssephe
1045302864Ssephe	return chan;
1046302864Ssephe}
1047302864Ssephe
1048302864Ssephestatic void
1049302864Ssephevmbus_chan_free(struct hv_vmbus_channel *chan)
1050302864Ssephe{
1051302864Ssephe	/* TODO: assert sub-channel list is empty */
1052302864Ssephe	/* TODO: asset no longer on the primary channel's sub-channel list */
1053302864Ssephe	/* TODO: asset no longer on the vmbus channel list */
1054302864Ssephe	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
1055302864Ssephe	mtx_destroy(&chan->ch_subchan_lock);
1056302864Ssephe	free(chan, M_DEVBUF);
1057302864Ssephe}
1058302864Ssephe
1059302864Ssephestatic int
1060302864Ssephevmbus_chan_add(struct hv_vmbus_channel *newchan)
1061302864Ssephe{
1062302864Ssephe	struct vmbus_softc *sc = newchan->vmbus_sc;
1063302864Ssephe	struct hv_vmbus_channel *prichan;
1064302864Ssephe
1065302864Ssephe	if (newchan->ch_id == 0) {
1066302864Ssephe		/*
1067302864Ssephe		 * XXX
1068302864Ssephe		 * Chan0 will neither be processed nor should be offered;
1069302864Ssephe		 * skip it.
1070302864Ssephe		 */
1071302864Ssephe		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
1072302864Ssephe		return EINVAL;
1073302864Ssephe	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
1074302864Ssephe		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
1075302864Ssephe		    newchan->ch_id);
1076302864Ssephe		return EINVAL;
1077302864Ssephe	}
1078302864Ssephe	sc->vmbus_chmap[newchan->ch_id] = newchan;
1079302864Ssephe
1080302864Ssephe	if (bootverbose) {
1081302864Ssephe		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
1082302864Ssephe		    newchan->ch_id, newchan->ch_subidx);
1083302864Ssephe	}
1084302864Ssephe
1085302864Ssephe	mtx_lock(&sc->vmbus_prichan_lock);
1086302864Ssephe	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
1087302864Ssephe		/*
1088302864Ssephe		 * Sub-channel will have the same type GUID and instance
1089302864Ssephe		 * GUID as its primary channel.
1090302864Ssephe		 */
1091302864Ssephe		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
1092302864Ssephe		    sizeof(struct hyperv_guid)) == 0 &&
1093302864Ssephe		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
1094302864Ssephe		    sizeof(struct hyperv_guid)) == 0)
1095302864Ssephe			break;
1096302864Ssephe	}
1097302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
1098302864Ssephe		if (prichan == NULL) {
1099302864Ssephe			/* Install the new primary channel */
1100302864Ssephe			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
1101302864Ssephe			    ch_prilink);
1102302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1103302864Ssephe			return 0;
1104302864Ssephe		} else {
1105302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1106302864Ssephe			device_printf(sc->vmbus_dev, "duplicated primary "
1107302864Ssephe			    "chan%u\n", newchan->ch_id);
1108302864Ssephe			return EINVAL;
1109302864Ssephe		}
1110302864Ssephe	} else { /* Sub-channel */
1111302864Ssephe		if (prichan == NULL) {
1112302864Ssephe			mtx_unlock(&sc->vmbus_prichan_lock);
1113302864Ssephe			device_printf(sc->vmbus_dev, "no primary chan for "
1114302864Ssephe			    "chan%u\n", newchan->ch_id);
1115302864Ssephe			return EINVAL;
1116302864Ssephe		}
1117302864Ssephe		/*
1118302864Ssephe		 * Found the primary channel for this sub-channel and
1119302864Ssephe		 * move on.
1120302864Ssephe		 *
1121302864Ssephe		 * XXX refcnt prichan
1122302864Ssephe		 */
1123302864Ssephe	}
1124302864Ssephe	mtx_unlock(&sc->vmbus_prichan_lock);
1125302864Ssephe
1126302864Ssephe	/*
1127302864Ssephe	 * This is a sub-channel; link it with the primary channel.
1128302864Ssephe	 */
1129302864Ssephe	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
1130302864Ssephe	    ("new channel is not sub-channel"));
1131302864Ssephe	KASSERT(prichan != NULL, ("no primary channel"));
1132302864Ssephe
1133302864Ssephe	newchan->ch_prichan = prichan;
1134302864Ssephe	newchan->ch_dev = prichan->ch_dev;
1135302864Ssephe
1136302864Ssephe	mtx_lock(&prichan->ch_subchan_lock);
1137302864Ssephe	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
1138302864Ssephe	/*
1139302864Ssephe	 * Bump up sub-channel count and notify anyone that is
1140302864Ssephe	 * interested in this sub-channel, after this sub-channel
1141302864Ssephe	 * is setup.
1142302864Ssephe	 */
1143302864Ssephe	prichan->ch_subchan_cnt++;
1144302864Ssephe	mtx_unlock(&prichan->ch_subchan_lock);
1145302864Ssephe	wakeup(prichan);
1146302864Ssephe
1147302864Ssephe	return 0;
1148302864Ssephe}
1149302864Ssephe
1150302864Ssephevoid
1151302864Ssephevmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1152302864Ssephe{
1153302864Ssephe	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1154302864Ssephe
1155302864Ssephe	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1156302864Ssephe	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1157302864Ssephe		/* Only cpu0 is supported */
1158302864Ssephe		cpu = 0;
1159302864Ssephe	}
1160302864Ssephe
1161302873Ssephe	chan->ch_cpuid = cpu;
1162302873Ssephe	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1163302864Ssephe
1164302864Ssephe	if (bootverbose) {
1165302864Ssephe		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1166302873Ssephe		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
1167302864Ssephe	}
1168302864Ssephe}
1169302864Ssephe
1170302864Ssephevoid
1171302864Ssephevmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
1172302864Ssephe{
1173302864Ssephe	static uint32_t vmbus_chan_nextcpu;
1174302864Ssephe	int cpu;
1175302864Ssephe
1176302864Ssephe	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1177302864Ssephe	vmbus_channel_cpu_set(chan, cpu);
1178302864Ssephe}
1179302864Ssephe
/*
 * Default channel-cpu binding: cpu0.  Devices that need a different
 * channel-cpu mapping are expected to call
 * vmbus_channel_cpu_{set,rr}() themselves.
 */
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{

	vmbus_channel_cpu_set(chan, 0);
}
1190302864Ssephe
1191302864Ssephestatic void
1192302864Ssephevmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
1193302864Ssephe    const struct vmbus_message *msg)
1194302864Ssephe{
1195302864Ssephe	const struct vmbus_chanmsg_choffer *offer;
1196302864Ssephe	struct hv_vmbus_channel *chan;
1197302864Ssephe	int error;
1198302864Ssephe
1199302864Ssephe	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;
1200302864Ssephe
1201302864Ssephe	chan = vmbus_chan_alloc(sc);
1202302864Ssephe	if (chan == NULL) {
1203302864Ssephe		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
1204302864Ssephe		    offer->chm_chanid);
1205302864Ssephe		return;
1206302864Ssephe	}
1207302864Ssephe
1208302864Ssephe	chan->ch_id = offer->chm_chanid;
1209302864Ssephe	chan->ch_subidx = offer->chm_subidx;
1210302864Ssephe	chan->ch_guid_type = offer->chm_chtype;
1211302864Ssephe	chan->ch_guid_inst = offer->chm_chinst;
1212302864Ssephe
1213302864Ssephe	/* Batch reading is on by default */
1214302864Ssephe	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
1215302864Ssephe
1216302864Ssephe	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
1217302864Ssephe	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
1218302864Ssephe		chan->ch_monprm->mp_connid = offer->chm_connid;
1219302864Ssephe
1220302864Ssephe	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
1221302864Ssephe		/*
1222302864Ssephe		 * Setup MNF stuffs.
1223302864Ssephe		 */
1224302864Ssephe		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
1225302864Ssephe		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
1226302864Ssephe		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
1227302864Ssephe			panic("invalid monitor trigger %u", offer->chm_montrig);
1228302864Ssephe		chan->ch_montrig_mask =
1229302864Ssephe		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
1230302864Ssephe	}
1231302864Ssephe
1232302864Ssephe	/* Select default cpu for this channel. */
1233302864Ssephe	vmbus_chan_cpu_default(chan);
1234302864Ssephe
1235302864Ssephe	error = vmbus_chan_add(chan);
1236302864Ssephe	if (error) {
1237302864Ssephe		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
1238302864Ssephe		    chan->ch_id, error);
1239302864Ssephe		vmbus_chan_free(chan);
1240302864Ssephe		return;
1241302864Ssephe	}
1242302864Ssephe
1243302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1244302864Ssephe		/*
1245302864Ssephe		 * Add device for this primary channel.
1246302864Ssephe		 *
1247302864Ssephe		 * NOTE:
1248302864Ssephe		 * Error is ignored here; don't have much to do if error
1249302864Ssephe		 * really happens.
1250302864Ssephe		 */
1251302868Ssephe		vmbus_add_child(chan);
1252302864Ssephe	}
1253302864Ssephe}
1254302864Ssephe
1255302864Ssephe/*
1256302864Ssephe * XXX pretty broken; need rework.
1257302864Ssephe */
1258302864Ssephestatic void
1259302864Ssephevmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1260302864Ssephe    const struct vmbus_message *msg)
1261302864Ssephe{
1262302864Ssephe	const struct vmbus_chanmsg_chrescind *note;
1263302864Ssephe	struct hv_vmbus_channel *chan;
1264302864Ssephe
1265302864Ssephe	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1266302864Ssephe	if (note->chm_chanid > VMBUS_CHAN_MAX) {
1267302864Ssephe		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1268302864Ssephe		    note->chm_chanid);
1269302864Ssephe		return;
1270302864Ssephe	}
1271302864Ssephe
1272302864Ssephe	if (bootverbose) {
1273302864Ssephe		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1274302864Ssephe		    note->chm_chanid);
1275302864Ssephe	}
1276302864Ssephe
1277302864Ssephe	chan = sc->vmbus_chmap[note->chm_chanid];
1278302864Ssephe	if (chan == NULL)
1279302864Ssephe		return;
1280302864Ssephe	sc->vmbus_chmap[note->chm_chanid] = NULL;
1281302864Ssephe
1282302864Ssephe	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1283302864Ssephe}
1284302864Ssephe
1285302864Ssephestatic void
1286302864Ssephevmbus_chan_detach_task(void *xchan, int pending __unused)
1287302864Ssephe{
1288302864Ssephe	struct hv_vmbus_channel *chan = xchan;
1289302864Ssephe
1290302864Ssephe	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1291302864Ssephe		/* Only primary channel owns the device */
1292302868Ssephe		vmbus_delete_child(chan);
1293302864Ssephe		/* NOTE: DO NOT free primary channel for now */
1294302864Ssephe	} else {
1295302864Ssephe		struct vmbus_softc *sc = chan->vmbus_sc;
1296302864Ssephe		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1297302864Ssephe		struct vmbus_chanmsg_chfree *req;
1298302864Ssephe		struct vmbus_msghc *mh;
1299302864Ssephe		int error;
1300302864Ssephe
1301302864Ssephe		mh = vmbus_msghc_get(sc, sizeof(*req));
1302302864Ssephe		if (mh == NULL) {
1303302864Ssephe			device_printf(sc->vmbus_dev,
1304302864Ssephe			    "can not get msg hypercall for chfree(chan%u)\n",
1305302864Ssephe			    chan->ch_id);
1306302864Ssephe			goto remove;
1307302864Ssephe		}
1308302864Ssephe
1309302864Ssephe		req = vmbus_msghc_dataptr(mh);
1310302864Ssephe		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1311302864Ssephe		req->chm_chanid = chan->ch_id;
1312302864Ssephe
1313302864Ssephe		error = vmbus_msghc_exec_noresult(mh);
1314302864Ssephe		vmbus_msghc_put(sc, mh);
1315302864Ssephe
1316302864Ssephe		if (error) {
1317302864Ssephe			device_printf(sc->vmbus_dev,
1318302864Ssephe			    "chfree(chan%u) failed: %d",
1319302864Ssephe			    chan->ch_id, error);
1320302864Ssephe			/* NOTE: Move on! */
1321302864Ssephe		} else {
1322302864Ssephe			if (bootverbose) {
1323302864Ssephe				device_printf(sc->vmbus_dev, "chan%u freed\n",
1324302864Ssephe				    chan->ch_id);
1325302864Ssephe			}
1326302864Ssephe		}
1327302864Ssepheremove:
1328302864Ssephe		mtx_lock(&pri_chan->ch_subchan_lock);
1329302864Ssephe		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1330302864Ssephe		KASSERT(pri_chan->ch_subchan_cnt > 0,
1331302864Ssephe		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1332302864Ssephe		pri_chan->ch_subchan_cnt--;
1333302864Ssephe		mtx_unlock(&pri_chan->ch_subchan_lock);
1334302864Ssephe		wakeup(pri_chan);
1335302864Ssephe
1336302864Ssephe		vmbus_chan_free(chan);
1337302864Ssephe	}
1338302864Ssephe}
1339302864Ssephe
1340302864Ssephe/*
1341302864Ssephe * Detach all devices and destroy the corresponding primary channels.
1342302864Ssephe */
1343302864Ssephevoid
1344302864Ssephevmbus_chan_destroy_all(struct vmbus_softc *sc)
1345302864Ssephe{
1346302864Ssephe	struct hv_vmbus_channel *chan;
1347302864Ssephe
1348302864Ssephe	mtx_lock(&sc->vmbus_prichan_lock);
1349302864Ssephe	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1350302864Ssephe		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1351302864Ssephe		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1352302864Ssephe		mtx_unlock(&sc->vmbus_prichan_lock);
1353302864Ssephe
1354302868Ssephe		vmbus_delete_child(chan);
1355302864Ssephe		vmbus_chan_free(chan);
1356302864Ssephe
1357302864Ssephe		mtx_lock(&sc->vmbus_prichan_lock);
1358302864Ssephe	}
1359302864Ssephe	bzero(sc->vmbus_chmap,
1360302864Ssephe	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1361302864Ssephe	mtx_unlock(&sc->vmbus_prichan_lock);
1362302864Ssephe}
1363302864Ssephe
1364302864Ssephe/**
1365302864Ssephe * @brief Select the best outgoing channel
1366302864Ssephe *
1367302864Ssephe * The channel whose vcpu binding is closest to the currect vcpu will
1368302864Ssephe * be selected.
1369302864Ssephe * If no multi-channel, always select primary channel
1370302864Ssephe *
1371302864Ssephe * @param primary - primary channel
1372302864Ssephe */
1373302864Ssephestruct hv_vmbus_channel *
1374302864Ssephevmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
1375302864Ssephe{
1376302864Ssephe	hv_vmbus_channel *new_channel = NULL;
1377302864Ssephe	hv_vmbus_channel *outgoing_channel = primary;
1378302864Ssephe	int old_cpu_distance = 0;
1379302864Ssephe	int new_cpu_distance = 0;
1380302864Ssephe	int cur_vcpu = 0;
1381302864Ssephe	int smp_pro_id = PCPU_GET(cpuid);
1382302864Ssephe
1383302864Ssephe	if (TAILQ_EMPTY(&primary->ch_subchans)) {
1384302864Ssephe		return outgoing_channel;
1385302864Ssephe	}
1386302864Ssephe
1387302864Ssephe	if (smp_pro_id >= MAXCPU) {
1388302864Ssephe		return outgoing_channel;
1389302864Ssephe	}
1390302864Ssephe
1391302864Ssephe	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);
1392302864Ssephe
1393302864Ssephe	/* XXX need lock */
1394302864Ssephe	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
1395302864Ssephe		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
1396302864Ssephe			continue;
1397302864Ssephe		}
1398302864Ssephe
1399302873Ssephe		if (new_channel->ch_vcpuid == cur_vcpu){
1400302864Ssephe			return new_channel;
1401302864Ssephe		}
1402302864Ssephe
1403302873Ssephe		old_cpu_distance = ((outgoing_channel->ch_vcpuid > cur_vcpu) ?
1404302873Ssephe		    (outgoing_channel->ch_vcpuid - cur_vcpu) :
1405302873Ssephe		    (cur_vcpu - outgoing_channel->ch_vcpuid));
1406302864Ssephe
1407302873Ssephe		new_cpu_distance = ((new_channel->ch_vcpuid > cur_vcpu) ?
1408302873Ssephe		    (new_channel->ch_vcpuid - cur_vcpu) :
1409302873Ssephe		    (cur_vcpu - new_channel->ch_vcpuid));
1410302864Ssephe
1411302864Ssephe		if (old_cpu_distance < new_cpu_distance) {
1412302864Ssephe			continue;
1413302864Ssephe		}
1414302864Ssephe
1415302864Ssephe		outgoing_channel = new_channel;
1416302864Ssephe	}
1417302864Ssephe
1418302864Ssephe	return(outgoing_channel);
1419302864Ssephe}
1420302864Ssephe
1421302864Ssephestruct hv_vmbus_channel **
1422302864Ssephevmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1423302864Ssephe{
1424302864Ssephe	struct hv_vmbus_channel **ret, *chan;
1425302864Ssephe	int i;
1426302864Ssephe
1427302864Ssephe	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1428302864Ssephe	    M_WAITOK);
1429302864Ssephe
1430302864Ssephe	mtx_lock(&pri_chan->ch_subchan_lock);
1431302864Ssephe
1432302864Ssephe	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1433302864Ssephe		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1434302864Ssephe
1435302864Ssephe	i = 0;
1436302864Ssephe	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1437302864Ssephe		/* TODO: refcnt chan */
1438302864Ssephe		ret[i] = chan;
1439302864Ssephe
1440302864Ssephe		++i;
1441302864Ssephe		if (i == subchan_cnt)
1442302864Ssephe			break;
1443302864Ssephe	}
1444302864Ssephe	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1445302864Ssephe	    pri_chan->ch_subchan_cnt, subchan_cnt));
1446302864Ssephe
1447302864Ssephe	mtx_unlock(&pri_chan->ch_subchan_lock);
1448302864Ssephe
1449302864Ssephe	return ret;
1450302864Ssephe}
1451302864Ssephe
1452302864Ssephevoid
1453302864Ssephevmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
1454302864Ssephe{
1455302864Ssephe
1456302864Ssephe	free(subchan, M_TEMP);
1457302864Ssephe}
1458302864Ssephe
1459302864Ssephevoid
1460302864Ssephevmbus_drain_subchan(struct hv_vmbus_channel *pri_chan)
1461302864Ssephe{
1462302864Ssephe	mtx_lock(&pri_chan->ch_subchan_lock);
1463302864Ssephe	while (pri_chan->ch_subchan_cnt > 0)
1464302864Ssephe		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
1465302864Ssephe	mtx_unlock(&pri_chan->ch_subchan_lock);
1466302864Ssephe}
1467302864Ssephe
1468302864Ssephevoid
1469302864Ssephevmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1470302864Ssephe{
1471302864Ssephe	vmbus_chanmsg_proc_t msg_proc;
1472302864Ssephe	uint32_t msg_type;
1473302864Ssephe
1474302864Ssephe	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1475302864Ssephe	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1476302864Ssephe	    ("invalid message type %u", msg_type));
1477302864Ssephe
1478302864Ssephe	msg_proc = vmbus_chan_msgprocs[msg_type];
1479302864Ssephe	if (msg_proc != NULL)
1480302864Ssephe		msg_proc(sc, msg);
1481302864Ssephe}
1482