/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302892 2016-07-15 08:40:22Z sephe $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include <machine/atomic.h>
#include <machine/bus.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
#include <dev/hyperv/vmbus/hyperv_var.h>
#include <dev/hyperv/vmbus/vmbus_reg.h>
#include <dev/hyperv/vmbus/vmbus_var.h>

static void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
static void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
		    const struct hv_vmbus_channel *);

static void	vmbus_chan_task(void *, int);
static void	vmbus_chan_task_nobatch(void *, int);
static void	vmbus_chan_detach_task(void *, int);

static void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
		    const struct vmbus_message *);
static void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
		    const struct vmbus_message *);

/*
 * Vmbus channel message processing.
 */
static const vmbus_chanmsg_proc_t
vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),

	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
};

/**
 *  @brief Trigger an event notification on the specified channel
 */
static void
vmbus_chan_send_event(hv_vmbus_channel *channel)
{
	struct vmbus_softc *sc = channel->vmbus_sc;
	uint32_t chanid = channel->ch_id;

	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
	    1UL << (chanid & VMBUS_EVTFLAG_MASK));

	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
		atomic_set_int(
		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
		channel->ch_montrig_mask);
	} else {
		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
	}
}

static int
vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS)
{
	struct hv_vmbus_channel *chan = arg1;
	int mnf = 0;

	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
		mnf = 1;
	return sysctl_handle_int(oidp, &mnf, 0, req);
}

static void
vmbus_chan_sysctl_create(struct hv_vmbus_channel *chan)
{
	struct sysctl_oid *ch_tree, *chid_tree, *br_tree;
	struct sysctl_ctx_list *ctx;
	uint32_t ch_id;
	char name[16];

	/*
	 * Add sysctl nodes related to this channel to this
	 * channel's sysctl ctx, so that they can be destroyed
	 * independently upon close of this channel, which can
	 * happen even if the device is not detached.
	 */
	ctx = &chan->ch_sysctl_ctx;
	sysctl_ctx_init(ctx);

	/*
	 * Create dev.NAME.UNIT.channel tree.
	 */
	ch_tree = SYSCTL_ADD_NODE(ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)),
	    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	if (ch_tree == NULL)
		return;

	/*
	 * Create dev.NAME.UNIT.channel.CHANID tree.
	 */
	if (VMBUS_CHAN_ISPRIMARY(chan))
		ch_id = chan->ch_id;
	else
		ch_id = chan->ch_prichan->ch_id;
	snprintf(name, sizeof(name), "%d", ch_id);
	chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	if (chid_tree == NULL)
		return;

	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Create dev.NAME.UNIT.channel.CHANID.sub tree.
		 */
		ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree),
		    OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		if (ch_tree == NULL)
			return;

		/*
		 * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree.
		 *
		 * NOTE:
		 * chid_tree is changed to this new sysctl tree.
		 */
		snprintf(name, sizeof(name), "%d", chan->ch_subidx);
		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
		    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		if (chid_tree == NULL)
			return;

		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
		    "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id");
	}

	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
	    "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
	    "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    chan, 0, vmbus_chan_sysctl_mnf, "I",
	    "has monitor notification facilities");

	/*
	 * Create sysctl tree for RX bufring.
	 */
	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
	    "in", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	if (br_tree != NULL) {
		hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(br_tree),
		    &chan->inbound, "inbound ring buffer stats");
	}

	/*
	 * Create sysctl tree for TX bufring.
	 */
	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
	    "out", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	if (br_tree != NULL) {
		hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(br_tree),
		    &chan->outbound, "outbound ring buffer stats");
	}
}

/**
 * @brief Open the specified channel
 */
int
hv_vmbus_channel_open(
	hv_vmbus_channel*		new_channel,
	uint32_t			send_ring_buffer_size,
	uint32_t			recv_ring_buffer_size,
	void*				user_data,
	uint32_t			user_data_len,
	vmbus_chan_callback_t		cb,
	void				*cbarg)
{
	struct vmbus_softc *sc = new_channel->vmbus_sc;
	const struct vmbus_chanmsg_chopen_resp *resp;
	const struct vmbus_message *msg;
	struct vmbus_chanmsg_chopen *req;
	struct vmbus_msghc *mh;
	uint32_t status;
	int ret = 0;
	uint8_t *br;

	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
		device_printf(sc->vmbus_dev,
		    "invalid udata len %u for chan%u\n",
		    user_data_len, new_channel->ch_id);
		return EINVAL;
	}
	KASSERT((send_ring_buffer_size & PAGE_MASK) == 0,
	    ("send bufring size is not multiple page"));
	KASSERT((recv_ring_buffer_size & PAGE_MASK) == 0,
	    ("recv bufring size is not multiple page"));

	if (atomic_testandset_int(&new_channel->ch_stflags,
	    VMBUS_CHAN_ST_OPENED_SHIFT))
		panic("double-open chan%u", new_channel->ch_id);

	new_channel->ch_cb = cb;
	new_channel->ch_cbarg = cbarg;

	vmbus_chan_update_evtflagcnt(sc, new_channel);

	new_channel->ch_tq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
	    new_channel->ch_cpuid);
	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task,
		    new_channel);
	} else {
		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task_nobatch,
		    new_channel);
	}

	/*
	 * Allocate the TX+RX bufrings.
	 * XXX should use ch_dev dtag
	 */
	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    PAGE_SIZE, 0, send_ring_buffer_size + recv_ring_buffer_size,
	    &new_channel->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (br == NULL) {
		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
		ret = ENOMEM;
		goto failed;
	}
	new_channel->ch_bufring = br;

	/* TX bufring comes first */
	hv_vmbus_ring_buffer_init(&new_channel->outbound,
	    br, send_ring_buffer_size);
	/* RX bufring immediately follows TX bufring */
	hv_vmbus_ring_buffer_init(&new_channel->inbound,
	    br + send_ring_buffer_size, recv_ring_buffer_size);

	/* Create sysctl tree for this channel */
	vmbus_chan_sysctl_create(new_channel);

	/*
	 * Connect the bufrings, both RX and TX, to this channel.
	 */
	ret = vmbus_chan_gpadl_connect(new_channel,
		new_channel->ch_bufring_dma.hv_paddr,
		send_ring_buffer_size + recv_ring_buffer_size,
		&new_channel->ch_bufring_gpadl);
	if (ret != 0) {
		device_printf(sc->vmbus_dev,
		    "failed to connect bufring GPADL to chan%u\n",
		    new_channel->ch_id);
		goto failed;
	}

	/*
	 * Open channel w/ the bufring GPADL on the target CPU.
	 */
	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chopen(chan%u)\n",
		    new_channel->ch_id);
		ret = ENXIO;
		goto failed;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
	req->chm_chanid = new_channel->ch_id;
	req->chm_openid = new_channel->ch_id;
	req->chm_gpadl = new_channel->ch_bufring_gpadl;
	req->chm_vcpuid = new_channel->ch_vcpuid;
	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
	if (user_data_len)
		memcpy(req->chm_udata, user_data, user_data_len);

	ret = vmbus_msghc_exec(sc, mh);
	if (ret != 0) {
		device_printf(sc->vmbus_dev,
		    "chopen(chan%u) msg hypercall exec failed: %d\n",
		    new_channel->ch_id, ret);
		vmbus_msghc_put(sc, mh);
		goto failed;
	}

	msg = vmbus_msghc_wait_result(sc, mh);
	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
	status = resp->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status == 0) {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "chan%u opened\n",
			    new_channel->ch_id);
		}
		return 0;
	}

	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
	    new_channel->ch_id);
	ret = ENXIO;

failed:
	if (new_channel->ch_bufring_gpadl) {
		vmbus_chan_gpadl_disconnect(new_channel,
		    new_channel->ch_bufring_gpadl);
		new_channel->ch_bufring_gpadl = 0;
	}
	if (new_channel->ch_bufring != NULL) {
		hyperv_dmamem_free(&new_channel->ch_bufring_dma,
		    new_channel->ch_bufring);
		new_channel->ch_bufring = NULL;
	}
	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
	return ret;
}

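/*
 * Establish a GPADL (Guest Physical Address Descriptor List) covering
 * the guest physical range [paddr, paddr + size) and connect it to
 * this channel; the allocated GPADL id is returned through gpadl0.
 * Since a single message can carry only a limited number of page
 * frames, one GPADL_CONN message may be followed by several
 * GPADL_SUBCONN messages.
 */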
int
vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
    int size, uint32_t *gpadl0)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_conn *req;
	const struct vmbus_message *msg;
	size_t reqsz;
	uint32_t gpadl, status;
	int page_count, range_len, i, cnt, error;
	uint64_t page_id;

	/*
	 * Preliminary checks.
	 */

	KASSERT((size & PAGE_MASK) == 0,
	    ("invalid GPA size %d, not multiple page size", size));
	page_count = size >> PAGE_SHIFT;

	KASSERT((paddr & PAGE_MASK) == 0,
	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
	page_id = paddr >> PAGE_SHIFT;

	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
	/*
	 * We don't support multiple GPA ranges.
	 */
	if (range_len > UINT16_MAX) {
		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
		    page_count);
		return EOPNOTSUPP;
	}

	/*
	 * Allocate GPADL id.
	 */
	gpadl = vmbus_gpadl_alloc(sc);
	*gpadl0 = gpadl;

	/*
	 * Connect this GPADL to the target channel.
	 *
	 * NOTE:
	 * Since each message can only hold a small set of page
	 * addresses, several messages may be required to
	 * complete the connection.
	 */
	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
	else
		cnt = page_count;
	page_count -= cnt;

	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
	    chm_range.gpa_page[cnt]);
	mh = vmbus_msghc_get(sc, reqsz);
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpadl->chan%u\n",
		    chan->ch_id);
		return EIO;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;
	req->chm_range_len = range_len;
	req->chm_range_cnt = 1;
	req->chm_range.gpa_len = size;
	req->chm_range.gpa_ofs = 0;
	for (i = 0; i < cnt; ++i)
		req->chm_range.gpa_page[i] = page_id++;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpadl->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	while (page_count > 0) {
		struct vmbus_chanmsg_gpadl_subconn *subreq;

		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
		else
			cnt = page_count;
		page_count -= cnt;

		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
		    chm_gpa_page[cnt]);
		vmbus_msghc_reset(mh, reqsz);

		subreq = vmbus_msghc_dataptr(mh);
		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
		subreq->chm_gpadl = gpadl;
		for (i = 0; i < cnt; ++i)
			subreq->chm_gpa_page[i] = page_id++;

		vmbus_msghc_exec_noresult(mh);
	}
	KASSERT(page_count == 0, ("invalid page count %d", page_count));

	msg = vmbus_msghc_wait_result(sc, mh);
	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
	    msg->msg_data)->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status != 0) {
		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
		    "status %u\n", chan->ch_id, status);
		return EIO;
	} else {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "gpadl->chan%u "
			    "succeeded\n", chan->ch_id);
		}
	}
	return 0;
}

/*
 * Disconnect the GPA from the target channel
 */
int
vmbus_chan_gpadl_disconnect(struct hv_vmbus_channel *chan, uint32_t gpadl)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_disconn *req;
	int error;

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpa x->chan%u\n",
		    chan->ch_id);
		return EBUSY;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpa x->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	vmbus_msghc_wait_result(sc, mh);
	/* Discard result; no useful information */
	vmbus_msghc_put(sc, mh);

	return 0;
}

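/*
 * Close one channel: stop further task scheduling, free its sysctl
 * nodes, send CHCLOSE to the host, then disconnect and destroy the
 * TX/RX bufrings.
 */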
static void
vmbus_chan_close_internal(struct hv_vmbus_channel *chan)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_chclose *req;
	struct taskqueue *tq = chan->ch_tq;
	int error;

	/* TODO: stringent check */
	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);

	/*
	 * Free this channel's sysctl tree attached to its device's
	 * sysctl tree.
	 */
	sysctl_ctx_free(&chan->ch_sysctl_ctx);

	/*
	 * Set ch_tq to NULL so that no more requests are scheduled.
	 * XXX pretty broken; need rework.
	 */
	chan->ch_tq = NULL;
	taskqueue_drain(tq, &chan->ch_task);
	chan->ch_cb = NULL;

	/*
	 * Close this channel.
	 */
	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chclose(chan%u)\n",
		    chan->ch_id);
		return;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
	req->chm_chanid = chan->ch_id;

	error = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	if (error) {
		device_printf(sc->vmbus_dev,
		    "chclose(chan%u) msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		return;
	} else if (bootverbose) {
		device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id);
	}

	/*
	 * Disconnect the TX+RX bufrings from this channel.
	 */
	if (chan->ch_bufring_gpadl) {
		vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl);
		chan->ch_bufring_gpadl = 0;
	}

	/*
	 * Destroy the TX+RX bufrings.
	 */
	hv_ring_buffer_cleanup(&chan->outbound);
	hv_ring_buffer_cleanup(&chan->inbound);
	if (chan->ch_bufring != NULL) {
		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
		chan->ch_bufring = NULL;
	}
}

/*
 * Caller should make sure that all sub-channels have
 * been added to 'chan' and all to-be-closed channels
 * are not being opened.
 */
void
hv_vmbus_channel_close(struct hv_vmbus_channel *chan)
{
	int subchan_cnt;

	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Sub-channel is closed when its primary channel
		 * is closed; done.
		 */
		return;
	}

	/*
	 * Close all sub-channels, if any.
	 */
	subchan_cnt = chan->ch_subchan_cnt;
	if (subchan_cnt > 0) {
		struct hv_vmbus_channel **subchan;
		int i;

		subchan = vmbus_subchan_get(chan, subchan_cnt);
		for (i = 0; i < subchan_cnt; ++i)
			vmbus_chan_close_internal(subchan[i]);
		vmbus_subchan_rel(subchan, subchan_cnt);
	}

	/* Then close the primary channel. */
	vmbus_chan_close_internal(chan);
}

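/*
 * Send an inband packet: header plus payload, padded up to the total
 * length returned by VMBUS_CHANPKT_TOTLEN(), written to the TX bufring
 * as a 3-element iovec.  The host is signaled only if the ring buffer
 * write indicates an event is needed.
 */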
int
vmbus_chan_send(struct hv_vmbus_channel *chan, uint16_t type, uint16_t flags,
    void *data, int dlen, uint64_t xactid)
{
	struct vmbus_chanpkt pkt;
	int pktlen, pad_pktlen, hlen, error;
	uint64_t pad = 0;
	struct iovec iov[3];
	boolean_t send_evt;

	hlen = sizeof(pkt);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = type;
	pkt.cp_hdr.cph_flags = flags;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 3, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}

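/*
 * Send a VMBUS_CHANPKT_TYPE_GPA packet carrying a scatter/gather list
 * of guest physical page references, followed by an inband payload.
 */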
int
vmbus_chan_send_sglist(struct hv_vmbus_channel *chan,
    struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid)
{
	struct vmbus_chanpkt_sglist pkt;
	int pktlen, pad_pktlen, hlen, error;
	struct iovec iov[4];
	boolean_t send_evt;
	uint64_t pad = 0;

	KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX,
	    ("invalid sglist len %d", sglen));

	hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;
	pkt.cp_rsvd = 0;
	pkt.cp_gpa_cnt = sglen;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);
	iov[1].iov_base = sg;
	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
	iov[2].iov_base = data;
	iov[2].iov_len = dlen;
	iov[3].iov_base = &pad;
	iov[3].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}

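/*
 * Send a VMBUS_CHANPKT_TYPE_GPA packet carrying a single guest physical
 * page range (prp_cnt page frame numbers), followed by an inband payload.
 */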
int
vmbus_chan_send_prplist(struct hv_vmbus_channel *chan,
    struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen,
    uint64_t xactid)
{
	struct vmbus_chanpkt_prplist pkt;
	int pktlen, pad_pktlen, hlen, error;
	struct iovec iov[4];
	boolean_t send_evt;
	uint64_t pad = 0;

	KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX,
	    ("invalid prplist entry count %d", prp_cnt));

	hlen = __offsetof(struct vmbus_chanpkt_prplist,
	    cp_range[0].gpa_page[prp_cnt]);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;
	pkt.cp_rsvd = 0;
	pkt.cp_range_cnt = 1;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);
	iov[1].iov_base = prp;
	iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]);
	iov[2].iov_base = data;
	iov[2].iov_len = dlen;
	iov[3].iov_base = &pad;
	iov[3].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}

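/*
 * Receive the payload of the next packet in the RX bufring.  On entry
 * *dlen0 is the size of the caller's buffer; on return it is set to the
 * packet's payload size.  Returns ENOBUFS, without consuming the packet,
 * if the buffer is too small.
 */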
int
vmbus_chan_recv(struct hv_vmbus_channel *chan, void *data, int *dlen0,
    uint64_t *xactid)
{
	struct vmbus_chanpkt_hdr pkt;
	int error, dlen, hlen;

	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
	if (error)
		return error;

	hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen);
	dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen;

	if (*dlen0 < dlen) {
		/* Return the size of this packet's data. */
		*dlen0 = dlen;
		return ENOBUFS;
	}

	*xactid = pkt.cph_xactid;
	*dlen0 = dlen;

	/* Skip packet header */
	error = hv_ring_buffer_read(&chan->inbound, data, dlen, hlen);
	KASSERT(!error, ("hv_ring_buffer_read failed"));

	return 0;
}

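/*
 * Receive the next packet from the RX bufring including its header.
 * *pktlen0 follows the same in/out convention as vmbus_chan_recv()'s
 * *dlen0, but counts the whole packet.
 */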
int
vmbus_chan_recv_pkt(struct hv_vmbus_channel *chan,
    struct vmbus_chanpkt_hdr *pkt0, int *pktlen0)
{
	struct vmbus_chanpkt_hdr pkt;
	int error, pktlen;

	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
	if (error)
		return error;

	pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen);
	if (*pktlen0 < pktlen) {
		/* Return the size of this packet. */
		*pktlen0 = pktlen;
		return ENOBUFS;
	}
	*pktlen0 = pktlen;

	/* Include packet header */
	error = hv_ring_buffer_read(&chan->inbound, pkt0, pktlen, 0);
	KASSERT(!error, ("hv_ring_buffer_read failed"));

	return 0;
}

static void
vmbus_chan_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;
	vmbus_chan_callback_t cb = chan->ch_cb;
	void *cbarg = chan->ch_cbarg;

	/*
	 * Optimize host to guest signaling by ensuring:
	 * 1. While reading the channel, we disable interrupts from
	 *    host.
	 * 2. Ensure that we process all posted messages from the host
	 *    before returning from this callback.
	 * 3. Once we return, enable signaling from the host. Once this
	 *    state is set we check to see if additional packets are
	 *    available to read. In this case we repeat the process.
	 *
	 * NOTE: Interrupt has been disabled in the ISR.
	 */
	for (;;) {
		uint32_t left;

		cb(cbarg);

		left = hv_ring_buffer_read_end(&chan->inbound);
		if (left == 0) {
			/* No more data in RX bufring; done */
			break;
		}
		hv_ring_buffer_read_begin(&chan->inbound);
	}
}

static void
vmbus_chan_task_nobatch(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;

	chan->ch_cb(chan->ch_cbarg);
}

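/*
 * Scan the given array of event flag words: each nonzero word is
 * atomically cleared, and the task of every channel whose bit was set
 * is enqueued on that channel's taskqueue.
 */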
static __inline void
vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
    int flag_cnt)
{
	int f;

	for (f = 0; f < flag_cnt; ++f) {
		uint32_t chid_base;
		u_long flags;
		int chid_ofs;

		if (event_flags[f] == 0)
			continue;

		flags = atomic_swap_long(&event_flags[f], 0);
		chid_base = f << VMBUS_EVTFLAG_SHIFT;

		while ((chid_ofs = ffsl(flags)) != 0) {
			struct hv_vmbus_channel *channel;

			--chid_ofs; /* NOTE: ffsl is 1-based */
			flags &= ~(1UL << chid_ofs);

			channel = sc->vmbus_chmap[chid_base + chid_ofs];

			/* if channel is closed or closing */
			if (channel == NULL || channel->ch_tq == NULL)
				continue;

			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
				hv_ring_buffer_read_begin(&channel->inbound);
			taskqueue_enqueue(channel->ch_tq, &channel->ch_task);
		}
	}
}

void
vmbus_event_proc(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	/*
	 * On hosts running Win8 or above, the event page can be checked
	 * directly to get the id of the channel that has the pending
	 * interrupt.
	 */
	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	vmbus_event_flags_proc(sc, eventf->evt_flags,
	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
}

void
vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
	}
}

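/*
 * Make sure the per-CPU event flag count covers this channel's id.
 * The count is only ever raised; a CAS loop is used so that concurrent
 * updates for other channels on the same CPU are not lost.
 */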
static void
vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
    const struct hv_vmbus_channel *chan)
{
	volatile int *flag_cnt_ptr;
	int flag_cnt;

	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);

	for (;;) {
		int old_flag_cnt;

		old_flag_cnt = *flag_cnt_ptr;
		if (old_flag_cnt >= flag_cnt)
			break;
		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
			if (bootverbose) {
				device_printf(sc->vmbus_dev,
				    "channel%u update cpu%d flag_cnt to %d\n",
				    chan->ch_id, chan->ch_cpuid, flag_cnt);
			}
			break;
		}
	}
}

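/*
 * Allocate and minimally initialize a channel structure, including the
 * DMA memory that backs its monitor parameter.
 */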
static struct hv_vmbus_channel *
vmbus_chan_alloc(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);

	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (chan->ch_monprm == NULL) {
		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
		free(chan, M_DEVBUF);
		return NULL;
	}

	chan->vmbus_sc = sc;
	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
	TAILQ_INIT(&chan->ch_subchans);
	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);

	return chan;
}

static void
vmbus_chan_free(struct hv_vmbus_channel *chan)
{
	/* TODO: assert sub-channel list is empty */
	/* TODO: assert no longer on the primary channel's sub-channel list */
	/* TODO: assert no longer on the vmbus channel list */
	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
	mtx_destroy(&chan->ch_subchan_lock);
	free(chan, M_DEVBUF);
}

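/*
 * Register a newly offered channel: primary channels are appended to
 * the softc's primary channel list, while sub-channels are linked to
 * the primary channel sharing the same type and instance GUIDs.
 */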
static int
vmbus_chan_add(struct hv_vmbus_channel *newchan)
{
	struct vmbus_softc *sc = newchan->vmbus_sc;
	struct hv_vmbus_channel *prichan;

	if (newchan->ch_id == 0) {
		/*
		 * XXX
		 * Chan0 will neither be processed nor should be offered;
		 * skip it.
		 */
		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
		return EINVAL;
	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
		    newchan->ch_id);
		return EINVAL;
	}
	sc->vmbus_chmap[newchan->ch_id] = newchan;

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
		    newchan->ch_id, newchan->ch_subidx);
	}

	mtx_lock(&sc->vmbus_prichan_lock);
	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
		/*
		 * Sub-channel will have the same type GUID and instance
		 * GUID as its primary channel.
		 */
		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
		    sizeof(struct hyperv_guid)) == 0 &&
		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
		    sizeof(struct hyperv_guid)) == 0)
			break;
	}
	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
		if (prichan == NULL) {
			/* Install the new primary channel */
			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
			    ch_prilink);
			mtx_unlock(&sc->vmbus_prichan_lock);
			return 0;
		} else {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "duplicated primary "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
	} else { /* Sub-channel */
		if (prichan == NULL) {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "no primary chan for "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
		/*
		 * Found the primary channel for this sub-channel and
		 * move on.
		 *
		 * XXX refcnt prichan
		 */
	}
	mtx_unlock(&sc->vmbus_prichan_lock);

	/*
	 * This is a sub-channel; link it with the primary channel.
	 */
	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
	    ("new channel is not sub-channel"));
	KASSERT(prichan != NULL, ("no primary channel"));

	newchan->ch_prichan = prichan;
	newchan->ch_dev = prichan->ch_dev;

	mtx_lock(&prichan->ch_subchan_lock);
	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
	/*
	 * Bump up sub-channel count and notify anyone that is
	 * interested in this sub-channel, after this sub-channel
	 * is setup.
	 */
	prichan->ch_subchan_cnt++;
	mtx_unlock(&prichan->ch_subchan_lock);
	wakeup(prichan);

	return 0;
}

void
vmbus_chan_cpu_set(struct hv_vmbus_channel *chan, int cpu)
{
	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));

	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
		/* Only cpu0 is supported */
		cpu = 0;
	}

	chan->ch_cpuid = cpu;
	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);

	if (bootverbose) {
		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
	}
}

void
vmbus_chan_cpu_rr(struct hv_vmbus_channel *chan)
{
	static uint32_t vmbus_chan_nextcpu;
	int cpu;

	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
	vmbus_chan_cpu_set(chan, cpu);
}

static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{
	/*
	 * By default, pin the channel to cpu0.  Devices having
	 * special channel-cpu mapping requirement should call
	 * vmbus_chan_cpu_{set,rr}().
	 */
	vmbus_chan_cpu_set(chan, 0);
}

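/*
 * Handle a CHOFFER message from the host: allocate a channel, fill it
 * in from the offer, register it, and add a child device if it is a
 * primary channel.
 */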
static void
vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_choffer *offer;
	struct hv_vmbus_channel *chan;
	int error;

	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;

	chan = vmbus_chan_alloc(sc);
	if (chan == NULL) {
		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
		    offer->chm_chanid);
		return;
	}

	chan->ch_id = offer->chm_chanid;
	chan->ch_subidx = offer->chm_subidx;
	chan->ch_guid_type = offer->chm_chtype;
	chan->ch_guid_inst = offer->chm_chinst;

	/* Batch reading is on by default */
	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;

	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
		chan->ch_monprm->mp_connid = offer->chm_connid;

	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
		/*
		 * Setup MNF stuffs.
		 */
		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
			panic("invalid monitor trigger %u", offer->chm_montrig);
		chan->ch_montrig_mask =
		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
	}

	/* Select default cpu for this channel. */
	vmbus_chan_cpu_default(chan);

	error = vmbus_chan_add(chan);
	if (error) {
		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
		    chan->ch_id, error);
		vmbus_chan_free(chan);
		return;
	}

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Add device for this primary channel.
		 *
		 * NOTE:
		 * Error is ignored here; don't have much to do if error
		 * really happens.
		 */
		vmbus_add_child(chan);
	}
}

/*
 * XXX pretty broken; need rework.
 */
static void
vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_chrescind *note;
	struct hv_vmbus_channel *chan;

	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
	if (note->chm_chanid > VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
		    note->chm_chanid);
		return;
	}

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
		    note->chm_chanid);
	}

	chan = sc->vmbus_chmap[note->chm_chanid];
	if (chan == NULL)
		return;
	sc->vmbus_chmap[note->chm_chanid] = NULL;

	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
}

static void
vmbus_chan_detach_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/* Only primary channel owns the device */
		vmbus_delete_child(chan);
		/* NOTE: DO NOT free primary channel for now */
	} else {
		struct vmbus_softc *sc = chan->vmbus_sc;
		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
		struct vmbus_chanmsg_chfree *req;
		struct vmbus_msghc *mh;
		int error;

		mh = vmbus_msghc_get(sc, sizeof(*req));
		if (mh == NULL) {
			device_printf(sc->vmbus_dev,
			    "can not get msg hypercall for chfree(chan%u)\n",
			    chan->ch_id);
			goto remove;
		}

		req = vmbus_msghc_dataptr(mh);
		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
		req->chm_chanid = chan->ch_id;

		error = vmbus_msghc_exec_noresult(mh);
		vmbus_msghc_put(sc, mh);

		if (error) {
			device_printf(sc->vmbus_dev,
			    "chfree(chan%u) failed: %d",
			    chan->ch_id, error);
			/* NOTE: Move on! */
		} else {
			if (bootverbose) {
				device_printf(sc->vmbus_dev, "chan%u freed\n",
				    chan->ch_id);
			}
		}
remove:
		mtx_lock(&pri_chan->ch_subchan_lock);
		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
		KASSERT(pri_chan->ch_subchan_cnt > 0,
		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
		pri_chan->ch_subchan_cnt--;
		mtx_unlock(&pri_chan->ch_subchan_lock);
		wakeup(pri_chan);

		vmbus_chan_free(chan);
	}
}

/*
 * Detach all devices and destroy the corresponding primary channels.
 */
void
vmbus_chan_destroy_all(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	mtx_lock(&sc->vmbus_prichan_lock);
	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
		mtx_unlock(&sc->vmbus_prichan_lock);

		vmbus_delete_child(chan);
		vmbus_chan_free(chan);

		mtx_lock(&sc->vmbus_prichan_lock);
	}
	bzero(sc->vmbus_chmap,
	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
	mtx_unlock(&sc->vmbus_prichan_lock);
}

/**
 * @brief Select the best outgoing channel
 *
 * The channel whose vcpu binding is closest to the current vcpu will
 * be selected.
 * If there are no sub-channels, the primary channel is always selected.
 *
 * @param primary - primary channel
 */
struct hv_vmbus_channel *
vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
{
	hv_vmbus_channel *new_channel = NULL;
	hv_vmbus_channel *outgoing_channel = primary;
	int old_cpu_distance = 0;
	int new_cpu_distance = 0;
	int cur_vcpu = 0;
	int smp_pro_id = PCPU_GET(cpuid);

	if (TAILQ_EMPTY(&primary->ch_subchans)) {
		return outgoing_channel;
	}

	if (smp_pro_id >= MAXCPU) {
		return outgoing_channel;
	}

	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);

	/* XXX need lock */
	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
			continue;
		}

		if (new_channel->ch_vcpuid == cur_vcpu) {
			return new_channel;
		}

		old_cpu_distance = ((outgoing_channel->ch_vcpuid > cur_vcpu) ?
		    (outgoing_channel->ch_vcpuid - cur_vcpu) :
		    (cur_vcpu - outgoing_channel->ch_vcpuid));

		new_cpu_distance = ((new_channel->ch_vcpuid > cur_vcpu) ?
		    (new_channel->ch_vcpuid - cur_vcpu) :
		    (cur_vcpu - new_channel->ch_vcpuid));

		if (old_cpu_distance < new_cpu_distance) {
			continue;
		}

		outgoing_channel = new_channel;
	}

	return(outgoing_channel);
}

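/*
 * Return an array holding subchan_cnt sub-channels of the given primary
 * channel, sleeping until that many sub-channels have been offered.
 * The array is released with vmbus_subchan_rel().
 */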
struct hv_vmbus_channel **
vmbus_subchan_get(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
{
	struct hv_vmbus_channel **ret, *chan;
	int i;

	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
	    M_WAITOK);

	mtx_lock(&pri_chan->ch_subchan_lock);

	while (pri_chan->ch_subchan_cnt < subchan_cnt)
		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);

	i = 0;
	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
		/* TODO: refcnt chan */
		ret[i] = chan;

		++i;
		if (i == subchan_cnt)
			break;
	}
	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
	    pri_chan->ch_subchan_cnt, subchan_cnt));

	mtx_unlock(&pri_chan->ch_subchan_lock);

	return ret;
}

void
vmbus_subchan_rel(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
{

	free(subchan, M_TEMP);
}

void
vmbus_subchan_drain(struct hv_vmbus_channel *pri_chan)
{
	mtx_lock(&pri_chan->ch_subchan_lock);
	while (pri_chan->ch_subchan_cnt > 0)
		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
	mtx_unlock(&pri_chan->ch_subchan_lock);
}

void
vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
{
	vmbus_chanmsg_proc_t msg_proc;
	uint32_t msg_type;

	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
	    ("invalid message type %u", msg_type));

	msg_proc = vmbus_chan_msgprocs[msg_type];
	if (msg_proc != NULL)
		msg_proc(sc, msg);
}