vmbus_chan.c revision 307460
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/dev/hyperv/vmbus/vmbus_chan.c 307460 2016-10-17 03:07:06Z sephe $");
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/systm.h>
36#include <sys/mbuf.h>
37#include <sys/lock.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40
41#include <machine/atomic.h>
42#include <machine/bus.h>
43
44#include <vm/vm.h>
45#include <vm/vm_param.h>
46#include <vm/pmap.h>
47
48#include <dev/hyperv/include/hyperv_busdma.h>
49#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
50#include <dev/hyperv/vmbus/hyperv_var.h>
51#include <dev/hyperv/vmbus/vmbus_reg.h>
52#include <dev/hyperv/vmbus/vmbus_var.h>
53
54static void 	vmbus_chan_signal_tx(struct hv_vmbus_channel *chan);
55static void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
56		    const struct hv_vmbus_channel *);
57
58static void	vmbus_chan_task(void *, int);
59static void	vmbus_chan_task_nobatch(void *, int);
60static void	vmbus_chan_detach_task(void *, int);
61
62static void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
63		    const struct vmbus_message *);
64static void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
65		    const struct vmbus_message *);
66
67/*
68 * Vmbus channel message processing.
69 */
70static const vmbus_chanmsg_proc_t
71vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
72	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
73	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),
74
75	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
76	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
77	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
78};
79
80/**
81 *  @brief Trigger an event notification on the specified channel
82 */
83static void
84vmbus_chan_signal_tx(struct hv_vmbus_channel *chan)
85{
86	struct vmbus_softc *sc = chan->vmbus_sc;
87	uint32_t chanid = chan->ch_id;
88
89	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
90	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
91
92	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
93		atomic_set_int(
94		&sc->vmbus_mnf2->mnf_trigs[chan->ch_montrig_idx].mt_pending,
95		chan->ch_montrig_mask);
96	} else {
97		hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
98	}
99}
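/*
 * Both the TX and RX event-flag areas are arrays of u_long; each word
 * covers VMBUS_EVTFLAG_LEN channel ids, so for a given channel the word
 * index is (chanid >> VMBUS_EVTFLAG_SHIFT) and the bit within that word
 * is (chanid & VMBUS_EVTFLAG_MASK), as used above and in
 * vmbus_event_flags_proc() below.
 */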
100
101static int
102vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS)
103{
104	struct hv_vmbus_channel *chan = arg1;
105	int mnf = 0;
106
107	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
108		mnf = 1;
109	return sysctl_handle_int(oidp, &mnf, 0, req);
110}
111
112static void
113vmbus_chan_sysctl_create(struct hv_vmbus_channel *chan)
114{
115	struct sysctl_oid *ch_tree, *chid_tree, *br_tree;
116	struct sysctl_ctx_list *ctx;
117	uint32_t ch_id;
118	char name[16];
119
120	/*
121	 * Add sysctl nodes related to this channel to the
122	 * channel's own sysctl ctx, so that they can be destroyed
123	 * independently when this channel is closed, which can
124	 * happen even if the device is not detached.
125	 */
126	ctx = &chan->ch_sysctl_ctx;
127	sysctl_ctx_init(ctx);
128
129	/*
130	 * Create dev.NAME.UNIT.channel tree.
131	 */
132	ch_tree = SYSCTL_ADD_NODE(ctx,
133	    SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)),
134	    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
135	if (ch_tree == NULL)
136		return;
137
138	/*
139	 * Create dev.NAME.UNIT.channel.CHANID tree.
140	 */
141	if (VMBUS_CHAN_ISPRIMARY(chan))
142		ch_id = chan->ch_id;
143	else
144		ch_id = chan->ch_prichan->ch_id;
145	snprintf(name, sizeof(name), "%d", ch_id);
146	chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
147	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
148	if (chid_tree == NULL)
149		return;
150
151	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
152		/*
153		 * Create dev.NAME.UNIT.channel.CHANID.sub tree.
154		 */
155		ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree),
156		    OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
157		if (ch_tree == NULL)
158			return;
159
160		/*
161		 * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree.
162		 *
163		 * NOTE:
164		 * chid_tree is changed to this new sysctl tree.
165		 */
166		snprintf(name, sizeof(name), "%d", chan->ch_subidx);
167		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
168		    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
169		if (chid_tree == NULL)
170			return;
171
172		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
173		    "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id");
174	}
175
176	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
177	    "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id");
178	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
179	    "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
180	    chan, 0, vmbus_chan_sysctl_mnf, "I",
181	    "has monitor notification facilities");
182
183	/*
184	 * Create sysctl tree for RX bufring.
185	 */
186	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
187	    "in", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
188	if (br_tree != NULL) {
189		hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(br_tree),
190		    &chan->inbound, "inbound ring buffer stats");
191	}
192
193	/*
194	 * Create sysctl tree for TX bufring.
195	 */
196	br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
197	    "out", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
198	if (br_tree != NULL) {
199		hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(br_tree),
200		    &chan->outbound, "outbound ring buffer stats");
201	}
202}
203
204int
205vmbus_chan_open(struct hv_vmbus_channel *chan, int txbr_size, int rxbr_size,
206    const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg)
207{
208	struct vmbus_softc *sc = chan->vmbus_sc;
209	const struct vmbus_chanmsg_chopen_resp *resp;
210	const struct vmbus_message *msg;
211	struct vmbus_chanmsg_chopen *req;
212	struct vmbus_msghc *mh;
213	uint32_t status;
214	int error;
215	uint8_t *br;
216
217	if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
218		device_printf(sc->vmbus_dev,
219		    "invalid udata len %d for chan%u\n", udlen, chan->ch_id);
220		return EINVAL;
221	}
222	KASSERT((txbr_size & PAGE_MASK) == 0,
223	    ("send bufring size is not a multiple of the page size"));
224	KASSERT((rxbr_size & PAGE_MASK) == 0,
225	    ("recv bufring size is not a multiple of the page size"));
226
227	if (atomic_testandset_int(&chan->ch_stflags,
228	    VMBUS_CHAN_ST_OPENED_SHIFT))
229		panic("double-open chan%u", chan->ch_id);
230
231	chan->ch_cb = cb;
232	chan->ch_cbarg = cbarg;
233
234	vmbus_chan_update_evtflagcnt(sc, chan);
235
236	chan->ch_tq = VMBUS_PCPU_GET(chan->vmbus_sc, event_tq, chan->ch_cpuid);
237	if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
238		TASK_INIT(&chan->ch_task, 0, vmbus_chan_task, chan);
239	else
240		TASK_INIT(&chan->ch_task, 0, vmbus_chan_task_nobatch, chan);
241
242	/*
243	 * Allocate the TX+RX bufrings.
244	 * XXX should use ch_dev dtag
245	 */
246	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
247	    PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma,
248	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
249	if (br == NULL) {
250		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
251		error = ENOMEM;
252		goto failed;
253	}
254	chan->ch_bufring = br;
255
256	/* TX bufring comes first */
257	hv_vmbus_ring_buffer_init(&chan->outbound, br, txbr_size);
258	/* RX bufring immediately follows TX bufring */
259	hv_vmbus_ring_buffer_init(&chan->inbound, br + txbr_size, rxbr_size);
260
261	/* Create sysctl tree for this channel */
262	vmbus_chan_sysctl_create(chan);
263
264	/*
265	 * Connect the bufrings, both RX and TX, to this channel.
266	 */
267	error = vmbus_chan_gpadl_connect(chan, chan->ch_bufring_dma.hv_paddr,
268	    txbr_size + rxbr_size, &chan->ch_bufring_gpadl);
269	if (error) {
270		device_printf(sc->vmbus_dev,
271		    "failed to connect bufring GPADL to chan%u\n", chan->ch_id);
272		goto failed;
273	}
274
275	/*
276	 * Open channel w/ the bufring GPADL on the target CPU.
277	 */
278	mh = vmbus_msghc_get(sc, sizeof(*req));
279	if (mh == NULL) {
280		device_printf(sc->vmbus_dev,
281		    "can not get msg hypercall for chopen(chan%u)\n",
282		    chan->ch_id);
283		error = ENXIO;
284		goto failed;
285	}
286
287	req = vmbus_msghc_dataptr(mh);
288	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
289	req->chm_chanid = chan->ch_id;
290	req->chm_openid = chan->ch_id;
291	req->chm_gpadl = chan->ch_bufring_gpadl;
292	req->chm_vcpuid = chan->ch_vcpuid;
293	req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT;
294	if (udlen > 0)
295		memcpy(req->chm_udata, udata, udlen);
296
297	error = vmbus_msghc_exec(sc, mh);
298	if (error) {
299		device_printf(sc->vmbus_dev,
300		    "chopen(chan%u) msg hypercall exec failed: %d\n",
301		    chan->ch_id, error);
302		vmbus_msghc_put(sc, mh);
303		goto failed;
304	}
305
306	msg = vmbus_msghc_wait_result(sc, mh);
307	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
308	status = resp->chm_status;
309
310	vmbus_msghc_put(sc, mh);
311
312	if (status == 0) {
313		if (bootverbose) {
314			device_printf(sc->vmbus_dev, "chan%u opened\n",
315			    chan->ch_id);
316		}
317		return 0;
318	}
319
320	device_printf(sc->vmbus_dev, "failed to open chan%u\n", chan->ch_id);
321	error = ENXIO;
322
323failed:
324	if (chan->ch_bufring_gpadl) {
325		vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl);
326		chan->ch_bufring_gpadl = 0;
327	}
328	if (chan->ch_bufring != NULL) {
329		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
330		chan->ch_bufring = NULL;
331	}
332	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);
333	return error;
334}
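/*
 * A minimal usage sketch for vmbus_chan_open(); the callback name, softc
 * and bufring sizes below are illustrative only, not part of this file:
 *
 *	static void
 *	my_chan_callback(void *xsc)
 *	{
 *		(drain the RX bufring here, e.g. with vmbus_chan_recv())
 *	}
 *
 *	error = vmbus_chan_open(chan, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
 *	    NULL, 0, my_chan_callback, sc);
 *	if (error)
 *		return (error);
 *	...
 *	vmbus_chan_close(chan);
 *
 * Both bufring sizes must be multiples of PAGE_SIZE (see the KASSERTs
 * above), and udata, if used, must fit in VMBUS_CHANMSG_CHOPEN_UDATA_SIZE.
 */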
335
336int
337vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
338    int size, uint32_t *gpadl0)
339{
340	struct vmbus_softc *sc = chan->vmbus_sc;
341	struct vmbus_msghc *mh;
342	struct vmbus_chanmsg_gpadl_conn *req;
343	const struct vmbus_message *msg;
344	size_t reqsz;
345	uint32_t gpadl, status;
346	int page_count, range_len, i, cnt, error;
347	uint64_t page_id;
348
349	/*
350	 * Preliminary checks.
351	 */
352
353	KASSERT((size & PAGE_MASK) == 0,
354	    ("invalid GPA size %d, not a multiple of the page size", size));
355	page_count = size >> PAGE_SHIFT;
356
357	KASSERT((paddr & PAGE_MASK) == 0,
358	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
359	page_id = paddr >> PAGE_SHIFT;
360
361	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
362	/*
363	 * We don't support multiple GPA ranges.
364	 */
365	if (range_len > UINT16_MAX) {
366		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
367		    page_count);
368		return EOPNOTSUPP;
369	}
370
371	/*
372	 * Allocate GPADL id.
373	 */
374	gpadl = vmbus_gpadl_alloc(sc);
375	*gpadl0 = gpadl;
376
377	/*
378	 * Connect this GPADL to the target channel.
379	 *
380	 * NOTE:
381	 * Since each message can only hold a small set of page
382	 * addresses, several messages may be required to
383	 * complete the connection.
384	 */
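	/*
	 * Concretely: if page_count exceeds VMBUS_CHANMSG_GPADL_CONN_PGMAX,
	 * the initial GPADL_CONN message below carries the first
	 * VMBUS_CHANMSG_GPADL_CONN_PGMAX page numbers and the remainder is
	 * sent in GPADL_SUBCONN messages of at most
	 * VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX pages each; only the final
	 * response is waited on.
	 */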
385	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
386		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
387	else
388		cnt = page_count;
389	page_count -= cnt;
390
391	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
392	    chm_range.gpa_page[cnt]);
393	mh = vmbus_msghc_get(sc, reqsz);
394	if (mh == NULL) {
395		device_printf(sc->vmbus_dev,
396		    "can not get msg hypercall for gpadl->chan%u\n",
397		    chan->ch_id);
398		return EIO;
399	}
400
401	req = vmbus_msghc_dataptr(mh);
402	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
403	req->chm_chanid = chan->ch_id;
404	req->chm_gpadl = gpadl;
405	req->chm_range_len = range_len;
406	req->chm_range_cnt = 1;
407	req->chm_range.gpa_len = size;
408	req->chm_range.gpa_ofs = 0;
409	for (i = 0; i < cnt; ++i)
410		req->chm_range.gpa_page[i] = page_id++;
411
412	error = vmbus_msghc_exec(sc, mh);
413	if (error) {
414		device_printf(sc->vmbus_dev,
415		    "gpadl->chan%u msg hypercall exec failed: %d\n",
416		    chan->ch_id, error);
417		vmbus_msghc_put(sc, mh);
418		return error;
419	}
420
421	while (page_count > 0) {
422		struct vmbus_chanmsg_gpadl_subconn *subreq;
423
424		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
425			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
426		else
427			cnt = page_count;
428		page_count -= cnt;
429
430		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
431		    chm_gpa_page[cnt]);
432		vmbus_msghc_reset(mh, reqsz);
433
434		subreq = vmbus_msghc_dataptr(mh);
435		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
436		subreq->chm_gpadl = gpadl;
437		for (i = 0; i < cnt; ++i)
438			subreq->chm_gpa_page[i] = page_id++;
439
440		vmbus_msghc_exec_noresult(mh);
441	}
442	KASSERT(page_count == 0, ("invalid page count %d", page_count));
443
444	msg = vmbus_msghc_wait_result(sc, mh);
445	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
446	    msg->msg_data)->chm_status;
447
448	vmbus_msghc_put(sc, mh);
449
450	if (status != 0) {
451		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
452		    "status %u\n", chan->ch_id, status);
453		return EIO;
454	} else {
455		if (bootverbose) {
456			device_printf(sc->vmbus_dev, "gpadl->chan%u "
457			    "succeeded\n", chan->ch_id);
458		}
459	}
460	return 0;
461}
462
463/*
464 * Disconnect the GPA from the target channel
465 */
466int
467vmbus_chan_gpadl_disconnect(struct hv_vmbus_channel *chan, uint32_t gpadl)
468{
469	struct vmbus_softc *sc = chan->vmbus_sc;
470	struct vmbus_msghc *mh;
471	struct vmbus_chanmsg_gpadl_disconn *req;
472	int error;
473
474	mh = vmbus_msghc_get(sc, sizeof(*req));
475	if (mh == NULL) {
476		device_printf(sc->vmbus_dev,
477		    "can not get msg hypercall for gpa x->chan%u\n",
478		    chan->ch_id);
479		return EBUSY;
480	}
481
482	req = vmbus_msghc_dataptr(mh);
483	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
484	req->chm_chanid = chan->ch_id;
485	req->chm_gpadl = gpadl;
486
487	error = vmbus_msghc_exec(sc, mh);
488	if (error) {
489		device_printf(sc->vmbus_dev,
490		    "gpa x->chan%u msg hypercall exec failed: %d\n",
491		    chan->ch_id, error);
492		vmbus_msghc_put(sc, mh);
493		return error;
494	}
495
496	vmbus_msghc_wait_result(sc, mh);
497	/* Discard result; no useful information */
498	vmbus_msghc_put(sc, mh);
499
500	return 0;
501}
502
503static void
504vmbus_chan_close_internal(struct hv_vmbus_channel *chan)
505{
506	struct vmbus_softc *sc = chan->vmbus_sc;
507	struct vmbus_msghc *mh;
508	struct vmbus_chanmsg_chclose *req;
509	struct taskqueue *tq = chan->ch_tq;
510	int error;
511
512	/* TODO: stringent check */
513	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);
514
515	/*
516	 * Free this channel's sysctl tree attached to its device's
517	 * sysctl tree.
518	 */
519	sysctl_ctx_free(&chan->ch_sysctl_ctx);
520
521	/*
522	 * Set ch_tq to NULL to prevent more requests from being scheduled.
523	 * XXX pretty broken; need rework.
524	 */
525	chan->ch_tq = NULL;
526	taskqueue_drain(tq, &chan->ch_task);
527	chan->ch_cb = NULL;
528
529	/*
530	 * Close this channel.
531	 */
532	mh = vmbus_msghc_get(sc, sizeof(*req));
533	if (mh == NULL) {
534		device_printf(sc->vmbus_dev,
535		    "can not get msg hypercall for chclose(chan%u)\n",
536		    chan->ch_id);
537		return;
538	}
539
540	req = vmbus_msghc_dataptr(mh);
541	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
542	req->chm_chanid = chan->ch_id;
543
544	error = vmbus_msghc_exec_noresult(mh);
545	vmbus_msghc_put(sc, mh);
546
547	if (error) {
548		device_printf(sc->vmbus_dev,
549		    "chclose(chan%u) msg hypercall exec failed: %d\n",
550		    chan->ch_id, error);
551		return;
552	} else if (bootverbose) {
553		device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id);
554	}
555
556	/*
557	 * Disconnect the TX+RX bufrings from this channel.
558	 */
559	if (chan->ch_bufring_gpadl) {
560		vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl);
561		chan->ch_bufring_gpadl = 0;
562	}
563
564	/*
565	 * Destroy the TX+RX bufrings.
566	 */
567	hv_ring_buffer_cleanup(&chan->outbound);
568	hv_ring_buffer_cleanup(&chan->inbound);
569	if (chan->ch_bufring != NULL) {
570		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
571		chan->ch_bufring = NULL;
572	}
573}
574
575/*
576 * The caller must make sure that all sub-channels have
577 * been added to 'chan' and that none of the to-be-closed
578 * channels are being opened.
579 */
580void
581vmbus_chan_close(struct hv_vmbus_channel *chan)
582{
583	int subchan_cnt;
584
585	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
586		/*
587		 * Sub-channel is closed when its primary channel
588		 * is closed; done.
589		 */
590		return;
591	}
592
593	/*
594	 * Close all sub-channels, if any.
595	 */
596	subchan_cnt = chan->ch_subchan_cnt;
597	if (subchan_cnt > 0) {
598		struct hv_vmbus_channel **subchan;
599		int i;
600
601		subchan = vmbus_subchan_get(chan, subchan_cnt);
602		for (i = 0; i < subchan_cnt; ++i)
603			vmbus_chan_close_internal(subchan[i]);
604		vmbus_subchan_rel(subchan, subchan_cnt);
605	}
606
607	/* Then close the primary channel. */
608	vmbus_chan_close_internal(chan);
609}
610
611int
612vmbus_chan_send(struct hv_vmbus_channel *chan, uint16_t type, uint16_t flags,
613    void *data, int dlen, uint64_t xactid)
614{
615	struct vmbus_chanpkt pkt;
616	int pktlen, pad_pktlen, hlen, error;
617	uint64_t pad = 0;
618	struct iovec iov[3];
619	boolean_t send_evt;
620
621	hlen = sizeof(pkt);
622	pktlen = hlen + dlen;
623	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
624
625	pkt.cp_hdr.cph_type = type;
626	pkt.cp_hdr.cph_flags = flags;
627	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
628	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
629	pkt.cp_hdr.cph_xactid = xactid;
630
631	iov[0].iov_base = &pkt;
632	iov[0].iov_len = hlen;
633	iov[1].iov_base = data;
634	iov[1].iov_len = dlen;
635	iov[2].iov_base = &pad;
636	iov[2].iov_len = pad_pktlen - pktlen;
637
638	error = hv_ring_buffer_write(&chan->outbound, iov, 3, &send_evt);
639	if (!error && send_evt)
640		vmbus_chan_signal_tx(chan);
641	return error;
642}
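/*
 * What vmbus_chan_send() lays out on the TX bufring (the three iovecs
 * above, shown schematically):
 *
 *	| cp_hdr (hlen) | data (dlen) | zero pad (pad_pktlen - pktlen) |
 *
 * The padding rounds the total length up to VMBUS_CHANPKT_TOTLEN(pktlen);
 * cph_hlen and cph_tlen are stored via VMBUS_CHANPKT_SETLEN(), which
 * VMBUS_CHANPKT_GETLEN() undoes on the receive side.
 */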
643
644int
645vmbus_chan_send_sglist(struct hv_vmbus_channel *chan,
646    struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid)
647{
648	struct vmbus_chanpkt_sglist pkt;
649	int pktlen, pad_pktlen, hlen, error;
650	struct iovec iov[4];
651	boolean_t send_evt;
652	uint64_t pad = 0;
653
654	KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX,
655	    ("invalid sglist len %d", sglen));
656
657	hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]);
658	pktlen = hlen + dlen;
659	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
660
661	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
662	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
663	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
664	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
665	pkt.cp_hdr.cph_xactid = xactid;
666	pkt.cp_rsvd = 0;
667	pkt.cp_gpa_cnt = sglen;
668
669	iov[0].iov_base = &pkt;
670	iov[0].iov_len = sizeof(pkt);
671	iov[1].iov_base = sg;
672	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
673	iov[2].iov_base = data;
674	iov[2].iov_len = dlen;
675	iov[3].iov_base = &pad;
676	iov[3].iov_len = pad_pktlen - pktlen;
677
678	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
679	if (!error && send_evt)
680		vmbus_chan_signal_tx(chan);
681	return error;
682}
683
684int
685vmbus_chan_send_prplist(struct hv_vmbus_channel *chan,
686    struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen,
687    uint64_t xactid)
688{
689	struct vmbus_chanpkt_prplist pkt;
690	int pktlen, pad_pktlen, hlen, error;
691	struct iovec iov[4];
692	boolean_t send_evt;
693	uint64_t pad = 0;
694
695	KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX,
696	    ("invalid prplist entry count %d", prp_cnt));
697
698	hlen = __offsetof(struct vmbus_chanpkt_prplist,
699	    cp_range[0].gpa_page[prp_cnt]);
700	pktlen = hlen + dlen;
701	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);
702
703	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
704	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
705	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
706	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
707	pkt.cp_hdr.cph_xactid = xactid;
708	pkt.cp_rsvd = 0;
709	pkt.cp_range_cnt = 1;
710
711	iov[0].iov_base = &pkt;
712	iov[0].iov_len = sizeof(pkt);
713	iov[1].iov_base = prp;
714	iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]);
715	iov[2].iov_base = data;
716	iov[2].iov_len = dlen;
717	iov[3].iov_base = &pad;
718	iov[3].iov_len = pad_pktlen - pktlen;
719
720	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
721	if (!error && send_evt)
722		vmbus_chan_signal_tx(chan);
723	return error;
724}
725
726int
727vmbus_chan_recv(struct hv_vmbus_channel *chan, void *data, int *dlen0,
728    uint64_t *xactid)
729{
730	struct vmbus_chanpkt_hdr pkt;
731	int error, dlen, hlen;
732
733	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
734	if (error)
735		return error;
736
737	hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen);
738	dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen;
739
740	if (*dlen0 < dlen) {
741		/* Return the size of this packet's data. */
742		*dlen0 = dlen;
743		return ENOBUFS;
744	}
745
746	*xactid = pkt.cph_xactid;
747	*dlen0 = dlen;
748
749	/* Skip packet header */
750	error = hv_ring_buffer_read(&chan->inbound, data, dlen, hlen);
751	KASSERT(!error, ("hv_ring_buffer_read failed"));
752
753	return 0;
754}
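/*
 * A receive sketch (buffer and variable names are illustrative): when the
 * supplied buffer is too small, *dlen0 is updated to the required size and
 * ENOBUFS is returned, so the caller may retry with a larger buffer:
 *
 *	int dlen = sizeof(buf);
 *	uint64_t xactid;
 *
 *	error = vmbus_chan_recv(chan, buf, &dlen, &xactid);
 *	if (error == ENOBUFS)
 *		... dlen now holds the size this packet needs ...
 *
 * vmbus_chan_recv_pkt() below behaves the same way, but also returns the
 * packet header.
 */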
755
756int
757vmbus_chan_recv_pkt(struct hv_vmbus_channel *chan,
758    struct vmbus_chanpkt_hdr *pkt0, int *pktlen0)
759{
760	struct vmbus_chanpkt_hdr pkt;
761	int error, pktlen;
762
763	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
764	if (error)
765		return error;
766
767	pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen);
768	if (*pktlen0 < pktlen) {
769		/* Return the size of this packet. */
770		*pktlen0 = pktlen;
771		return ENOBUFS;
772	}
773	*pktlen0 = pktlen;
774
775	/* Include packet header */
776	error = hv_ring_buffer_read(&chan->inbound, pkt0, pktlen, 0);
777	KASSERT(!error, ("hv_ring_buffer_read failed"));
778
779	return 0;
780}
781
782static void
783vmbus_chan_task(void *xchan, int pending __unused)
784{
785	struct hv_vmbus_channel *chan = xchan;
786	vmbus_chan_callback_t cb = chan->ch_cb;
787	void *cbarg = chan->ch_cbarg;
788
789	/*
790	 * Optimize host-to-guest signaling by ensuring that:
791	 * 1. While reading the channel, interrupts from the host are
792	 *    disabled.
793	 * 2. All posted messages from the host are processed before
794	 *    returning from this callback.
795	 * 3. Once we return, signaling from the host is re-enabled.  At
796	 *    that point we check whether additional packets are
797	 *    available to read; if so, the process is repeated.
798	 *
799	 * NOTE: Interrupt has been disabled in the ISR.
800	 */
801	for (;;) {
802		uint32_t left;
803
804		cb(cbarg);
805
806		left = hv_ring_buffer_read_end(&chan->inbound);
807		if (left == 0) {
808			/* No more data in RX bufring; done */
809			break;
810		}
811		hv_ring_buffer_read_begin(&chan->inbound);
812	}
813}
814
815static void
816vmbus_chan_task_nobatch(void *xchan, int pending __unused)
817{
818	struct hv_vmbus_channel *chan = xchan;
819
820	chan->ch_cb(chan->ch_cbarg);
821}
822
823static __inline void
824vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
825    int flag_cnt)
826{
827	int f;
828
829	for (f = 0; f < flag_cnt; ++f) {
830		uint32_t chid_base;
831		u_long flags;
832		int chid_ofs;
833
834		if (event_flags[f] == 0)
835			continue;
836
837		flags = atomic_swap_long(&event_flags[f], 0);
838		chid_base = f << VMBUS_EVTFLAG_SHIFT;
839
840		while ((chid_ofs = ffsl(flags)) != 0) {
841			struct hv_vmbus_channel *chan;
842
843			--chid_ofs; /* NOTE: ffsl is 1-based */
844			flags &= ~(1UL << chid_ofs);
845
846			chan = sc->vmbus_chmap[chid_base + chid_ofs];
847
848			/* if channel is closed or closing */
849			if (chan == NULL || chan->ch_tq == NULL)
850				continue;
851
852			if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
853				hv_ring_buffer_read_begin(&chan->inbound);
854			taskqueue_enqueue(chan->ch_tq, &chan->ch_task);
855		}
856	}
857}
858
859void
860vmbus_event_proc(struct vmbus_softc *sc, int cpu)
861{
862	struct vmbus_evtflags *eventf;
863
864	/*
865	 * On hosts running Win8 or newer, the event page can be checked directly
866	 * to get the id of the channel that has the pending interrupt.
867	 */
868	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
869	vmbus_event_flags_proc(sc, eventf->evt_flags,
870	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
871}
872
873void
874vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
875{
876	struct vmbus_evtflags *eventf;
877
878	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
879	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
880		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
881		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
882	}
883}
884
885static void
886vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
887    const struct hv_vmbus_channel *chan)
888{
889	volatile int *flag_cnt_ptr;
890	int flag_cnt;
891
892	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
893	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);
894
895	for (;;) {
896		int old_flag_cnt;
897
898		old_flag_cnt = *flag_cnt_ptr;
899		if (old_flag_cnt >= flag_cnt)
900			break;
901		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
902			if (bootverbose) {
903				device_printf(sc->vmbus_dev,
904				    "channel%u update cpu%d flag_cnt to %d\n",
905				    chan->ch_id, chan->ch_cpuid, flag_cnt);
906			}
907			break;
908		}
909	}
910}
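/*
 * The CAS loop above only ever raises event_flags_cnt; it tells
 * vmbus_event_proc() on the channel's target CPU how many event-flag words
 * to scan, so the word carrying this channel's bit is always covered.
 */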
911
912static struct hv_vmbus_channel *
913vmbus_chan_alloc(struct vmbus_softc *sc)
914{
915	struct hv_vmbus_channel *chan;
916
917	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
918
919	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
920	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
921	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
922	if (chan->ch_monprm == NULL) {
923		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
924		free(chan, M_DEVBUF);
925		return NULL;
926	}
927
928	chan->vmbus_sc = sc;
929	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
930	TAILQ_INIT(&chan->ch_subchans);
931	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
932
933	return chan;
934}
935
936static void
937vmbus_chan_free(struct hv_vmbus_channel *chan)
938{
939	/* TODO: assert sub-channel list is empty */
940	/* TODO: assert no longer on the primary channel's sub-channel list */
941	/* TODO: assert no longer on the vmbus channel list */
942	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
943	mtx_destroy(&chan->ch_subchan_lock);
944	free(chan, M_DEVBUF);
945}
946
947static int
948vmbus_chan_add(struct hv_vmbus_channel *newchan)
949{
950	struct vmbus_softc *sc = newchan->vmbus_sc;
951	struct hv_vmbus_channel *prichan;
952
953	if (newchan->ch_id == 0) {
954		/*
955		 * XXX
956		 * Chan0 is never processed and should not be offered;
957		 * skip it.
958		 */
959		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
960		return EINVAL;
961	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
962		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
963		    newchan->ch_id);
964		return EINVAL;
965	}
966	sc->vmbus_chmap[newchan->ch_id] = newchan;
967
968	if (bootverbose) {
969		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
970		    newchan->ch_id, newchan->ch_subidx);
971	}
972
973	mtx_lock(&sc->vmbus_prichan_lock);
974	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
975		/*
976		 * Sub-channel will have the same type GUID and instance
977		 * GUID as its primary channel.
978		 */
979		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
980		    sizeof(struct hyperv_guid)) == 0 &&
981		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
982		    sizeof(struct hyperv_guid)) == 0)
983			break;
984	}
985	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
986		if (prichan == NULL) {
987			/* Install the new primary channel */
988			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
989			    ch_prilink);
990			mtx_unlock(&sc->vmbus_prichan_lock);
991			return 0;
992		} else {
993			mtx_unlock(&sc->vmbus_prichan_lock);
994			device_printf(sc->vmbus_dev, "duplicated primary "
995			    "chan%u\n", newchan->ch_id);
996			return EINVAL;
997		}
998	} else { /* Sub-channel */
999		if (prichan == NULL) {
1000			mtx_unlock(&sc->vmbus_prichan_lock);
1001			device_printf(sc->vmbus_dev, "no primary chan for "
1002			    "chan%u\n", newchan->ch_id);
1003			return EINVAL;
1004		}
1005		/*
1006		 * Found the primary channel for this sub-channel and
1007		 * move on.
1008		 *
1009		 * XXX refcnt prichan
1010		 */
1011	}
1012	mtx_unlock(&sc->vmbus_prichan_lock);
1013
1014	/*
1015	 * This is a sub-channel; link it with the primary channel.
1016	 */
1017	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
1018	    ("new channel is not sub-channel"));
1019	KASSERT(prichan != NULL, ("no primary channel"));
1020
1021	newchan->ch_prichan = prichan;
1022	newchan->ch_dev = prichan->ch_dev;
1023
1024	mtx_lock(&prichan->ch_subchan_lock);
1025	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
1026	/*
1027	 * Bump up sub-channel count and notify anyone that is
1028	 * interested in this sub-channel, after this sub-channel
1029	 * is set up.
1030	 */
1031	prichan->ch_subchan_cnt++;
1032	mtx_unlock(&prichan->ch_subchan_lock);
1033	wakeup(prichan);
1034
1035	return 0;
1036}
1037
1038void
1039vmbus_chan_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1040{
1041	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1042
1043	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1044	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1045		/* Only cpu0 is supported */
1046		cpu = 0;
1047	}
1048
1049	chan->ch_cpuid = cpu;
1050	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1051
1052	if (bootverbose) {
1053		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1054		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
1055	}
1056}
1057
1058void
1059vmbus_chan_cpu_rr(struct hv_vmbus_channel *chan)
1060{
1061	static uint32_t vmbus_chan_nextcpu;
1062	int cpu;
1063
1064	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1065	vmbus_chan_cpu_set(chan, cpu);
1066}
1067
1068static void
1069vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
1070{
1071	/*
1072	 * By default, pin the channel to cpu0.  Devices having
1073	 * special channel-cpu mapping requirement should call
1074	 * vmbus_chan_cpu_{set,rr}().
1075	 */
1076	vmbus_chan_cpu_set(chan, 0);
1077}
1078
1079static void
1080vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
1081    const struct vmbus_message *msg)
1082{
1083	const struct vmbus_chanmsg_choffer *offer;
1084	struct hv_vmbus_channel *chan;
1085	int error;
1086
1087	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;
1088
1089	chan = vmbus_chan_alloc(sc);
1090	if (chan == NULL) {
1091		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
1092		    offer->chm_chanid);
1093		return;
1094	}
1095
1096	chan->ch_id = offer->chm_chanid;
1097	chan->ch_subidx = offer->chm_subidx;
1098	chan->ch_guid_type = offer->chm_chtype;
1099	chan->ch_guid_inst = offer->chm_chinst;
1100
1101	/* Batch reading is on by default */
1102	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
1103
1104	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
1105	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
1106		chan->ch_monprm->mp_connid = offer->chm_connid;
1107
1108	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
1109		/*
1110		 * Setup MNF stuffs.
1111		 */
1112		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
1113		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
1114		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
1115			panic("invalid monitor trigger %u", offer->chm_montrig);
1116		chan->ch_montrig_mask =
1117		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
1118	}
1119
1120	/* Select default cpu for this channel. */
1121	vmbus_chan_cpu_default(chan);
1122
1123	error = vmbus_chan_add(chan);
1124	if (error) {
1125		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
1126		    chan->ch_id, error);
1127		vmbus_chan_free(chan);
1128		return;
1129	}
1130
1131	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1132		/*
1133		 * Add device for this primary channel.
1134		 *
1135		 * NOTE:
1136		 * Error is ignored here; don't have much to do if error
1137		 * really happens.
1138		 */
1139		vmbus_add_child(chan);
1140	}
1141}
1142
1143/*
1144 * XXX pretty broken; need rework.
1145 */
1146static void
1147vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1148    const struct vmbus_message *msg)
1149{
1150	const struct vmbus_chanmsg_chrescind *note;
1151	struct hv_vmbus_channel *chan;
1152
1153	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1154	if (note->chm_chanid >= VMBUS_CHAN_MAX) {
1155		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1156		    note->chm_chanid);
1157		return;
1158	}
1159
1160	if (bootverbose) {
1161		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1162		    note->chm_chanid);
1163	}
1164
1165	chan = sc->vmbus_chmap[note->chm_chanid];
1166	if (chan == NULL)
1167		return;
1168	sc->vmbus_chmap[note->chm_chanid] = NULL;
1169
1170	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1171}
1172
1173static void
1174vmbus_chan_detach_task(void *xchan, int pending __unused)
1175{
1176	struct hv_vmbus_channel *chan = xchan;
1177
1178	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1179		/* Only primary channel owns the device */
1180		vmbus_delete_child(chan);
1181		/* NOTE: DO NOT free primary channel for now */
1182	} else {
1183		struct vmbus_softc *sc = chan->vmbus_sc;
1184		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1185		struct vmbus_chanmsg_chfree *req;
1186		struct vmbus_msghc *mh;
1187		int error;
1188
1189		mh = vmbus_msghc_get(sc, sizeof(*req));
1190		if (mh == NULL) {
1191			device_printf(sc->vmbus_dev,
1192			    "can not get msg hypercall for chfree(chan%u)\n",
1193			    chan->ch_id);
1194			goto remove;
1195		}
1196
1197		req = vmbus_msghc_dataptr(mh);
1198		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1199		req->chm_chanid = chan->ch_id;
1200
1201		error = vmbus_msghc_exec_noresult(mh);
1202		vmbus_msghc_put(sc, mh);
1203
1204		if (error) {
1205			device_printf(sc->vmbus_dev,
1206			    "chfree(chan%u) failed: %d\n",
1207			    chan->ch_id, error);
1208			/* NOTE: Move on! */
1209		} else {
1210			if (bootverbose) {
1211				device_printf(sc->vmbus_dev, "chan%u freed\n",
1212				    chan->ch_id);
1213			}
1214		}
1215remove:
1216		mtx_lock(&pri_chan->ch_subchan_lock);
1217		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1218		KASSERT(pri_chan->ch_subchan_cnt > 0,
1219		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1220		pri_chan->ch_subchan_cnt--;
1221		mtx_unlock(&pri_chan->ch_subchan_lock);
1222		wakeup(pri_chan);
1223
1224		vmbus_chan_free(chan);
1225	}
1226}
1227
1228/*
1229 * Detach all devices and destroy the corresponding primary channels.
1230 */
1231void
1232vmbus_chan_destroy_all(struct vmbus_softc *sc)
1233{
1234	struct hv_vmbus_channel *chan;
1235
1236	mtx_lock(&sc->vmbus_prichan_lock);
1237	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1238		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1239		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1240		mtx_unlock(&sc->vmbus_prichan_lock);
1241
1242		vmbus_delete_child(chan);
1243		vmbus_chan_free(chan);
1244
1245		mtx_lock(&sc->vmbus_prichan_lock);
1246	}
1247	bzero(sc->vmbus_chmap,
1248	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1249	mtx_unlock(&sc->vmbus_prichan_lock);
1250}
1251
1252/*
1253 * The channel whose vcpu binding is closest to the current vcpu will
1254 * be selected.
1255 * If there are no sub-channels, the primary channel is always selected.
1256 */
1257struct hv_vmbus_channel *
1258vmbus_chan_cpu2chan(struct hv_vmbus_channel *prichan, int cpu)
1259{
1260	struct hv_vmbus_channel *sel, *chan;
1261	uint32_t vcpu, sel_dist;
1262
1263	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpuid %d", cpu));
1264	if (TAILQ_EMPTY(&prichan->ch_subchans))
1265		return prichan;
1266
1267	vcpu = VMBUS_PCPU_GET(prichan->vmbus_sc, vcpuid, cpu);
1268
1269#define CHAN_VCPU_DIST(ch, vcpu)		\
1270	(((ch)->ch_vcpuid > (vcpu)) ?		\
1271	 ((ch)->ch_vcpuid - (vcpu)) : ((vcpu) - (ch)->ch_vcpuid))
1272
1273#define CHAN_SELECT(ch)				\
1274do {						\
1275	sel = ch;				\
1276	sel_dist = CHAN_VCPU_DIST(ch, vcpu);	\
1277} while (0)
1278
1279	CHAN_SELECT(prichan);
1280
1281	mtx_lock(&prichan->ch_subchan_lock);
1282	TAILQ_FOREACH(chan, &prichan->ch_subchans, ch_sublink) {
1283		uint32_t dist;
1284
1285		KASSERT(chan->ch_stflags & VMBUS_CHAN_ST_OPENED,
1286		    ("chan%u is not opened", chan->ch_id));
1287
1288		if (chan->ch_vcpuid == vcpu) {
1289			/* Exact match; done */
1290			CHAN_SELECT(chan);
1291			break;
1292		}
1293
1294		dist = CHAN_VCPU_DIST(chan, vcpu);
1295		if (sel_dist <= dist) {
1296			/* Far or same distance; skip */
1297			continue;
1298		}
1299
1300		/* Select the closer channel. */
1301		CHAN_SELECT(chan);
1302	}
1303	mtx_unlock(&prichan->ch_subchan_lock);
1304
1305#undef CHAN_SELECT
1306#undef CHAN_VCPU_DIST
1307
1308	return sel;
1309}
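/*
 * A usage sketch (the transmit path and names are hypothetical): a
 * multi-channel driver typically picks the channel closest to the CPU
 * doing the work, e.g. in its transmit routine:
 *
 *	chan = vmbus_chan_cpu2chan(sc->prichan, curcpu);
 *	error = vmbus_chan_send_sglist(chan, sg, nsg, &hdr, sizeof(hdr),
 *	    (uint64_t)(uintptr_t)txdesc);
 *
 * With no sub-channels the primary channel is returned, so callers do not
 * need a special single-channel case.
 */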
1310
1311struct hv_vmbus_channel **
1312vmbus_subchan_get(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1313{
1314	struct hv_vmbus_channel **ret, *chan;
1315	int i;
1316
1317	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1318	    M_WAITOK);
1319
1320	mtx_lock(&pri_chan->ch_subchan_lock);
1321
1322	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1323		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1324
1325	i = 0;
1326	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1327		/* TODO: refcnt chan */
1328		ret[i] = chan;
1329
1330		++i;
1331		if (i == subchan_cnt)
1332			break;
1333	}
1334	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1335	    i, subchan_cnt));
1336
1337	mtx_unlock(&pri_chan->ch_subchan_lock);
1338
1339	return ret;
1340}
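/*
 * A sketch of the intended pattern (the sub-channel request itself is sent
 * elsewhere by the device driver over its primary channel):
 *
 *	subchans = vmbus_subchan_get(prichan, nsubch);	(blocks until ready)
 *	for (i = 0; i < nsubch; ++i)
 *		vmbus_chan_open(subchans[i], ...);
 *	vmbus_subchan_rel(subchans, nsubch);
 *
 * vmbus_subchan_rel() only frees the array; the channels themselves stay
 * linked to the primary channel until the host rescinds them.
 */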
1341
1342void
1343vmbus_subchan_rel(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
1344{
1345
1346	free(subchan, M_TEMP);
1347}
1348
1349void
1350vmbus_subchan_drain(struct hv_vmbus_channel *pri_chan)
1351{
1352	mtx_lock(&pri_chan->ch_subchan_lock);
1353	while (pri_chan->ch_subchan_cnt > 0)
1354		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
1355	mtx_unlock(&pri_chan->ch_subchan_lock);
1356}
1357
1358void
1359vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1360{
1361	vmbus_chanmsg_proc_t msg_proc;
1362	uint32_t msg_type;
1363
1364	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1365	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1366	    ("invalid message type %u", msg_type));
1367
1368	msg_proc = vmbus_chan_msgprocs[msg_type];
1369	if (msg_proc != NULL)
1370		msg_proc(sc, msg);
1371}
1372
1373void
1374vmbus_chan_set_readbatch(struct hv_vmbus_channel *chan, bool on)
1375{
1376	if (!on)
1377		chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD;
1378	else
1379		chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;
1380}
1381