vmbus_chan.c revision 302885
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302885 2016-07-15 07:45:30Z sephe $");
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/systm.h>
36#include <sys/mbuf.h>
37#include <sys/lock.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40
41#include <machine/atomic.h>
42#include <machine/bus.h>
43
44#include <vm/vm.h>
45#include <vm/vm_param.h>
46#include <vm/pmap.h>
47
48#include <dev/hyperv/include/hyperv_busdma.h>
49#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
50#include <dev/hyperv/vmbus/hyperv_var.h>
51#include <dev/hyperv/vmbus/vmbus_reg.h>
52#include <dev/hyperv/vmbus/vmbus_var.h>
53
54static void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
55static void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
56		    const struct hv_vmbus_channel *);
57
58static void	vmbus_chan_task(void *, int);
59static void	vmbus_chan_task_nobatch(void *, int);
60static void	vmbus_chan_detach_task(void *, int);
61
62static void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
63		    const struct vmbus_message *);
64static void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
65		    const struct vmbus_message *);
66
/*
 * Vmbus channel message processing.
 *
 * Dispatch table indexed by channel message type: CHOFFER and
 * CHRESCIND are handled by dedicated functions, while the *_RESP
 * entries merely wake up the thread sleeping on the corresponding
 * message hypercall (vmbus_msghc_wait_result()).
 */
static const vmbus_chanmsg_proc_t
vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),

	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
};
79
/**
 *  @brief Trigger an event notification on the specified channel
 *
 * Sets the channel's bit in the guest-to-host event-flag array, then
 * notifies the host: channels with MNF support set their monitor
 * trigger bit (the host polls the monitor page), others issue a
 * direct signal-event hypercall.
 */
static void
vmbus_chan_send_event(hv_vmbus_channel *channel)
{
	struct vmbus_softc *sc = channel->vmbus_sc;
	uint32_t chanid = channel->ch_id;

	/* Mark this channel's TX event flag pending. */
	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
	    1UL << (chanid & VMBUS_EVTFLAG_MASK));

	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
		/* Latch the monitor trigger; host notices via monitor page. */
		atomic_set_int(
		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
		channel->ch_montrig_mask);
	} else {
		/* No MNF: signal the host synchronously via hypercall. */
		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
	}
}
100
101static int
102vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
103{
104	struct hv_vmbus_channel *chan = arg1;
105	int alloc = 0;
106
107	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
108		alloc = 1;
109	return sysctl_handle_int(oidp, &alloc, 0, req);
110}
111
/*
 * Create the per-channel sysctl tree:
 *   dev.DEVNAME.DEVUNIT.channel.CHANID            (primary channel)
 *   dev.DEVNAME.DEVUNIT.channel.CHANID.sub.SUBIDX (sub-channel)
 * with leaves for the owner CPU, monitor allocation state and the
 * RX/TX ring buffer statistics.
 */
static void
vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
{
	device_t dev;
	struct sysctl_oid *devch_sysctl;
	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
	struct sysctl_ctx_list *ctx;
	uint32_t ch_id;
	uint16_t sub_ch_id;
	char name[16];

	hv_vmbus_channel* primary_ch = channel->ch_prichan;

	if (primary_ch == NULL) {
		/* Primary channel: hang the tree off its own device. */
		dev = channel->ch_dev;
		ch_id = channel->ch_id;
	} else {
		/*
		 * Sub-channel: hang the tree off the primary channel's
		 * device, under the primary's CHANID node.
		 */
		dev = primary_ch->ch_dev;
		ch_id = primary_ch->ch_id;
		sub_ch_id = channel->ch_subidx;
	}
	/* Per-channel sysctl context; torn down on channel close. */
	ctx = &channel->ch_sysctl_ctx;
	sysctl_ctx_init(ctx);
	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
	devch_sysctl = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
	snprintf(name, sizeof(name), "%d", ch_id);
	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
	    	    SYSCTL_CHILDREN(devch_sysctl),
	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	if (primary_ch != NULL) {
		/*
		 * Sub-channel only: add the ...CHANID.sub.SUBIDX node and
		 * re-point devch_id_sysctl at it so the common leaves
		 * below land under the sub-channel's node.
		 */
		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_id_sysctl),
			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		snprintf(name, sizeof(name), "%d", sub_ch_id);
		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_sub_sysctl),
			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
		    OID_AUTO, "chanid", CTLFLAG_RD,
		    &channel->ch_id, 0, "channel id");
	}
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "cpu", CTLFLAG_RD, &channel->ch_cpuid, 0, "owner CPU id");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
	    "is monitor allocated to this channel");

	/* "in"/"out" nodes hold the RX/TX ring buffer statistics. */
	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "in",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "out",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_in_sysctl),
		&(channel->inbound),
		"inbound ring buffer stats");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_out_sysctl),
		&(channel->outbound),
		"outbound ring buffer stats");
}
185
/**
 * @brief Open the specified channel
 *
 * Allocates the TX+RX bufrings as one contiguous DMA block (TX first,
 * RX immediately after), connects the block to the channel through a
 * GPADL, then sends CHOPEN to the host and waits for its response.
 * On any failure, everything set up so far is rolled back.
 *
 * @param new_channel		channel to open; must not already be open
 * @param send_ring_buffer_size	TX bufring size, multiple of PAGE_SIZE
 * @param recv_ring_buffer_size	RX bufring size, multiple of PAGE_SIZE
 * @param user_data		optional udata carried in CHOPEN
 * @param user_data_len		udata length, at most
 *				VMBUS_CHANMSG_CHOPEN_UDATA_SIZE
 * @param cb			channel callback, run on the channel's
 *				per-CPU event taskqueue
 * @param cbarg			argument passed to the callback
 * @return 0 on success, errno on failure
 */
int
hv_vmbus_channel_open(
	hv_vmbus_channel*		new_channel,
	uint32_t			send_ring_buffer_size,
	uint32_t			recv_ring_buffer_size,
	void*				user_data,
	uint32_t			user_data_len,
	vmbus_chan_callback_t		cb,
	void				*cbarg)
{
	struct vmbus_softc *sc = new_channel->vmbus_sc;
	const struct vmbus_chanmsg_chopen_resp *resp;
	const struct vmbus_message *msg;
	struct vmbus_chanmsg_chopen *req;
	struct vmbus_msghc *mh;
	uint32_t status;
	int ret = 0;
	uint8_t *br;

	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
		device_printf(sc->vmbus_dev,
		    "invalid udata len %u for chan%u\n",
		    user_data_len, new_channel->ch_id);
		return EINVAL;
	}
	KASSERT((send_ring_buffer_size & PAGE_MASK) == 0,
	    ("send bufring size is not multiple page"));
	KASSERT((recv_ring_buffer_size & PAGE_MASK) == 0,
	    ("recv bufring size is not multiple page"));

	/* Atomically claim the channel; guards against double-open. */
	if (atomic_testandset_int(&new_channel->ch_stflags,
	    VMBUS_CHAN_ST_OPENED_SHIFT))
		panic("double-open chan%u", new_channel->ch_id);

	new_channel->ch_cb = cb;
	new_channel->ch_cbarg = cbarg;

	/* Make sure event processing covers this channel's flag word. */
	vmbus_chan_update_evtflagcnt(sc, new_channel);

	/* Channel tasks run on the taskqueue of the channel's CPU. */
	new_channel->ch_tq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
	    new_channel->ch_cpuid);
	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task,
		    new_channel);
	} else {
		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task_nobatch,
		    new_channel);
	}

	/*
	 * Allocate the TX+RX bufrings.
	 * XXX should use ch_dev dtag
	 */
	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    PAGE_SIZE, 0, send_ring_buffer_size + recv_ring_buffer_size,
	    &new_channel->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (br == NULL) {
		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
		ret = ENOMEM;
		goto failed;
	}
	new_channel->ch_bufring = br;

	/* TX bufring comes first */
	hv_vmbus_ring_buffer_init(&new_channel->outbound,
	    br, send_ring_buffer_size);
	/* RX bufring immediately follows TX bufring */
	hv_vmbus_ring_buffer_init(&new_channel->inbound,
	    br + send_ring_buffer_size, recv_ring_buffer_size);

	/* Create sysctl tree for this channel */
	vmbus_channel_sysctl_create(new_channel);

	/*
	 * Connect the bufrings, both RX and TX, to this channel.
	 */
	ret = vmbus_chan_gpadl_connect(new_channel,
		new_channel->ch_bufring_dma.hv_paddr,
		send_ring_buffer_size + recv_ring_buffer_size,
		&new_channel->ch_bufring_gpadl);
	if (ret != 0) {
		device_printf(sc->vmbus_dev,
		    "failed to connect bufring GPADL to chan%u\n",
		    new_channel->ch_id);
		goto failed;
	}

	/*
	 * Open channel w/ the bufring GPADL on the target CPU.
	 */
	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chopen(chan%u)\n",
		    new_channel->ch_id);
		ret = ENXIO;
		goto failed;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
	req->chm_chanid = new_channel->ch_id;
	req->chm_openid = new_channel->ch_id;
	req->chm_gpadl = new_channel->ch_bufring_gpadl;
	req->chm_vcpuid = new_channel->ch_vcpuid;
	/* RX bufring starts right after the TX bufring pages. */
	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
	if (user_data_len)
		memcpy(req->chm_udata, user_data, user_data_len);

	ret = vmbus_msghc_exec(sc, mh);
	if (ret != 0) {
		device_printf(sc->vmbus_dev,
		    "chopen(chan%u) msg hypercall exec failed: %d\n",
		    new_channel->ch_id, ret);
		vmbus_msghc_put(sc, mh);
		goto failed;
	}

	/* Wait for the host's CHOPEN_RESP. */
	msg = vmbus_msghc_wait_result(sc, mh);
	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
	status = resp->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status == 0) {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "chan%u opened\n",
			    new_channel->ch_id);
		}
		return 0;
	}

	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
	    new_channel->ch_id);
	ret = ENXIO;

failed:
	/* Roll back in reverse order: GPADL, bufring, OPENED flag. */
	if (new_channel->ch_bufring_gpadl) {
		hv_vmbus_channel_teardown_gpdal(new_channel,
		    new_channel->ch_bufring_gpadl);
		new_channel->ch_bufring_gpadl = 0;
	}
	if (new_channel->ch_bufring != NULL) {
		hyperv_dmamem_free(&new_channel->ch_bufring_dma,
		    new_channel->ch_bufring);
		new_channel->ch_bufring = NULL;
	}
	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
	return ret;
}
339
/**
 * @brief Establish a GPADL for the specified buffer
 *
 * Convenience wrapper around vmbus_chan_gpadl_connect() for a buffer
 * that is physically contiguous; the allocated GPADL id is returned
 * through 'gpadl'.
 */
int
hv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
    void *contig_buffer, uint32_t size, uint32_t *gpadl)
{
	return vmbus_chan_gpadl_connect(channel,
	    hv_get_phys_addr(contig_buffer), size, gpadl);
}
350
/*
 * Connect a GPADL (Guest Physical Address Descriptor List) describing
 * the page-aligned region [paddr, paddr + size) to the target channel.
 * The newly allocated GPADL id is returned through 'gpadl0'.
 *
 * One GPADL_CONN message can carry only a limited number of page
 * numbers, so it may be followed by a train of GPADL_SUBCONN messages;
 * the host responds only once, after the last page has been sent.
 *
 * Returns 0 on success, errno on failure.
 */
int
vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
    int size, uint32_t *gpadl0)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_conn *req;
	const struct vmbus_message *msg;
	size_t reqsz;
	uint32_t gpadl, status;
	int page_count, range_len, i, cnt, error;
	uint64_t page_id;

	/*
	 * Preliminary checks.
	 */

	KASSERT((size & PAGE_MASK) == 0,
	    ("invalid GPA size %d, not multiple page size", size));
	page_count = size >> PAGE_SHIFT;

	KASSERT((paddr & PAGE_MASK) == 0,
	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
	page_id = paddr >> PAGE_SHIFT;

	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
	/*
	 * We don't support multiple GPA ranges.
	 */
	if (range_len > UINT16_MAX) {
		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
		    page_count);
		return EOPNOTSUPP;
	}

	/*
	 * Allocate GPADL id.
	 */
	gpadl = vmbus_gpadl_alloc(sc);
	*gpadl0 = gpadl;

	/*
	 * Connect this GPADL to the target channel.
	 *
	 * NOTE:
	 * Since each message can only hold small set of page
	 * addresses, several messages may be required to
	 * complete the connection.
	 */
	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
	else
		cnt = page_count;
	page_count -= cnt;

	/* Size the initial GPADL_CONN for 'cnt' page numbers. */
	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
	    chm_range.gpa_page[cnt]);
	mh = vmbus_msghc_get(sc, reqsz);
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpadl->chan%u\n",
		    chan->ch_id);
		return EIO;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;
	req->chm_range_len = range_len;
	req->chm_range_cnt = 1;
	req->chm_range.gpa_len = size;
	req->chm_range.gpa_ofs = 0;
	for (i = 0; i < cnt; ++i)
		req->chm_range.gpa_page[i] = page_id++;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpadl->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	/* Send the remaining pages in GPADL_SUBCONN messages. */
	while (page_count > 0) {
		struct vmbus_chanmsg_gpadl_subconn *subreq;

		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
		else
			cnt = page_count;
		page_count -= cnt;

		/* Reuse the same msg hypercall for each sub-connection. */
		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
		    chm_gpa_page[cnt]);
		vmbus_msghc_reset(mh, reqsz);

		subreq = vmbus_msghc_dataptr(mh);
		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
		subreq->chm_gpadl = gpadl;
		for (i = 0; i < cnt; ++i)
			subreq->chm_gpa_page[i] = page_id++;

		/* SUBCONNs get no individual responses from the host. */
		vmbus_msghc_exec_noresult(mh);
	}
	KASSERT(page_count == 0, ("invalid page count %d", page_count));

	/* Single GPADL_CONNRESP arrives after all pages were sent. */
	msg = vmbus_msghc_wait_result(sc, mh);
	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
	    msg->msg_data)->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status != 0) {
		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
		    "status %u\n", chan->ch_id, status);
		return EIO;
	} else {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "gpadl->chan%u "
			    "succeeded\n", chan->ch_id);
		}
	}
	return 0;
}
477
/*
 * Disconnect the GPA from the target channel
 *
 * Sends GPADL_DISCONN for 'gpadl' and waits for the host's response
 * (the response carries no useful information and is discarded).
 * Returns 0 on success, errno on failure.
 */
int
hv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_disconn *req;
	int error;

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpa x->chan%u\n",
		    chan->ch_id);
		return EBUSY;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpa x->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	vmbus_msghc_wait_result(sc, mh);
	/* Discard result; no useful information */
	vmbus_msghc_put(sc, mh);

	return 0;
}
517
/*
 * Close a single channel: stop further task scheduling and drain the
 * in-flight channel task, send CHCLOSE to the host (no response is
 * expected), tear down the bufring GPADL and free the bufring memory.
 *
 * NOTE: if sending CHCLOSE fails, the GPADL and bufring are
 * deliberately leaked, since the host may still use them.
 */
static void
hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
{
	struct vmbus_softc *sc = channel->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_chclose *req;
	struct taskqueue *tq = channel->ch_tq;
	int error;

	/* TODO: stringent check */
	atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);

	/* Destroy the per-channel sysctl tree. */
	sysctl_ctx_free(&channel->ch_sysctl_ctx);

	/*
	 * Set ch_tq to NULL to avoid more requests be scheduled
	 */
	channel->ch_tq = NULL;
	taskqueue_drain(tq, &channel->ch_task);
	channel->ch_cb = NULL;

	/**
	 * Send a closing message
	 */

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chclose(chan%u)\n",
		    channel->ch_id);
		return;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
	req->chm_chanid = channel->ch_id;

	/* CHCLOSE has no response; fire and forget. */
	error = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	if (error) {
		device_printf(sc->vmbus_dev,
		    "chclose(chan%u) msg hypercall exec failed: %d\n",
		    channel->ch_id, error);
		return;
	} else if (bootverbose) {
		device_printf(sc->vmbus_dev, "close chan%u\n",
		    channel->ch_id);
	}

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ch_bufring_gpadl) {
		hv_vmbus_channel_teardown_gpdal(channel,
		    channel->ch_bufring_gpadl);
		channel->ch_bufring_gpadl = 0;
	}

	/* TODO: Send a msg to release the childRelId */

	/* cleanup the ring buffers for this channel */
	hv_ring_buffer_cleanup(&channel->outbound);
	hv_ring_buffer_cleanup(&channel->inbound);

	if (channel->ch_bufring != NULL) {
		hyperv_dmamem_free(&channel->ch_bufring_dma,
		    channel->ch_bufring);
		channel->ch_bufring = NULL;
	}
}
587
588/*
589 * Caller should make sure that all sub-channels have
590 * been added to 'chan' and all to-be-closed channels
591 * are not being opened.
592 */
593void
594hv_vmbus_channel_close(struct hv_vmbus_channel *chan)
595{
596	int subchan_cnt;
597
598	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
599		/*
600		 * Sub-channel is closed when its primary channel
601		 * is closed; done.
602		 */
603		return;
604	}
605
606	/*
607	 * Close all sub-channels, if any.
608	 */
609	subchan_cnt = chan->ch_subchan_cnt;
610	if (subchan_cnt > 0) {
611		struct hv_vmbus_channel **subchan;
612		int i;
613
614		subchan = vmbus_get_subchan(chan, subchan_cnt);
615		for (i = 0; i < subchan_cnt; ++i)
616			hv_vmbus_channel_close_internal(subchan[i]);
617		vmbus_rel_subchan(subchan, subchan_cnt);
618	}
619
620	/* Then close the primary channel. */
621	hv_vmbus_channel_close_internal(chan);
622}
623
/*
 * Send an inband packet: header, caller data, then zero padding up to
 * the 8-byte aligned total length.  The host is signaled if the ring
 * buffer write reports that an event is required.
 *
 * Returns 0 on success, errno from the ring buffer write otherwise.
 */
int
vmbus_chan_send(struct hv_vmbus_channel *chan, uint16_t type, uint16_t flags,
    void *data, int dlen, uint64_t xactid)
{
	struct vmbus_chanpkt pkt;
	int pktlen, pad_pktlen, hlen, error;
	uint64_t pad = 0;
	struct iovec iov[3];
	boolean_t send_evt;

	hlen = sizeof(pkt);
	pktlen = hlen + dlen;
	/* Total length is padded to the channel packet alignment. */
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = type;
	pkt.cp_hdr.cph_flags = flags;
	/* Header/total lengths are encoded in 8-byte units. */
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;

	/* Gather: header, payload, zero padding. */
	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 3, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}
656
/*
 * Send a GPA (scatter/gather) packet: fixed header, the GPA entries,
 * caller data, then zero padding to the aligned total length.  The
 * host is signaled if the ring buffer write requires it.
 *
 * Returns 0 on success, errno from the ring buffer write otherwise.
 */
int
vmbus_chan_send_sglist(struct hv_vmbus_channel *chan,
    struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid)
{
	struct vmbus_chanpkt_sglist pkt;
	int pktlen, pad_pktlen, hlen, error;
	struct iovec iov[4];
	boolean_t send_evt;
	uint64_t pad = 0;

	KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX,
	    ("invalid sglist len %d", sglen));

	/* Header length includes the trailing GPA entry array. */
	hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;
	pkt.cp_rsvd = 0;
	pkt.cp_gpa_cnt = sglen;

	/* Gather: fixed header, GPA entries, payload, zero padding. */
	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);
	iov[1].iov_base = sg;
	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
	iov[2].iov_base = data;
	iov[2].iov_len = dlen;
	iov[3].iov_base = &pad;
	iov[3].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}
696
/*
 * Send a GPA range (physical range list) packet: fixed header, the
 * single GPA range with 'prp_cnt' pages, caller data, then zero
 * padding to the aligned total length.  The host is signaled if the
 * ring buffer write requires it.
 *
 * Returns 0 on success, errno from the ring buffer write otherwise.
 */
int
vmbus_chan_send_prplist(struct hv_vmbus_channel *chan,
    struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen,
    uint64_t xactid)
{
	struct vmbus_chanpkt_prplist pkt;
	int pktlen, pad_pktlen, hlen, error;
	struct iovec iov[4];
	boolean_t send_evt;
	uint64_t pad = 0;

	KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX,
	    ("invalid prplist entry count %d", prp_cnt));

	/* Header length includes the range's trailing page array. */
	hlen = __offsetof(struct vmbus_chanpkt_prplist,
	    cp_range[0].gpa_page[prp_cnt]);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;
	pkt.cp_rsvd = 0;
	/* Only a single GPA range is supported. */
	pkt.cp_range_cnt = 1;

	/* Gather: fixed header, range + pages, payload, zero padding. */
	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);
	iov[1].iov_base = prp;
	iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]);
	iov[2].iov_base = data;
	iov[2].iov_len = dlen;
	iov[3].iov_base = &pad;
	iov[3].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}
738
/*
 * Receive the data portion of the next packet from the channel's RX
 * bufring into 'data'; the packet header is skipped.
 *
 * On entry *dlen0 is the size of 'data'; on return it is set to the
 * packet's data length.  If the buffer is too small, ENOBUFS is
 * returned, *dlen0 holds the required size, and the packet is left
 * in the bufring for a retry.  Returns the peek error if no packet
 * is available.
 */
int
vmbus_chan_recv(struct hv_vmbus_channel *chan, void *data, int *dlen0,
    uint64_t *xactid)
{
	struct vmbus_chanpkt_hdr pkt;
	int error, dlen, hlen;

	/* Peek at the header first; does not consume the packet. */
	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
	if (error)
		return error;

	/* Encoded lengths are in 8-byte units. */
	hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen);
	dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen;

	if (*dlen0 < dlen) {
		/* Return the size of this packet. */
		*dlen0 = dlen;
		return ENOBUFS;
	}

	*xactid = pkt.cph_xactid;
	*dlen0 = dlen;

	/* Skip the header ('hlen') and consume the data. */
	error = hv_ring_buffer_read(&chan->inbound, data, dlen, hlen);
	KASSERT(!error, ("hv_ring_buffer_read failed"));

	return 0;
}
767
768/**
769 * @brief Retrieve the raw packet on the specified channel
770 */
771int
772hv_vmbus_channel_recv_packet_raw(
773	hv_vmbus_channel*	channel,
774	void*			buffer,
775	uint32_t		buffer_len,
776	uint32_t*		buffer_actual_len,
777	uint64_t*		request_id)
778{
779	int		ret;
780	uint32_t	packetLen;
781	hv_vm_packet_descriptor	desc;
782
783	*buffer_actual_len = 0;
784	*request_id = 0;
785
786	ret = hv_ring_buffer_peek(
787		&channel->inbound, &desc,
788		sizeof(hv_vm_packet_descriptor));
789
790	if (ret != 0)
791	    return (0);
792
793	packetLen = desc.length8 << 3;
794	*buffer_actual_len = packetLen;
795
796	if (packetLen > buffer_len)
797	    return (ENOBUFS);
798
799	*request_id = desc.transaction_id;
800
801	/* Copy over the entire packet to the user buffer */
802	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
803
804	return (0);
805}
806
/*
 * Batched-read channel task: run the channel callback repeatedly
 * until the RX bufring is drained, re-enabling host interrupts only
 * between iterations.
 */
static void
vmbus_chan_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;
	vmbus_chan_callback_t cb = chan->ch_cb;
	void *cbarg = chan->ch_cbarg;

	/*
	 * Optimize host to guest signaling by ensuring:
	 * 1. While reading the channel, we disable interrupts from
	 *    host.
	 * 2. Ensure that we process all posted messages from the host
	 *    before returning from this callback.
	 * 3. Once we return, enable signaling from the host. Once this
	 *    state is set we check to see if additional packets are
	 *    available to read. In this case we repeat the process.
	 *
	 * NOTE: Interrupt has been disabled in the ISR.
	 */
	for (;;) {
		uint32_t left;

		cb(cbarg);

		/* Re-enable host interrupts; report bytes still unread. */
		left = hv_ring_buffer_read_end(&chan->inbound);
		if (left == 0) {
			/* No more data in RX bufring; done */
			break;
		}
		/* More data arrived; disable interrupts and loop. */
		hv_ring_buffer_read_begin(&chan->inbound);
	}
}
839
840static void
841vmbus_chan_task_nobatch(void *xchan, int pending __unused)
842{
843	struct hv_vmbus_channel *chan = xchan;
844
845	chan->ch_cb(chan->ch_cbarg);
846}
847
/*
 * Scan and atomically clear 'flag_cnt' event-flag words; for every
 * channel whose flag was set, enqueue its task on the channel's
 * taskqueue (starting a batched read first, where enabled).
 */
static __inline void
vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
    int flag_cnt)
{
	int f;

	for (f = 0; f < flag_cnt; ++f) {
		uint32_t chid_base;
		u_long flags;
		int chid_ofs;

		if (event_flags[f] == 0)
			continue;

		/* Grab and clear the whole word atomically. */
		flags = atomic_swap_long(&event_flags[f], 0);
		chid_base = f << VMBUS_EVTFLAG_SHIFT;

		while ((chid_ofs = ffsl(flags)) != 0) {
			struct hv_vmbus_channel *channel;

			--chid_ofs; /* NOTE: ffsl is 1-based */
			flags &= ~(1UL << chid_ofs);

			channel = sc->vmbus_chmap[chid_base + chid_ofs];

			/* if channel is closed or closing */
			if (channel == NULL || channel->ch_tq == NULL)
				continue;

			/* Disable host interrupts before the batched read. */
			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
				hv_ring_buffer_read_begin(&channel->inbound);
			taskqueue_enqueue(channel->ch_tq, &channel->ch_task);
		}
	}
}
883
/*
 * Per-CPU event processing for Win8 and newer hosts: the event page
 * is consulted directly for the ids of channels with pending
 * interrupts.
 */
void
vmbus_event_proc(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	/*
	 * On Host with Win8 or above, the event page can be checked directly
	 * to get the id of the channel that has the pending interrupt.
	 */
	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	vmbus_event_flags_proc(sc, eventf->evt_flags,
	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
}
897
/*
 * Per-CPU event processing for pre-Win8 hosts: bit 0 of the event
 * page only signals "events pending"; the actual per-channel flags
 * live in the shared RX event-flag array.
 */
void
vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
	}
}
909
/*
 * Raise (never lower) the event-flag count of the channel's target
 * CPU so that event processing there covers this channel's flag word.
 * Lock-free: concurrent updaters race via cmpset; whoever installs a
 * value >= ours makes our update unnecessary.
 */
static void
vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
    const struct hv_vmbus_channel *chan)
{
	volatile int *flag_cnt_ptr;
	int flag_cnt;

	/* Number of flag words needed to reach this channel's bit. */
	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);

	for (;;) {
		int old_flag_cnt;

		old_flag_cnt = *flag_cnt_ptr;
		/* Someone already covers us; nothing to do. */
		if (old_flag_cnt >= flag_cnt)
			break;
		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
			if (bootverbose) {
				device_printf(sc->vmbus_dev,
				    "channel%u update cpu%d flag_cnt to %d\n",
				    chan->ch_id, chan->ch_cpuid, flag_cnt);
			}
			break;
		}
		/* cmpset lost the race; re-read and retry. */
	}
}
936
/*
 * Allocate and minimally initialize a channel object: its monitor
 * parameter DMA block, sub-channel list/lock, and detach task.
 * Returns NULL if the monitor parameter allocation fails.
 */
static struct hv_vmbus_channel *
vmbus_chan_alloc(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);

	/* Monitor parameters must be hypercall-aligned DMA memory. */
	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (chan->ch_monprm == NULL) {
		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
		free(chan, M_DEVBUF);
		return NULL;
	}

	chan->vmbus_sc = sc;
	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
	TAILQ_INIT(&chan->ch_subchans);
	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);

	return chan;
}
960
/*
 * Release a channel object allocated by vmbus_chan_alloc().
 */
static void
vmbus_chan_free(struct hv_vmbus_channel *chan)
{
	/* TODO: assert sub-channel list is empty */
	/* TODO: assert no longer on the primary channel's sub-channel list */
	/* TODO: assert no longer on the vmbus channel list */
	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
	mtx_destroy(&chan->ch_subchan_lock);
	free(chan, M_DEVBUF);
}
971
/*
 * Register a newly offered channel: record it in the channel map,
 * then either install it as a new primary channel, or link it to its
 * primary channel (matched by type GUID + instance GUID) as a
 * sub-channel and wake up any thread waiting for sub-channels.
 *
 * Returns 0 on success; EINVAL for chan0, out-of-range ids,
 * duplicated primaries, or sub-channels without a primary.
 */
static int
vmbus_chan_add(struct hv_vmbus_channel *newchan)
{
	struct vmbus_softc *sc = newchan->vmbus_sc;
	struct hv_vmbus_channel *prichan;

	if (newchan->ch_id == 0) {
		/*
		 * XXX
		 * Chan0 will neither be processed nor should be offered;
		 * skip it.
		 */
		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
		return EINVAL;
	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
		    newchan->ch_id);
		return EINVAL;
	}
	/* Make the channel reachable from event processing. */
	sc->vmbus_chmap[newchan->ch_id] = newchan;

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
		    newchan->ch_id, newchan->ch_subidx);
	}

	mtx_lock(&sc->vmbus_prichan_lock);
	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
		/*
		 * Sub-channel will have the same type GUID and instance
		 * GUID as its primary channel.
		 */
		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
		    sizeof(struct hyperv_guid)) == 0 &&
		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
		    sizeof(struct hyperv_guid)) == 0)
			break;
	}
	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
		if (prichan == NULL) {
			/* Install the new primary channel */
			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
			    ch_prilink);
			mtx_unlock(&sc->vmbus_prichan_lock);
			return 0;
		} else {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "duplicated primary "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
	} else { /* Sub-channel */
		if (prichan == NULL) {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "no primary chan for "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
		/*
		 * Found the primary channel for this sub-channel and
		 * move on.
		 *
		 * XXX refcnt prichan
		 */
	}
	mtx_unlock(&sc->vmbus_prichan_lock);

	/*
	 * This is a sub-channel; link it with the primary channel.
	 */
	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
	    ("new channel is not sub-channel"));
	KASSERT(prichan != NULL, ("no primary channel"));

	newchan->ch_prichan = prichan;
	newchan->ch_dev = prichan->ch_dev;

	mtx_lock(&prichan->ch_subchan_lock);
	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
	/*
	 * Bump up sub-channel count and notify anyone that is
	 * interested in this sub-channel, after this sub-channel
	 * is setup.
	 */
	prichan->ch_subchan_cnt++;
	mtx_unlock(&prichan->ch_subchan_lock);
	wakeup(prichan);

	return 0;
}
1062
/*
 * Bind the channel to the given CPU, recording both the host-visible
 * virtual CPU id and the guest CPU id.  Pre-Win8 hosts (WS2008/WIN7)
 * only support cpu0, so the request is silently overridden there.
 */
void
vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
{
	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));

	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
		/* Only cpu0 is supported */
		cpu = 0;
	}

	chan->ch_cpuid = cpu;
	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);

	if (bootverbose) {
		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
	}
}
1082
1083void
1084vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
1085{
1086	static uint32_t vmbus_chan_nextcpu;
1087	int cpu;
1088
1089	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1090	vmbus_channel_cpu_set(chan, cpu);
1091}
1092
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{

	/*
	 * Default policy: pin the channel to cpu0.  Drivers with
	 * special channel-cpu mapping requirements are expected to
	 * override this via vmbus_channel_cpu_{set,rr}().
	 */
	vmbus_channel_cpu_set(chan, 0);
}
1103
/*
 * CHOFFER channel message handler: allocate a channel, initialize
 * it from the host's offer, register it with the vmbus, and attach
 * the child device if this is a primary channel.
 */
static void
vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_choffer *offer;
	struct hv_vmbus_channel *chan;
	int error;

	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;

	chan = vmbus_chan_alloc(sc);
	if (chan == NULL) {
		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
		    offer->chm_chanid);
		return;
	}

	/* Identity: channel id, sub-channel index, type/instance GUIDs. */
	chan->ch_id = offer->chm_chanid;
	chan->ch_subidx = offer->chm_subidx;
	chan->ch_guid_type = offer->chm_chtype;
	chan->ch_guid_inst = offer->chm_chinst;

	/* Batch reading is on by default */
	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;

	/*
	 * Signal the fixed event connection id by default; hosts
	 * newer than WS2008 supply the connection id in the offer.
	 */
	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
		chan->ch_monprm->mp_connid = offer->chm_connid;

	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
		/*
		 * Setup MNF stuffs.
		 *
		 * Split chm_montrig into a trigger index and a bit
		 * mask; a bogus host-supplied trigger that would land
		 * outside the trigger array is fatal.
		 */
		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
			panic("invalid monitor trigger %u", offer->chm_montrig);
		chan->ch_montrig_mask =
		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
	}

	/* Select default cpu for this channel. */
	vmbus_chan_cpu_default(chan);

	error = vmbus_chan_add(chan);
	if (error) {
		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
		    chan->ch_id, error);
		vmbus_chan_free(chan);
		return;
	}

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Add device for this primary channel.
		 *
		 * NOTE:
		 * Error is ignored here; don't have much to do if error
		 * really happens.
		 */
		vmbus_add_child(chan);
	}
}
1167
1168/*
1169 * XXX pretty broken; need rework.
1170 */
1171static void
1172vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1173    const struct vmbus_message *msg)
1174{
1175	const struct vmbus_chanmsg_chrescind *note;
1176	struct hv_vmbus_channel *chan;
1177
1178	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1179	if (note->chm_chanid > VMBUS_CHAN_MAX) {
1180		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1181		    note->chm_chanid);
1182		return;
1183	}
1184
1185	if (bootverbose) {
1186		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1187		    note->chm_chanid);
1188	}
1189
1190	chan = sc->vmbus_chmap[note->chm_chanid];
1191	if (chan == NULL)
1192		return;
1193	sc->vmbus_chmap[note->chm_chanid] = NULL;
1194
1195	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1196}
1197
1198static void
1199vmbus_chan_detach_task(void *xchan, int pending __unused)
1200{
1201	struct hv_vmbus_channel *chan = xchan;
1202
1203	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1204		/* Only primary channel owns the device */
1205		vmbus_delete_child(chan);
1206		/* NOTE: DO NOT free primary channel for now */
1207	} else {
1208		struct vmbus_softc *sc = chan->vmbus_sc;
1209		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1210		struct vmbus_chanmsg_chfree *req;
1211		struct vmbus_msghc *mh;
1212		int error;
1213
1214		mh = vmbus_msghc_get(sc, sizeof(*req));
1215		if (mh == NULL) {
1216			device_printf(sc->vmbus_dev,
1217			    "can not get msg hypercall for chfree(chan%u)\n",
1218			    chan->ch_id);
1219			goto remove;
1220		}
1221
1222		req = vmbus_msghc_dataptr(mh);
1223		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1224		req->chm_chanid = chan->ch_id;
1225
1226		error = vmbus_msghc_exec_noresult(mh);
1227		vmbus_msghc_put(sc, mh);
1228
1229		if (error) {
1230			device_printf(sc->vmbus_dev,
1231			    "chfree(chan%u) failed: %d",
1232			    chan->ch_id, error);
1233			/* NOTE: Move on! */
1234		} else {
1235			if (bootverbose) {
1236				device_printf(sc->vmbus_dev, "chan%u freed\n",
1237				    chan->ch_id);
1238			}
1239		}
1240remove:
1241		mtx_lock(&pri_chan->ch_subchan_lock);
1242		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1243		KASSERT(pri_chan->ch_subchan_cnt > 0,
1244		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1245		pri_chan->ch_subchan_cnt--;
1246		mtx_unlock(&pri_chan->ch_subchan_lock);
1247		wakeup(pri_chan);
1248
1249		vmbus_chan_free(chan);
1250	}
1251}
1252
1253/*
1254 * Detach all devices and destroy the corresponding primary channels.
1255 */
1256void
1257vmbus_chan_destroy_all(struct vmbus_softc *sc)
1258{
1259	struct hv_vmbus_channel *chan;
1260
1261	mtx_lock(&sc->vmbus_prichan_lock);
1262	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1263		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1264		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1265		mtx_unlock(&sc->vmbus_prichan_lock);
1266
1267		vmbus_delete_child(chan);
1268		vmbus_chan_free(chan);
1269
1270		mtx_lock(&sc->vmbus_prichan_lock);
1271	}
1272	bzero(sc->vmbus_chmap,
1273	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1274	mtx_unlock(&sc->vmbus_prichan_lock);
1275}
1276
1277/**
1278 * @brief Select the best outgoing channel
1279 *
1280 * The channel whose vcpu binding is closest to the currect vcpu will
1281 * be selected.
1282 * If no multi-channel, always select primary channel
1283 *
1284 * @param primary - primary channel
1285 */
1286struct hv_vmbus_channel *
1287vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
1288{
1289	hv_vmbus_channel *new_channel = NULL;
1290	hv_vmbus_channel *outgoing_channel = primary;
1291	int old_cpu_distance = 0;
1292	int new_cpu_distance = 0;
1293	int cur_vcpu = 0;
1294	int smp_pro_id = PCPU_GET(cpuid);
1295
1296	if (TAILQ_EMPTY(&primary->ch_subchans)) {
1297		return outgoing_channel;
1298	}
1299
1300	if (smp_pro_id >= MAXCPU) {
1301		return outgoing_channel;
1302	}
1303
1304	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);
1305
1306	/* XXX need lock */
1307	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
1308		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
1309			continue;
1310		}
1311
1312		if (new_channel->ch_vcpuid == cur_vcpu){
1313			return new_channel;
1314		}
1315
1316		old_cpu_distance = ((outgoing_channel->ch_vcpuid > cur_vcpu) ?
1317		    (outgoing_channel->ch_vcpuid - cur_vcpu) :
1318		    (cur_vcpu - outgoing_channel->ch_vcpuid));
1319
1320		new_cpu_distance = ((new_channel->ch_vcpuid > cur_vcpu) ?
1321		    (new_channel->ch_vcpuid - cur_vcpu) :
1322		    (cur_vcpu - new_channel->ch_vcpuid));
1323
1324		if (old_cpu_distance < new_cpu_distance) {
1325			continue;
1326		}
1327
1328		outgoing_channel = new_channel;
1329	}
1330
1331	return(outgoing_channel);
1332}
1333
1334struct hv_vmbus_channel **
1335vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1336{
1337	struct hv_vmbus_channel **ret, *chan;
1338	int i;
1339
1340	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1341	    M_WAITOK);
1342
1343	mtx_lock(&pri_chan->ch_subchan_lock);
1344
1345	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1346		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1347
1348	i = 0;
1349	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1350		/* TODO: refcnt chan */
1351		ret[i] = chan;
1352
1353		++i;
1354		if (i == subchan_cnt)
1355			break;
1356	}
1357	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1358	    pri_chan->ch_subchan_cnt, subchan_cnt));
1359
1360	mtx_unlock(&pri_chan->ch_subchan_lock);
1361
1362	return ret;
1363}
1364
1365void
1366vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
1367{
1368
1369	free(subchan, M_TEMP);
1370}
1371
1372void
1373vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan)
1374{
1375	mtx_lock(&pri_chan->ch_subchan_lock);
1376	while (pri_chan->ch_subchan_cnt > 0)
1377		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
1378	mtx_unlock(&pri_chan->ch_subchan_lock);
1379}
1380
1381void
1382vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1383{
1384	vmbus_chanmsg_proc_t msg_proc;
1385	uint32_t msg_type;
1386
1387	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1388	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1389	    ("invalid message type %u", msg_type));
1390
1391	msg_proc = vmbus_chan_msgprocs[msg_type];
1392	if (msg_proc != NULL)
1393		msg_proc(sc, msg);
1394}
1395