vmbus_chan.c revision 302891
/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302891 2016-07-15 08:31:53Z sephe $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include <machine/atomic.h>
#include <machine/bus.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
#include <dev/hyperv/vmbus/hyperv_var.h>
#include <dev/hyperv/vmbus/vmbus_reg.h>
#include <dev/hyperv/vmbus/vmbus_var.h>

static void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
static void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
		    const struct hv_vmbus_channel *);

static void	vmbus_chan_task(void *, int);
static void	vmbus_chan_task_nobatch(void *, int);
static void	vmbus_chan_detach_task(void *, int);

static void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
		    const struct vmbus_message *);
static void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
		    const struct vmbus_message *);

/*
 * Vmbus channel message processing.
 */
static const vmbus_chanmsg_proc_t
vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),

	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
};

/**
 *  @brief Trigger an event notification on the specified channel
 */
static void
vmbus_chan_send_event(hv_vmbus_channel *channel)
{
	struct vmbus_softc *sc = channel->vmbus_sc;
	uint32_t chanid = channel->ch_id;

	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
	    1UL << (chanid & VMBUS_EVTFLAG_MASK));

	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
		atomic_set_int(
		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
		channel->ch_montrig_mask);
	} else {
		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
	}
}
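
/*
 * Example of the event-flag indexing above (illustrative; assumes a 64-bit
 * u_long, i.e. VMBUS_EVTFLAG_SHIFT == 6 and VMBUS_EVTFLAG_MASK == 63):
 * chan37 maps to bit (37 & 63) == 37 of word (37 >> 6) == 0 in
 * vmbus_tx_evtflags[], which the host consults once it is signaled.
 */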

static int
vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
{
	struct hv_vmbus_channel *chan = arg1;
	int alloc = 0;

	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
		alloc = 1;
	return sysctl_handle_int(oidp, &alloc, 0, req);
}

static void
vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
{
	device_t dev;
	struct sysctl_oid *devch_sysctl;
	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
	struct sysctl_ctx_list *ctx;
	uint32_t ch_id;
	uint16_t sub_ch_id;
	char name[16];

	hv_vmbus_channel* primary_ch = channel->ch_prichan;

	if (primary_ch == NULL) {
		dev = channel->ch_dev;
		ch_id = channel->ch_id;
	} else {
		dev = primary_ch->ch_dev;
		ch_id = primary_ch->ch_id;
		sub_ch_id = channel->ch_subidx;
	}
	ctx = &channel->ch_sysctl_ctx;
	sysctl_ctx_init(ctx);
	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
	devch_sysctl = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
	snprintf(name, sizeof(name), "%d", ch_id);
	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
	    	    SYSCTL_CHILDREN(devch_sysctl),
	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	if (primary_ch != NULL) {
		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_id_sysctl),
			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		snprintf(name, sizeof(name), "%d", sub_ch_id);
		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_sub_sysctl),
			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
		    OID_AUTO, "chanid", CTLFLAG_RD,
		    &channel->ch_id, 0, "channel id");
	}
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "cpu", CTLFLAG_RD, &channel->ch_cpuid, 0, "owner CPU id");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
	    "is monitor allocated to this channel");

	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "in",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "out",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_in_sysctl),
		&(channel->inbound),
		"inbound ring buffer stats");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_out_sysctl),
		&(channel->outbound),
		"outbound ring buffer stats");
}

/**
 * @brief Open the specified channel
 */
int
hv_vmbus_channel_open(
	hv_vmbus_channel*		new_channel,
	uint32_t			send_ring_buffer_size,
	uint32_t			recv_ring_buffer_size,
	void*				user_data,
	uint32_t			user_data_len,
	vmbus_chan_callback_t		cb,
	void				*cbarg)
{
	struct vmbus_softc *sc = new_channel->vmbus_sc;
	const struct vmbus_chanmsg_chopen_resp *resp;
	const struct vmbus_message *msg;
	struct vmbus_chanmsg_chopen *req;
	struct vmbus_msghc *mh;
	uint32_t status;
	int ret = 0;
	uint8_t *br;

	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
		device_printf(sc->vmbus_dev,
		    "invalid udata len %u for chan%u\n",
		    user_data_len, new_channel->ch_id);
		return EINVAL;
	}
	KASSERT((send_ring_buffer_size & PAGE_MASK) == 0,
	    ("send bufring size is not a multiple of the page size"));
	KASSERT((recv_ring_buffer_size & PAGE_MASK) == 0,
	    ("recv bufring size is not a multiple of the page size"));

	if (atomic_testandset_int(&new_channel->ch_stflags,
	    VMBUS_CHAN_ST_OPENED_SHIFT))
		panic("double-open chan%u", new_channel->ch_id);

	new_channel->ch_cb = cb;
	new_channel->ch_cbarg = cbarg;

	vmbus_chan_update_evtflagcnt(sc, new_channel);

	new_channel->ch_tq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
	    new_channel->ch_cpuid);
	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task,
		    new_channel);
	} else {
		TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task_nobatch,
		    new_channel);
	}

	/*
	 * Allocate the TX+RX bufrings.
	 * XXX should use ch_dev dtag
	 */
	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    PAGE_SIZE, 0, send_ring_buffer_size + recv_ring_buffer_size,
	    &new_channel->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (br == NULL) {
		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
		ret = ENOMEM;
		goto failed;
	}
	new_channel->ch_bufring = br;

	/* TX bufring comes first */
	hv_vmbus_ring_buffer_init(&new_channel->outbound,
	    br, send_ring_buffer_size);
	/* RX bufring immediately follows TX bufring */
	hv_vmbus_ring_buffer_init(&new_channel->inbound,
	    br + send_ring_buffer_size, recv_ring_buffer_size);

	/* Create sysctl tree for this channel */
	vmbus_channel_sysctl_create(new_channel);

	/*
	 * Connect the bufrings, both RX and TX, to this channel.
	 */
	ret = vmbus_chan_gpadl_connect(new_channel,
		new_channel->ch_bufring_dma.hv_paddr,
		send_ring_buffer_size + recv_ring_buffer_size,
		&new_channel->ch_bufring_gpadl);
	if (ret != 0) {
		device_printf(sc->vmbus_dev,
		    "failed to connect bufring GPADL to chan%u\n",
		    new_channel->ch_id);
		goto failed;
	}

	/*
	 * Open channel w/ the bufring GPADL on the target CPU.
	 */
	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chopen(chan%u)\n",
		    new_channel->ch_id);
		ret = ENXIO;
		goto failed;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
	req->chm_chanid = new_channel->ch_id;
	req->chm_openid = new_channel->ch_id;
	req->chm_gpadl = new_channel->ch_bufring_gpadl;
	req->chm_vcpuid = new_channel->ch_vcpuid;
	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
	if (user_data_len)
		memcpy(req->chm_udata, user_data, user_data_len);

	ret = vmbus_msghc_exec(sc, mh);
	if (ret != 0) {
		device_printf(sc->vmbus_dev,
		    "chopen(chan%u) msg hypercall exec failed: %d\n",
		    new_channel->ch_id, ret);
		vmbus_msghc_put(sc, mh);
		goto failed;
	}

	msg = vmbus_msghc_wait_result(sc, mh);
	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
	status = resp->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status == 0) {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "chan%u opened\n",
			    new_channel->ch_id);
		}
		return 0;
	}

	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
	    new_channel->ch_id);
	ret = ENXIO;

failed:
	if (new_channel->ch_bufring_gpadl) {
		vmbus_chan_gpadl_disconnect(new_channel,
		    new_channel->ch_bufring_gpadl);
		new_channel->ch_bufring_gpadl = 0;
	}
	if (new_channel->ch_bufring != NULL) {
		hyperv_dmamem_free(&new_channel->ch_bufring_dma,
		    new_channel->ch_bufring);
		new_channel->ch_bufring = NULL;
	}
	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
	return ret;
}
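
/*
 * Illustrative use of hv_vmbus_channel_open() (a sketch only; the driver
 * callback, softc and ring sizes below are hypothetical, but the
 * constraints match the checks above: page-multiple ring sizes and udata
 * no larger than VMBUS_CHANMSG_CHOPEN_UDATA_SIZE):
 *
 *	error = hv_vmbus_channel_open(chan, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
 *	    NULL, 0, mydrv_chan_cb, sc);
 *	if (error)
 *		device_printf(dev, "chan%u open failed: %d\n",
 *		    chan->ch_id, error);
 */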

int
vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
    int size, uint32_t *gpadl0)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_conn *req;
	const struct vmbus_message *msg;
	size_t reqsz;
	uint32_t gpadl, status;
	int page_count, range_len, i, cnt, error;
	uint64_t page_id;

	/*
	 * Preliminary checks.
	 */

	KASSERT((size & PAGE_MASK) == 0,
	    ("invalid GPA size %d, not a multiple of the page size", size));
	page_count = size >> PAGE_SHIFT;

	KASSERT((paddr & PAGE_MASK) == 0,
	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
	page_id = paddr >> PAGE_SHIFT;

	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
	/*
	 * We don't support multiple GPA ranges.
	 */
	if (range_len > UINT16_MAX) {
		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
		    page_count);
		return EOPNOTSUPP;
	}

	/*
	 * Allocate GPADL id.
	 */
	gpadl = vmbus_gpadl_alloc(sc);
	*gpadl0 = gpadl;

	/*
	 * Connect this GPADL to the target channel.
	 *
	 * NOTE:
	 * Since each message can only hold a small set of page
	 * addresses, several messages may be required to
	 * complete the connection.
	 */
	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
	else
		cnt = page_count;
	page_count -= cnt;

	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
	    chm_range.gpa_page[cnt]);
	mh = vmbus_msghc_get(sc, reqsz);
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpadl->chan%u\n",
		    chan->ch_id);
		return EIO;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;
	req->chm_range_len = range_len;
	req->chm_range_cnt = 1;
	req->chm_range.gpa_len = size;
	req->chm_range.gpa_ofs = 0;
	for (i = 0; i < cnt; ++i)
		req->chm_range.gpa_page[i] = page_id++;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpadl->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	while (page_count > 0) {
		struct vmbus_chanmsg_gpadl_subconn *subreq;

		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
		else
			cnt = page_count;
		page_count -= cnt;

		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
		    chm_gpa_page[cnt]);
		vmbus_msghc_reset(mh, reqsz);

		subreq = vmbus_msghc_dataptr(mh);
		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
		subreq->chm_gpadl = gpadl;
		for (i = 0; i < cnt; ++i)
			subreq->chm_gpa_page[i] = page_id++;

		vmbus_msghc_exec_noresult(mh);
	}
	KASSERT(page_count == 0, ("invalid page count %d", page_count));

	msg = vmbus_msghc_wait_result(sc, mh);
	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
	    msg->msg_data)->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status != 0) {
		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
		    "status %u\n", chan->ch_id, status);
		return EIO;
	} else {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "gpadl->chan%u "
			    "succeeded\n", chan->ch_id);
		}
	}
	return 0;
}
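
/*
 * Sketch of how the GPADL connection above is split across messages.
 * For a bufring of N pages, the initial GPADL_CONN message carries
 * min(N, VMBUS_CHANMSG_GPADL_CONN_PGMAX) page numbers and the rest go out
 * in GPADL_SUBCONN messages of up to VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX
 * pages each; only the final GPADL_CONNRESP from the host is waited on.
 * E.g. with N = 1024 (a 4MB bufring) and hypothetical limits of 26/28
 * pages per message, that is 1 CONN message plus howmany(1024 - 26, 28)
 * == 36 SUBCONN messages.
 */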

/*
 * Disconnect the GPA from the target channel
 */
int
vmbus_chan_gpadl_disconnect(struct hv_vmbus_channel *chan, uint32_t gpadl)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_disconn *req;
	int error;

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpa x->chan%u\n",
		    chan->ch_id);
		return EBUSY;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpa x->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	vmbus_msghc_wait_result(sc, mh);
	/* Discard result; no useful information */
	vmbus_msghc_put(sc, mh);

	return 0;
}

static void
vmbus_chan_close_internal(struct hv_vmbus_channel *chan)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_chclose *req;
	struct taskqueue *tq = chan->ch_tq;
	int error;

	/* TODO: stringent check */
	atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED);

	/*
	 * Free this channel's sysctl tree attached to its device's
	 * sysctl tree.
	 */
	sysctl_ctx_free(&chan->ch_sysctl_ctx);

	/*
	 * Set ch_tq to NULL to avoid more requests from being scheduled.
	 * XXX pretty broken; need rework.
	 */
	chan->ch_tq = NULL;
	taskqueue_drain(tq, &chan->ch_task);
	chan->ch_cb = NULL;

	/*
	 * Close this channel.
	 */
	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chclose(chan%u)\n",
		    chan->ch_id);
		return;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
	req->chm_chanid = chan->ch_id;

	error = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	if (error) {
		device_printf(sc->vmbus_dev,
		    "chclose(chan%u) msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		return;
	} else if (bootverbose) {
		device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id);
	}

	/*
	 * Disconnect the TX+RX bufrings from this channel.
	 */
	if (chan->ch_bufring_gpadl) {
		vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl);
		chan->ch_bufring_gpadl = 0;
	}

	/*
	 * Destroy the TX+RX bufrings.
	 */
	hv_ring_buffer_cleanup(&chan->outbound);
	hv_ring_buffer_cleanup(&chan->inbound);
	if (chan->ch_bufring != NULL) {
		hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring);
		chan->ch_bufring = NULL;
	}
}

/*
 * Caller should make sure that all sub-channels have
 * been added to 'chan' and all to-be-closed channels
 * are not being opened.
 */
void
hv_vmbus_channel_close(struct hv_vmbus_channel *chan)
{
	int subchan_cnt;

	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Sub-channel is closed when its primary channel
		 * is closed; done.
		 */
		return;
	}

	/*
	 * Close all sub-channels, if any.
	 */
	subchan_cnt = chan->ch_subchan_cnt;
	if (subchan_cnt > 0) {
		struct hv_vmbus_channel **subchan;
		int i;

		subchan = vmbus_subchan_get(chan, subchan_cnt);
		for (i = 0; i < subchan_cnt; ++i)
			vmbus_chan_close_internal(subchan[i]);
		vmbus_subchan_rel(subchan, subchan_cnt);
	}

	/* Then close the primary channel. */
	vmbus_chan_close_internal(chan);
}

int
vmbus_chan_send(struct hv_vmbus_channel *chan, uint16_t type, uint16_t flags,
    void *data, int dlen, uint64_t xactid)
{
	struct vmbus_chanpkt pkt;
	int pktlen, pad_pktlen, hlen, error;
	uint64_t pad = 0;
	struct iovec iov[3];
	boolean_t send_evt;

	hlen = sizeof(pkt);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = type;
	pkt.cp_hdr.cph_flags = flags;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 3, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}
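
/*
 * Note on the pad iovec above: VMBUS_CHANPKT_TOTLEN() is assumed to round
 * the packet up to the ring's 8-byte alignment unit, so the zero pad
 * written from the single uint64_t 'pad' is at most 7 bytes.  E.g. an
 * inband send with dlen == 6 yields pktlen == sizeof(pkt) + 6 and
 * iov[2].iov_len == pad_pktlen - pktlen, i.e. the few bytes needed to
 * reach the next 8-byte boundary.
 */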

int
vmbus_chan_send_sglist(struct hv_vmbus_channel *chan,
    struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid)
{
	struct vmbus_chanpkt_sglist pkt;
	int pktlen, pad_pktlen, hlen, error;
	struct iovec iov[4];
	boolean_t send_evt;
	uint64_t pad = 0;

	KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX,
	    ("invalid sglist len %d", sglen));

	hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;
	pkt.cp_rsvd = 0;
	pkt.cp_gpa_cnt = sglen;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);
	iov[1].iov_base = sg;
	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
	iov[2].iov_base = data;
	iov[2].iov_len = dlen;
	iov[3].iov_base = &pad;
	iov[3].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}

int
vmbus_chan_send_prplist(struct hv_vmbus_channel *chan,
    struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen,
    uint64_t xactid)
{
	struct vmbus_chanpkt_prplist pkt;
	int pktlen, pad_pktlen, hlen, error;
	struct iovec iov[4];
	boolean_t send_evt;
	uint64_t pad = 0;

	KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX,
	    ("invalid prplist entry count %d", prp_cnt));

	hlen = __offsetof(struct vmbus_chanpkt_prplist,
	    cp_range[0].gpa_page[prp_cnt]);
	pktlen = hlen + dlen;
	pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen);

	pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
	pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen);
	VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen);
	pkt.cp_hdr.cph_xactid = xactid;
	pkt.cp_rsvd = 0;
	pkt.cp_range_cnt = 1;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);
	iov[1].iov_base = prp;
	iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]);
	iov[2].iov_base = data;
	iov[2].iov_len = dlen;
	iov[3].iov_base = &pad;
	iov[3].iov_len = pad_pktlen - pktlen;

	error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt);
	if (!error && send_evt)
		vmbus_chan_send_event(chan);
	return error;
}

int
vmbus_chan_recv(struct hv_vmbus_channel *chan, void *data, int *dlen0,
    uint64_t *xactid)
{
	struct vmbus_chanpkt_hdr pkt;
	int error, dlen, hlen;

	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
	if (error)
		return error;

	hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen);
	dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen;

	if (*dlen0 < dlen) {
		/* Return the size of this packet's data. */
		*dlen0 = dlen;
		return ENOBUFS;
	}

	*xactid = pkt.cph_xactid;
	*dlen0 = dlen;

	/* Skip packet header */
	error = hv_ring_buffer_read(&chan->inbound, data, dlen, hlen);
	KASSERT(!error, ("hv_ring_buffer_read failed"));

	return 0;
}
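
/*
 * Illustrative receive pattern (hypothetical buffer handling): on ENOBUFS
 * the packet is left in the RX bufring and *dlen0 reports the required
 * data size, so a caller can grow its buffer and retry:
 *
 *	dlen = rxbuf_size;
 *	error = vmbus_chan_recv(chan, rxbuf, &dlen, &xactid);
 *	if (error == ENOBUFS) {
 *		// enlarge rxbuf to at least 'dlen' bytes, then retry
 *	}
 */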

int
vmbus_chan_recv_pkt(struct hv_vmbus_channel *chan,
    struct vmbus_chanpkt_hdr *pkt0, int *pktlen0)
{
	struct vmbus_chanpkt_hdr pkt;
	int error, pktlen;

	error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt));
	if (error)
		return error;

	pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen);
	if (*pktlen0 < pktlen) {
		/* Return the size of this packet. */
		*pktlen0 = pktlen;
		return ENOBUFS;
	}
	*pktlen0 = pktlen;

	/* Include packet header */
	error = hv_ring_buffer_read(&chan->inbound, pkt0, pktlen, 0);
	KASSERT(!error, ("hv_ring_buffer_read failed"));

	return 0;
}

static void
vmbus_chan_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;
	vmbus_chan_callback_t cb = chan->ch_cb;
	void *cbarg = chan->ch_cbarg;

	/*
	 * Optimize host to guest signaling by ensuring:
	 * 1. While reading the channel, we disable interrupts from
	 *    host.
	 * 2. Ensure that we process all posted messages from the host
	 *    before returning from this callback.
	 * 3. Once we are done, re-enable signaling from the host, then
	 *    check whether additional packets are available to read;
	 *    if so, repeat the process.
	 *
	 * NOTE: Interrupt has been disabled in the ISR.
	 */
	for (;;) {
		uint32_t left;

		cb(cbarg);

		left = hv_ring_buffer_read_end(&chan->inbound);
		if (left == 0) {
			/* No more data in RX bufring; done */
			break;
		}
		hv_ring_buffer_read_begin(&chan->inbound);
	}
}
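
/*
 * In the batched-read model above, the channel callback is expected to
 * drain everything currently in the RX bufring before returning; a
 * minimal sketch of such a callback (hypothetical names) is:
 *
 *	static void
 *	mydrv_chan_cb(void *xsc)
 *	{
 *		struct mydrv_softc *sc = xsc;
 *		uint64_t xactid;
 *		int dlen;
 *
 *		for (;;) {
 *			dlen = sizeof(sc->rxbuf);
 *			if (vmbus_chan_recv(sc->chan, sc->rxbuf, &dlen,
 *			    &xactid) != 0)
 *				break;
 *			// handle 'dlen' bytes of sc->rxbuf
 *		}
 *	}
 */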

static void
vmbus_chan_task_nobatch(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;

	chan->ch_cb(chan->ch_cbarg);
}

static __inline void
vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
    int flag_cnt)
{
	int f;

	for (f = 0; f < flag_cnt; ++f) {
		uint32_t chid_base;
		u_long flags;
		int chid_ofs;

		if (event_flags[f] == 0)
			continue;

		flags = atomic_swap_long(&event_flags[f], 0);
		chid_base = f << VMBUS_EVTFLAG_SHIFT;

		while ((chid_ofs = ffsl(flags)) != 0) {
			struct hv_vmbus_channel *channel;

			--chid_ofs; /* NOTE: ffsl is 1-based */
			flags &= ~(1UL << chid_ofs);

			channel = sc->vmbus_chmap[chid_base + chid_ofs];

			/* if channel is closed or closing */
			if (channel == NULL || channel->ch_tq == NULL)
				continue;

			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
				hv_ring_buffer_read_begin(&channel->inbound);
			taskqueue_enqueue(channel->ch_tq, &channel->ch_task);
		}
	}
}

void
vmbus_event_proc(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	/*
	 * On hosts with Win8 or above, the event page can be checked directly
	 * to get the id of the channel that has the pending interrupt.
	 */
	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	vmbus_event_flags_proc(sc, eventf->evt_flags,
	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
}

void
vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
	}
}

static void
vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
    const struct hv_vmbus_channel *chan)
{
	volatile int *flag_cnt_ptr;
	int flag_cnt;

	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid);

	for (;;) {
		int old_flag_cnt;

		old_flag_cnt = *flag_cnt_ptr;
		if (old_flag_cnt >= flag_cnt)
			break;
		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
			if (bootverbose) {
				device_printf(sc->vmbus_dev,
				    "channel%u update cpu%d flag_cnt to %d\n",
				    chan->ch_id, chan->ch_cpuid, flag_cnt);
			}
			break;
		}
	}
}

static struct hv_vmbus_channel *
vmbus_chan_alloc(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);

	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (chan->ch_monprm == NULL) {
		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
		free(chan, M_DEVBUF);
		return NULL;
	}

	chan->vmbus_sc = sc;
	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
	TAILQ_INIT(&chan->ch_subchans);
	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);

	return chan;
}

static void
vmbus_chan_free(struct hv_vmbus_channel *chan)
{
	/* TODO: assert sub-channel list is empty */
	/* TODO: assert no longer on the primary channel's sub-channel list */
	/* TODO: assert no longer on the vmbus channel list */
	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
	mtx_destroy(&chan->ch_subchan_lock);
	free(chan, M_DEVBUF);
}

static int
vmbus_chan_add(struct hv_vmbus_channel *newchan)
{
	struct vmbus_softc *sc = newchan->vmbus_sc;
	struct hv_vmbus_channel *prichan;

	if (newchan->ch_id == 0) {
		/*
		 * XXX
		 * Chan0 will neither be processed nor should be offered;
		 * skip it.
		 */
		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
		return EINVAL;
	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
		    newchan->ch_id);
		return EINVAL;
	}
	sc->vmbus_chmap[newchan->ch_id] = newchan;

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
		    newchan->ch_id, newchan->ch_subidx);
	}

	mtx_lock(&sc->vmbus_prichan_lock);
	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
		/*
		 * Sub-channel will have the same type GUID and instance
		 * GUID as its primary channel.
		 */
		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
		    sizeof(struct hyperv_guid)) == 0 &&
		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
		    sizeof(struct hyperv_guid)) == 0)
			break;
	}
	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
		if (prichan == NULL) {
			/* Install the new primary channel */
			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
			    ch_prilink);
			mtx_unlock(&sc->vmbus_prichan_lock);
			return 0;
		} else {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "duplicated primary "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
	} else { /* Sub-channel */
		if (prichan == NULL) {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "no primary chan for "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
		/*
		 * Found the primary channel for this sub-channel and
		 * move on.
		 *
		 * XXX refcnt prichan
		 */
	}
	mtx_unlock(&sc->vmbus_prichan_lock);

	/*
	 * This is a sub-channel; link it with the primary channel.
	 */
	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
	    ("new channel is not sub-channel"));
	KASSERT(prichan != NULL, ("no primary channel"));

	newchan->ch_prichan = prichan;
	newchan->ch_dev = prichan->ch_dev;

	mtx_lock(&prichan->ch_subchan_lock);
	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
	/*
	 * Bump up sub-channel count and notify anyone that is
	 * interested in this sub-channel, after this sub-channel
	 * is set up.
	 */
	prichan->ch_subchan_cnt++;
	mtx_unlock(&prichan->ch_subchan_lock);
	wakeup(prichan);

	return 0;
}

void
vmbus_chan_cpu_set(struct hv_vmbus_channel *chan, int cpu)
{
	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));

	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
		/* Only cpu0 is supported */
		cpu = 0;
	}

	chan->ch_cpuid = cpu;
	chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);

	if (bootverbose) {
		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
		    chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid);
	}
}

void
vmbus_chan_cpu_rr(struct hv_vmbus_channel *chan)
{
	static uint32_t vmbus_chan_nextcpu;
	int cpu;

	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
	vmbus_chan_cpu_set(chan, cpu);
}

static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{
	/*
	 * By default, pin the channel to cpu0.  Devices having
	 * special channel-cpu mapping requirement should call
	 * vmbus_chan_cpu_{set,rr}().
	 */
	vmbus_chan_cpu_set(chan, 0);
}

static void
vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_choffer *offer;
	struct hv_vmbus_channel *chan;
	int error;

	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;

	chan = vmbus_chan_alloc(sc);
	if (chan == NULL) {
		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
		    offer->chm_chanid);
		return;
	}

	chan->ch_id = offer->chm_chanid;
	chan->ch_subidx = offer->chm_subidx;
	chan->ch_guid_type = offer->chm_chtype;
	chan->ch_guid_inst = offer->chm_chinst;

	/* Batch reading is on by default */
	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;

	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
		chan->ch_monprm->mp_connid = offer->chm_connid;

	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
		/*
		 * Setup MNF stuffs.
		 */
		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
			panic("invalid monitor trigger %u", offer->chm_montrig);
		chan->ch_montrig_mask =
		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
	}

	/* Select default cpu for this channel. */
	vmbus_chan_cpu_default(chan);

	error = vmbus_chan_add(chan);
	if (error) {
		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
		    chan->ch_id, error);
		vmbus_chan_free(chan);
		return;
	}

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Add device for this primary channel.
		 *
		 * NOTE:
		 * Error is ignored here; don't have much to do if error
		 * really happens.
		 */
		vmbus_add_child(chan);
	}
}

/*
 * XXX pretty broken; need rework.
 */
static void
vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_chrescind *note;
	struct hv_vmbus_channel *chan;

	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
	if (note->chm_chanid >= VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
		    note->chm_chanid);
		return;
	}

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
		    note->chm_chanid);
	}

	chan = sc->vmbus_chmap[note->chm_chanid];
	if (chan == NULL)
		return;
	sc->vmbus_chmap[note->chm_chanid] = NULL;

	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
}

static void
vmbus_chan_detach_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/* Only primary channel owns the device */
		vmbus_delete_child(chan);
		/* NOTE: DO NOT free primary channel for now */
	} else {
		struct vmbus_softc *sc = chan->vmbus_sc;
		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
		struct vmbus_chanmsg_chfree *req;
		struct vmbus_msghc *mh;
		int error;

		mh = vmbus_msghc_get(sc, sizeof(*req));
		if (mh == NULL) {
			device_printf(sc->vmbus_dev,
			    "can not get msg hypercall for chfree(chan%u)\n",
			    chan->ch_id);
			goto remove;
		}

		req = vmbus_msghc_dataptr(mh);
		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
		req->chm_chanid = chan->ch_id;

		error = vmbus_msghc_exec_noresult(mh);
		vmbus_msghc_put(sc, mh);

		if (error) {
			device_printf(sc->vmbus_dev,
			    "chfree(chan%u) failed: %d\n",
			    chan->ch_id, error);
			/* NOTE: Move on! */
		} else {
			if (bootverbose) {
				device_printf(sc->vmbus_dev, "chan%u freed\n",
				    chan->ch_id);
			}
		}
remove:
		mtx_lock(&pri_chan->ch_subchan_lock);
		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
		KASSERT(pri_chan->ch_subchan_cnt > 0,
		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
		pri_chan->ch_subchan_cnt--;
		mtx_unlock(&pri_chan->ch_subchan_lock);
		wakeup(pri_chan);

		vmbus_chan_free(chan);
	}
}

/*
 * Detach all devices and destroy the corresponding primary channels.
 */
void
vmbus_chan_destroy_all(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	mtx_lock(&sc->vmbus_prichan_lock);
	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
		mtx_unlock(&sc->vmbus_prichan_lock);

		vmbus_delete_child(chan);
		vmbus_chan_free(chan);

		mtx_lock(&sc->vmbus_prichan_lock);
	}
	bzero(sc->vmbus_chmap,
	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
	mtx_unlock(&sc->vmbus_prichan_lock);
}

/**
 * @brief Select the best outgoing channel
 *
 * The channel whose vcpu binding is closest to the current vcpu will
 * be selected.
 * If there are no sub-channels, the primary channel is always selected.
 *
 * @param primary - primary channel
 */
struct hv_vmbus_channel *
vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
{
	hv_vmbus_channel *new_channel = NULL;
	hv_vmbus_channel *outgoing_channel = primary;
	int old_cpu_distance = 0;
	int new_cpu_distance = 0;
	int cur_vcpu = 0;
	int smp_pro_id = PCPU_GET(cpuid);

	if (TAILQ_EMPTY(&primary->ch_subchans)) {
		return outgoing_channel;
	}

	if (smp_pro_id >= MAXCPU) {
		return outgoing_channel;
	}

	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);

	/* XXX need lock */
	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
			continue;
		}

		if (new_channel->ch_vcpuid == cur_vcpu){
			return new_channel;
		}

		old_cpu_distance = ((outgoing_channel->ch_vcpuid > cur_vcpu) ?
		    (outgoing_channel->ch_vcpuid - cur_vcpu) :
		    (cur_vcpu - outgoing_channel->ch_vcpuid));

		new_cpu_distance = ((new_channel->ch_vcpuid > cur_vcpu) ?
		    (new_channel->ch_vcpuid - cur_vcpu) :
		    (cur_vcpu - new_channel->ch_vcpuid));

		if (old_cpu_distance < new_cpu_distance) {
			continue;
		}

		outgoing_channel = new_channel;
	}

	return(outgoing_channel);
}
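
/*
 * Worked example of the selection above: with the caller on vcpu4, a
 * primary channel bound to vcpu0 and open sub-channels bound to vcpu2
 * and vcpu5, the distances are 4, 2 and 1 respectively, so the vcpu5
 * sub-channel is returned; an exact vcpu match would have been returned
 * immediately.
 */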

struct hv_vmbus_channel **
vmbus_subchan_get(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
{
	struct hv_vmbus_channel **ret, *chan;
	int i;

	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
	    M_WAITOK);

	mtx_lock(&pri_chan->ch_subchan_lock);

	while (pri_chan->ch_subchan_cnt < subchan_cnt)
		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);

	i = 0;
	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
		/* TODO: refcnt chan */
		ret[i] = chan;

		++i;
		if (i == subchan_cnt)
			break;
	}
	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
	    pri_chan->ch_subchan_cnt, subchan_cnt));

	mtx_unlock(&pri_chan->ch_subchan_lock);

	return ret;
}
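
/*
 * Usage note: vmbus_subchan_get() blocks until 'subchan_cnt' sub-channels
 * have been offered, and each successful call must be paired with
 * vmbus_subchan_rel() to free the returned array; see
 * hv_vmbus_channel_close() above for the get/close/rel pattern.
 */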

void
vmbus_subchan_rel(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
{

	free(subchan, M_TEMP);
}

void
vmbus_subchan_drain(struct hv_vmbus_channel *pri_chan)
{
	mtx_lock(&pri_chan->ch_subchan_lock);
	while (pri_chan->ch_subchan_cnt > 0)
		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
	mtx_unlock(&pri_chan->ch_subchan_lock);
}

void
vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
{
	vmbus_chanmsg_proc_t msg_proc;
	uint32_t msg_type;

	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
	    ("invalid message type %u", msg_type));

	msg_proc = vmbus_chan_msgprocs[msg_type];
	if (msg_proc != NULL)
		msg_proc(sc, msg);
}