vmbus_chan.c revision 302871
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302871 2016-07-15 05:40:34Z sephe $");
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/systm.h>
36#include <sys/mbuf.h>
37#include <sys/lock.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40
41#include <machine/atomic.h>
42#include <machine/bus.h>
43
44#include <vm/vm.h>
45#include <vm/vm_param.h>
46#include <vm/pmap.h>
47
48#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
49#include <dev/hyperv/vmbus/hyperv_var.h>
50#include <dev/hyperv/vmbus/vmbus_reg.h>
51#include <dev/hyperv/vmbus/vmbus_var.h>
52
53static void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
54static void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
55		    const struct hv_vmbus_channel *);
56
57static void	vmbus_chan_task(void *, int);
58static void	vmbus_chan_task_nobatch(void *, int);
59static void	vmbus_chan_detach_task(void *, int);
60
61static void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
62		    const struct vmbus_message *);
63static void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
64		    const struct vmbus_message *);
65
/*
 * Vmbus channel message processing.
 *
 * Dispatch table indexed by channel message type.  CHOFFER and
 * CHRESCIND get dedicated handlers; the *_RESP entries are marked as
 * wakeup-only (presumably they just wake the thread blocked in
 * vmbus_msghc_wait_result() for the matching request — confirm in
 * the VMBUS_CHANMSG_PROC_WAKEUP macro definition).
 */
static const vmbus_chanmsg_proc_t
vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),

	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
};
78
79/**
80 *  @brief Trigger an event notification on the specified channel
81 */
82static void
83vmbus_chan_send_event(hv_vmbus_channel *channel)
84{
85	struct vmbus_softc *sc = channel->vmbus_sc;
86	uint32_t chanid = channel->ch_id;
87
88	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
89	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
90
91	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
92		atomic_set_int(
93		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
94		channel->ch_montrig_mask);
95	} else {
96		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
97	}
98}
99
100static int
101vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
102{
103	struct hv_vmbus_channel *chan = arg1;
104	int alloc = 0;
105
106	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
107		alloc = 1;
108	return sysctl_handle_int(oidp, &alloc, 0, req);
109}
110
/*
 * Create the sysctl tree for `channel':
 *   dev.DEVNAME.DEVUNIT.channel.CHANID[.sub.SUBID].{chanid,cpu,
 *   monitor_allocated,in.*,out.*}
 * For a sub-channel the tree hangs off its primary channel's device
 * and channel id.  All OIDs live in channel->ch_sysctl_ctx and are
 * destroyed via sysctl_ctx_free() when the channel is closed.
 */
static void
vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
{
	device_t dev;
	struct sysctl_oid *devch_sysctl;
	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
	struct sysctl_ctx_list *ctx;
	uint32_t ch_id;
	uint16_t sub_ch_id;
	char name[16];

	hv_vmbus_channel* primary_ch = channel->ch_prichan;

	/* sub_ch_id is only assigned and used on the sub-channel path. */
	if (primary_ch == NULL) {
		dev = channel->ch_dev;
		ch_id = channel->ch_id;
	} else {
		dev = primary_ch->ch_dev;
		ch_id = primary_ch->ch_id;
		sub_ch_id = channel->ch_subidx;
	}
	ctx = &channel->ch_sysctl_ctx;
	sysctl_ctx_init(ctx);
	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
	devch_sysctl = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
	snprintf(name, sizeof(name), "%d", ch_id);
	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
	    	    SYSCTL_CHILDREN(devch_sysctl),
	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	if (primary_ch != NULL) {
		/* Sub-channel: nest under ...CHANID.sub.SUBID instead. */
		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_id_sysctl),
			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		snprintf(name, sizeof(name), "%d", sub_ch_id);
		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_sub_sysctl),
			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

		/* Expose the sub-channel's own (global) channel id. */
		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
		    OID_AUTO, "chanid", CTLFLAG_RD,
		    &channel->ch_id, 0, "channel id");
	}
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
	    "is monitor allocated to this channel");

	/* Per-direction ring buffer statistics sub-trees. */
	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "in",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "out",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_in_sysctl),
		&(channel->inbound),
		"inbound ring buffer stats");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_out_sysctl),
		&(channel->outbound),
		"outbound ring buffer stats");
}
184
185/**
186 * @brief Open the specified channel
187 */
188int
189hv_vmbus_channel_open(
190	hv_vmbus_channel*		new_channel,
191	uint32_t			send_ring_buffer_size,
192	uint32_t			recv_ring_buffer_size,
193	void*				user_data,
194	uint32_t			user_data_len,
195	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
196	void* 				context)
197{
198	struct vmbus_softc *sc = new_channel->vmbus_sc;
199	const struct vmbus_chanmsg_chopen_resp *resp;
200	const struct vmbus_message *msg;
201	struct vmbus_chanmsg_chopen *req;
202	struct vmbus_msghc *mh;
203	uint32_t status;
204	int ret = 0;
205	void *in, *out;
206
207	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
208		device_printf(sc->vmbus_dev,
209		    "invalid udata len %u for chan%u\n",
210		    user_data_len, new_channel->ch_id);
211		return EINVAL;
212	}
213
214	if (atomic_testandset_int(&new_channel->ch_stflags,
215	    VMBUS_CHAN_ST_OPENED_SHIFT))
216		panic("double-open chan%u", new_channel->ch_id);
217
218	new_channel->on_channel_callback = pfn_on_channel_callback;
219	new_channel->channel_callback_context = context;
220
221	vmbus_chan_update_evtflagcnt(sc, new_channel);
222
223	new_channel->rxq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
224	    new_channel->target_cpu);
225	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
226		TASK_INIT(&new_channel->channel_task, 0,
227		    vmbus_chan_task, new_channel);
228	} else {
229		TASK_INIT(&new_channel->channel_task, 0,
230		    vmbus_chan_task_nobatch, new_channel);
231	}
232
233	/* Allocate the ring buffer */
234	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
235	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
236	KASSERT(out != NULL,
237	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
238	if (out == NULL) {
239		ret = ENOMEM;
240		goto failed;
241	}
242
243	in = ((uint8_t *) out + send_ring_buffer_size);
244
245	new_channel->ring_buffer_pages = out;
246	new_channel->ring_buffer_page_count = (send_ring_buffer_size +
247	    recv_ring_buffer_size) >> PAGE_SHIFT;
248	new_channel->ring_buffer_size = send_ring_buffer_size +
249	    recv_ring_buffer_size;
250
251	hv_vmbus_ring_buffer_init(
252		&new_channel->outbound,
253		out,
254		send_ring_buffer_size);
255
256	hv_vmbus_ring_buffer_init(
257		&new_channel->inbound,
258		in,
259		recv_ring_buffer_size);
260
261	/* Create sysctl tree for this channel */
262	vmbus_channel_sysctl_create(new_channel);
263
264	/**
265	 * Establish the gpadl for the ring buffer
266	 */
267	new_channel->ring_buffer_gpadl_handle = 0;
268
269	ret = hv_vmbus_channel_establish_gpadl(new_channel,
270		new_channel->outbound.ring_buffer,
271		send_ring_buffer_size + recv_ring_buffer_size,
272		&new_channel->ring_buffer_gpadl_handle);
273
274	/*
275	 * Open channel w/ the bufring GPADL on the target CPU.
276	 */
277	mh = vmbus_msghc_get(sc, sizeof(*req));
278	if (mh == NULL) {
279		device_printf(sc->vmbus_dev,
280		    "can not get msg hypercall for chopen(chan%u)\n",
281		    new_channel->ch_id);
282		ret = ENXIO;
283		goto failed;
284	}
285
286	req = vmbus_msghc_dataptr(mh);
287	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
288	req->chm_chanid = new_channel->ch_id;
289	req->chm_openid = new_channel->ch_id;
290	req->chm_gpadl = new_channel->ring_buffer_gpadl_handle;
291	req->chm_vcpuid = new_channel->target_vcpu;
292	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
293	if (user_data_len)
294		memcpy(req->chm_udata, user_data, user_data_len);
295
296	ret = vmbus_msghc_exec(sc, mh);
297	if (ret != 0) {
298		device_printf(sc->vmbus_dev,
299		    "chopen(chan%u) msg hypercall exec failed: %d\n",
300		    new_channel->ch_id, ret);
301		vmbus_msghc_put(sc, mh);
302		goto failed;
303	}
304
305	msg = vmbus_msghc_wait_result(sc, mh);
306	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
307	status = resp->chm_status;
308
309	vmbus_msghc_put(sc, mh);
310
311	if (status == 0) {
312		if (bootverbose) {
313			device_printf(sc->vmbus_dev, "chan%u opened\n",
314			    new_channel->ch_id);
315		}
316		return 0;
317	}
318
319	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
320	    new_channel->ch_id);
321	ret = ENXIO;
322
323failed:
324	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
325	return ret;
326}
327
328/**
329 * @brief Establish a GPADL for the specified buffer
330 */
331int
332hv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
333    void *contig_buffer, uint32_t size, uint32_t *gpadl)
334{
335	return vmbus_chan_gpadl_connect(channel,
336	    hv_get_phys_addr(contig_buffer), size, gpadl);
337}
338
/*
 * Connect a GPADL describing the physically contiguous region
 * [paddr, paddr + size) to `chan'.  The freshly allocated GPADL id is
 * returned through `gpadl0'.  Both `paddr' and `size' must be page
 * aligned.  The page list may span several GPADL_CONN/GPADL_SUBCONN
 * messages; only the final response from the host is waited for.
 */
int
vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
    int size, uint32_t *gpadl0)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_conn *req;
	const struct vmbus_message *msg;
	size_t reqsz;
	uint32_t gpadl, status;
	int page_count, range_len, i, cnt, error;
	uint64_t page_id;

	/*
	 * Preliminary checks.
	 */

	KASSERT((size & PAGE_MASK) == 0,
	    ("invalid GPA size %d, not multiple page size", size));
	page_count = size >> PAGE_SHIFT;

	KASSERT((paddr & PAGE_MASK) == 0,
	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
	page_id = paddr >> PAGE_SHIFT;

	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
	/*
	 * We don't support multiple GPA ranges.
	 */
	if (range_len > UINT16_MAX) {
		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
		    page_count);
		return EOPNOTSUPP;
	}

	/*
	 * Allocate GPADL id.
	 */
	gpadl = vmbus_gpadl_alloc(sc);
	*gpadl0 = gpadl;

	/*
	 * Connect this GPADL to the target channel.
	 *
	 * NOTE:
	 * Since each message can only hold small set of page
	 * addresses, several messages may be required to
	 * complete the connection.
	 */
	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
	else
		cnt = page_count;
	page_count -= cnt;

	/* Initial GPADL_CONN message carrying the first `cnt' pages. */
	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
	    chm_range.gpa_page[cnt]);
	mh = vmbus_msghc_get(sc, reqsz);
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpadl->chan%u\n",
		    chan->ch_id);
		return EIO;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;
	req->chm_range_len = range_len;
	req->chm_range_cnt = 1;
	req->chm_range.gpa_len = size;
	req->chm_range.gpa_ofs = 0;
	for (i = 0; i < cnt; ++i)
		req->chm_range.gpa_page[i] = page_id++;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpadl->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	/*
	 * Send the remaining pages in GPADL_SUBCONN messages; these
	 * reuse the same message hypercall context and expect no
	 * individual responses (exec_noresult).
	 */
	while (page_count > 0) {
		struct vmbus_chanmsg_gpadl_subconn *subreq;

		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
		else
			cnt = page_count;
		page_count -= cnt;

		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
		    chm_gpa_page[cnt]);
		vmbus_msghc_reset(mh, reqsz);

		subreq = vmbus_msghc_dataptr(mh);
		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
		subreq->chm_gpadl = gpadl;
		for (i = 0; i < cnt; ++i)
			subreq->chm_gpa_page[i] = page_id++;

		vmbus_msghc_exec_noresult(mh);
	}
	KASSERT(page_count == 0, ("invalid page count %d", page_count));

	/* Wait for the host to acknowledge the entire GPADL. */
	msg = vmbus_msghc_wait_result(sc, mh);
	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
	    msg->msg_data)->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status != 0) {
		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
		    "status %u\n", chan->ch_id, status);
		return EIO;
	} else {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "gpadl->chan%u "
			    "succeeded\n", chan->ch_id);
		}
	}
	return 0;
}
465
466/*
467 * Disconnect the GPA from the target channel
468 */
469int
470hv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
471{
472	struct vmbus_softc *sc = chan->vmbus_sc;
473	struct vmbus_msghc *mh;
474	struct vmbus_chanmsg_gpadl_disconn *req;
475	int error;
476
477	mh = vmbus_msghc_get(sc, sizeof(*req));
478	if (mh == NULL) {
479		device_printf(sc->vmbus_dev,
480		    "can not get msg hypercall for gpa x->chan%u\n",
481		    chan->ch_id);
482		return EBUSY;
483	}
484
485	req = vmbus_msghc_dataptr(mh);
486	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
487	req->chm_chanid = chan->ch_id;
488	req->chm_gpadl = gpadl;
489
490	error = vmbus_msghc_exec(sc, mh);
491	if (error) {
492		device_printf(sc->vmbus_dev,
493		    "gpa x->chan%u msg hypercall exec failed: %d\n",
494		    chan->ch_id, error);
495		vmbus_msghc_put(sc, mh);
496		return error;
497	}
498
499	vmbus_msghc_wait_result(sc, mh);
500	/* Discard result; no useful information */
501	vmbus_msghc_put(sc, mh);
502
503	return 0;
504}
505
/*
 * Close a single channel: mark it closed, destroy its sysctl tree,
 * stop RX task scheduling, send CHCLOSE to the host, then disconnect
 * the ring buffer GPADL and release the ring buffer memory.
 *
 * NOTE: if the CHCLOSE hypercall fails, this returns early and the
 * GPADL/ring buffer are left in place (presumably because the host
 * may still reference them — confirm before changing).
 */
static void
hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
{
	struct vmbus_softc *sc = channel->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_chclose *req;
	struct taskqueue *rxq = channel->rxq;
	int error;

	/* TODO: stringent check */
	atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);

	sysctl_ctx_free(&channel->ch_sysctl_ctx);

	/*
	 * set rxq to NULL to avoid more requests be scheduled
	 */
	channel->rxq = NULL;
	/* Wait for any in-flight RX task to finish before proceeding. */
	taskqueue_drain(rxq, &channel->channel_task);
	channel->on_channel_callback = NULL;

	/**
	 * Send a closing message
	 */

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chclose(chan%u)\n",
		    channel->ch_id);
		return;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
	req->chm_chanid = channel->ch_id;

	/* CHCLOSE has no response; only the send itself can fail. */
	error = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	if (error) {
		device_printf(sc->vmbus_dev,
		    "chclose(chan%u) msg hypercall exec failed: %d\n",
		    channel->ch_id, error);
		return;
	} else if (bootverbose) {
		device_printf(sc->vmbus_dev, "close chan%u\n",
		    channel->ch_id);
	}

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ring_buffer_gpadl_handle) {
		hv_vmbus_channel_teardown_gpdal(channel,
			channel->ring_buffer_gpadl_handle);
	}

	/* TODO: Send a msg to release the childRelId */

	/* cleanup the ring buffers for this channel */
	hv_ring_buffer_cleanup(&channel->outbound);
	hv_ring_buffer_cleanup(&channel->inbound);

	/* TX and RX bufrings were allocated as one contiguous chunk. */
	contigfree(channel->ring_buffer_pages, channel->ring_buffer_size,
	    M_DEVBUF);
}
571
572/*
573 * Caller should make sure that all sub-channels have
574 * been added to 'chan' and all to-be-closed channels
575 * are not being opened.
576 */
577void
578hv_vmbus_channel_close(struct hv_vmbus_channel *chan)
579{
580	int subchan_cnt;
581
582	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
583		/*
584		 * Sub-channel is closed when its primary channel
585		 * is closed; done.
586		 */
587		return;
588	}
589
590	/*
591	 * Close all sub-channels, if any.
592	 */
593	subchan_cnt = chan->ch_subchan_cnt;
594	if (subchan_cnt > 0) {
595		struct hv_vmbus_channel **subchan;
596		int i;
597
598		subchan = vmbus_get_subchan(chan, subchan_cnt);
599		for (i = 0; i < subchan_cnt; ++i)
600			hv_vmbus_channel_close_internal(subchan[i]);
601		vmbus_rel_subchan(subchan, subchan_cnt);
602	}
603
604	/* Then close the primary channel. */
605	hv_vmbus_channel_close_internal(chan);
606}
607
608/**
609 * @brief Send the specified buffer on the given channel
610 */
611int
612hv_vmbus_channel_send_packet(
613	hv_vmbus_channel*	channel,
614	void*			buffer,
615	uint32_t		buffer_len,
616	uint64_t		request_id,
617	hv_vmbus_packet_type	type,
618	uint32_t		flags)
619{
620	int			ret = 0;
621	hv_vm_packet_descriptor	desc;
622	uint32_t		packet_len;
623	uint64_t		aligned_data;
624	uint32_t		packet_len_aligned;
625	boolean_t		need_sig;
626	struct iovec		iov[3];
627
628	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
629	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
630	aligned_data = 0;
631
632	/* Setup the descriptor */
633	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
634	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
635			    /* in 8-bytes granularity */
636	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
637	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
638	desc.transaction_id = request_id;
639
640	iov[0].iov_base = &desc;
641	iov[0].iov_len = sizeof(hv_vm_packet_descriptor);
642
643	iov[1].iov_base = buffer;
644	iov[1].iov_len = buffer_len;
645
646	iov[2].iov_base = &aligned_data;
647	iov[2].iov_len = packet_len_aligned - packet_len;
648
649	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
650
651	/* TODO: We should determine if this is optional */
652	if (ret == 0 && need_sig)
653		vmbus_chan_send_event(channel);
654
655	return (ret);
656}
657
658/**
659 * @brief Send a range of single-page buffer packets using
660 * a GPADL Direct packet type
661 */
662int
663hv_vmbus_channel_send_packet_pagebuffer(
664	hv_vmbus_channel*	channel,
665	hv_vmbus_page_buffer	page_buffers[],
666	uint32_t		page_count,
667	void*			buffer,
668	uint32_t		buffer_len,
669	uint64_t		request_id)
670{
671
672	int					ret = 0;
673	boolean_t				need_sig;
674	uint32_t				packet_len;
675	uint32_t				page_buflen;
676	uint32_t				packetLen_aligned;
677	struct iovec				iov[4];
678	hv_vmbus_channel_packet_page_buffer	desc;
679	uint32_t				descSize;
680	uint64_t				alignedData = 0;
681
682	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
683		return (EINVAL);
684
685	/*
686	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
687	 *  is the largest size we support
688	 */
689	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
690	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
691	packet_len = descSize + page_buflen + buffer_len;
692	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
693
694	/* Setup the descriptor */
695	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
696	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
697	/* in 8-bytes granularity */
698	desc.data_offset8 = (descSize + page_buflen) >> 3;
699	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
700	desc.transaction_id = request_id;
701	desc.range_count = page_count;
702
703	iov[0].iov_base = &desc;
704	iov[0].iov_len = descSize;
705
706	iov[1].iov_base = page_buffers;
707	iov[1].iov_len = page_buflen;
708
709	iov[2].iov_base = buffer;
710	iov[2].iov_len = buffer_len;
711
712	iov[3].iov_base = &alignedData;
713	iov[3].iov_len = packetLen_aligned - packet_len;
714
715	ret = hv_ring_buffer_write(&channel->outbound, iov, 4, &need_sig);
716
717	/* TODO: We should determine if this is optional */
718	if (ret == 0 && need_sig)
719		vmbus_chan_send_event(channel);
720
721	return (ret);
722}
723
724/**
725 * @brief Send a multi-page buffer packet using a GPADL Direct packet type
726 */
727int
728hv_vmbus_channel_send_packet_multipagebuffer(
729	hv_vmbus_channel*		channel,
730	hv_vmbus_multipage_buffer*	multi_page_buffer,
731	void*				buffer,
732	uint32_t			buffer_len,
733	uint64_t			request_id)
734{
735
736	int			ret = 0;
737	uint32_t		desc_size;
738	boolean_t		need_sig;
739	uint32_t		packet_len;
740	uint32_t		packet_len_aligned;
741	uint32_t		pfn_count;
742	uint64_t		aligned_data = 0;
743	struct iovec		iov[3];
744	hv_vmbus_channel_packet_multipage_buffer desc;
745
746	pfn_count =
747	    HV_NUM_PAGES_SPANNED(
748		    multi_page_buffer->offset,
749		    multi_page_buffer->length);
750
751	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
752	    return (EINVAL);
753	/*
754	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
755	 * is the largest size we support
756	 */
757	desc_size =
758	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
759		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
760			sizeof(uint64_t));
761	packet_len = desc_size + buffer_len;
762	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
763
764	/*
765	 * Setup the descriptor
766	 */
767	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
768	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
769	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
770	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
771	desc.transaction_id = request_id;
772	desc.range_count = 1;
773
774	desc.range.length = multi_page_buffer->length;
775	desc.range.offset = multi_page_buffer->offset;
776
777	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
778		pfn_count * sizeof(uint64_t));
779
780	iov[0].iov_base = &desc;
781	iov[0].iov_len = desc_size;
782
783	iov[1].iov_base = buffer;
784	iov[1].iov_len = buffer_len;
785
786	iov[2].iov_base = &aligned_data;
787	iov[2].iov_len = packet_len_aligned - packet_len;
788
789	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
790
791	/* TODO: We should determine if this is optional */
792	if (ret == 0 && need_sig)
793		vmbus_chan_send_event(channel);
794
795	return (ret);
796}
797
798/**
799 * @brief Retrieve the user packet on the specified channel
800 */
801int
802hv_vmbus_channel_recv_packet(
803	hv_vmbus_channel*	channel,
804	void*			Buffer,
805	uint32_t		buffer_len,
806	uint32_t*		buffer_actual_len,
807	uint64_t*		request_id)
808{
809	int			ret;
810	uint32_t		user_len;
811	uint32_t		packet_len;
812	hv_vm_packet_descriptor	desc;
813
814	*buffer_actual_len = 0;
815	*request_id = 0;
816
817	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
818		sizeof(hv_vm_packet_descriptor));
819	if (ret != 0)
820		return (0);
821
822	packet_len = desc.length8 << 3;
823	user_len = packet_len - (desc.data_offset8 << 3);
824
825	*buffer_actual_len = user_len;
826
827	if (user_len > buffer_len)
828		return (EINVAL);
829
830	*request_id = desc.transaction_id;
831
832	/* Copy over the packet to the user buffer */
833	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
834		(desc.data_offset8 << 3));
835
836	return (0);
837}
838
839/**
840 * @brief Retrieve the raw packet on the specified channel
841 */
842int
843hv_vmbus_channel_recv_packet_raw(
844	hv_vmbus_channel*	channel,
845	void*			buffer,
846	uint32_t		buffer_len,
847	uint32_t*		buffer_actual_len,
848	uint64_t*		request_id)
849{
850	int		ret;
851	uint32_t	packetLen;
852	hv_vm_packet_descriptor	desc;
853
854	*buffer_actual_len = 0;
855	*request_id = 0;
856
857	ret = hv_ring_buffer_peek(
858		&channel->inbound, &desc,
859		sizeof(hv_vm_packet_descriptor));
860
861	if (ret != 0)
862	    return (0);
863
864	packetLen = desc.length8 << 3;
865	*buffer_actual_len = packetLen;
866
867	if (packetLen > buffer_len)
868	    return (ENOBUFS);
869
870	*request_id = desc.transaction_id;
871
872	/* Copy over the entire packet to the user buffer */
873	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
874
875	return (0);
876}
877
/*
 * Taskqueue handler for channels opened with batched reading
 * (VMBUS_CHAN_FLAG_BATCHREAD): the RX bufring's interrupt mask is
 * manipulated around the callback to minimize host->guest signaling.
 */
static void
vmbus_chan_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;
	void (*callback)(void *);
	void *arg;

	arg = chan->channel_callback_context;
	callback = chan->on_channel_callback;

	/*
	 * Optimize host to guest signaling by ensuring:
	 * 1. While reading the channel, we disable interrupts from
	 *    host.
	 * 2. Ensure that we process all posted messages from the host
	 *    before returning from this callback.
	 * 3. Once we return, enable signaling from the host. Once this
	 *    state is set we check to see if additional packets are
	 *    available to read. In this case we repeat the process.
	 *
	 * NOTE: Interrupt has been disabled in the ISR.
	 */
	for (;;) {
		uint32_t left;

		callback(arg);

		/* Re-enable host interrupts; report bytes still unread. */
		left = hv_ring_buffer_read_end(&chan->inbound);
		if (left == 0) {
			/* No more data in RX bufring; done */
			break;
		}
		/* Data raced in after read_end(); mask and go again. */
		hv_ring_buffer_read_begin(&chan->inbound);
	}
}
913
914static void
915vmbus_chan_task_nobatch(void *xchan, int pending __unused)
916{
917	struct hv_vmbus_channel *chan = xchan;
918
919	chan->on_channel_callback(chan->channel_callback_context);
920}
921
/*
 * Scan `event_flags' (an array of `flag_cnt' longs of per-channel
 * pending bits) and schedule the RX task of every flagged channel.
 * Each word is atomically claimed (swapped to 0) before its bits are
 * walked, so concurrent setters are never lost.
 */
static __inline void
vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
    int flag_cnt)
{
	int f;

	for (f = 0; f < flag_cnt; ++f) {
		uint32_t chid_base;
		u_long flags;
		int chid_ofs;

		/* Cheap pre-check before the atomic claim. */
		if (event_flags[f] == 0)
			continue;

		flags = atomic_swap_long(&event_flags[f], 0);
		chid_base = f << VMBUS_EVTFLAG_SHIFT;

		while ((chid_ofs = ffsl(flags)) != 0) {
			struct hv_vmbus_channel *channel;

			--chid_ofs; /* NOTE: ffsl is 1-based */
			flags &= ~(1UL << chid_ofs);

			channel = sc->vmbus_chmap[chid_base + chid_ofs];

			/* if channel is closed or closing */
			if (channel == NULL || channel->rxq == NULL)
				continue;

			/* Batched channels mask host interrupts up front. */
			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
				hv_ring_buffer_read_begin(&channel->inbound);
			taskqueue_enqueue(channel->rxq, &channel->channel_task);
		}
	}
}
957
958void
959vmbus_event_proc(struct vmbus_softc *sc, int cpu)
960{
961	struct vmbus_evtflags *eventf;
962
963	/*
964	 * On Host with Win8 or above, the event page can be checked directly
965	 * to get the id of the channel that has the pending interrupt.
966	 */
967	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
968	vmbus_event_flags_proc(sc, eventf->evt_flags,
969	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
970}
971
972void
973vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
974{
975	struct vmbus_evtflags *eventf;
976
977	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
978	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
979		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
980		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
981	}
982}
983
/*
 * Raise the per-cpu event flag word count on `chan's target CPU so
 * that vmbus_event_proc() scans far enough to see this channel's
 * event flag.  Lock-free: racing updaters use CAS and the count only
 * ever grows, so losing the race to a larger value is fine.
 */
static void
vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
    const struct hv_vmbus_channel *chan)
{
	volatile int *flag_cnt_ptr;
	int flag_cnt;

	/* Number of longs needed to cover this channel id, rounded up. */
	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->target_cpu);

	for (;;) {
		int old_flag_cnt;

		old_flag_cnt = *flag_cnt_ptr;
		/* Someone already raised it at least this far; done. */
		if (old_flag_cnt >= flag_cnt)
			break;
		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
			if (bootverbose) {
				device_printf(sc->vmbus_dev,
				    "channel%u update cpu%d flag_cnt to %d\n",
				    chan->ch_id,
				    chan->target_cpu, flag_cnt);
			}
			break;
		}
	}
}
1011
1012static struct hv_vmbus_channel *
1013vmbus_chan_alloc(struct vmbus_softc *sc)
1014{
1015	struct hv_vmbus_channel *chan;
1016
1017	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);
1018
1019	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
1020	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
1021	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
1022	if (chan->ch_monprm == NULL) {
1023		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
1024		free(chan, M_DEVBUF);
1025		return NULL;
1026	}
1027
1028	chan->vmbus_sc = sc;
1029	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
1030	TAILQ_INIT(&chan->ch_subchans);
1031	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
1032
1033	return chan;
1034}
1035
1036static void
1037vmbus_chan_free(struct hv_vmbus_channel *chan)
1038{
1039	/* TODO: assert sub-channel list is empty */
1040	/* TODO: asset no longer on the primary channel's sub-channel list */
1041	/* TODO: asset no longer on the vmbus channel list */
1042	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
1043	mtx_destroy(&chan->ch_subchan_lock);
1044	free(chan, M_DEVBUF);
1045}
1046
/*
 * Hook `newchan' — freshly built from a CHOFFER message — into the
 * channel map and the primary/sub-channel lists.  A sub-channel is
 * matched to its primary channel by type+instance GUID and linked to
 * it; waiters on the primary channel are woken up afterwards.
 * Returns 0 on success, EINVAL for invalid or duplicated offers.
 */
static int
vmbus_chan_add(struct hv_vmbus_channel *newchan)
{
	struct vmbus_softc *sc = newchan->vmbus_sc;
	struct hv_vmbus_channel *prichan;

	if (newchan->ch_id == 0) {
		/*
		 * XXX
		 * Chan0 will neither be processed nor should be offered;
		 * skip it.
		 */
		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
		return EINVAL;
	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
		    newchan->ch_id);
		return EINVAL;
	}
	/* Make the channel findable by event processing. */
	sc->vmbus_chmap[newchan->ch_id] = newchan;

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
		    newchan->ch_id, newchan->ch_subidx);
	}

	mtx_lock(&sc->vmbus_prichan_lock);
	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
		/*
		 * Sub-channel will have the same type GUID and instance
		 * GUID as its primary channel.
		 */
		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
		    sizeof(struct hyperv_guid)) == 0 &&
		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
		    sizeof(struct hyperv_guid)) == 0)
			break;
	}
	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
		if (prichan == NULL) {
			/* Install the new primary channel */
			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
			    ch_prilink);
			mtx_unlock(&sc->vmbus_prichan_lock);
			return 0;
		} else {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "duplicated primary "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
	} else { /* Sub-channel */
		if (prichan == NULL) {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "no primary chan for "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
		/*
		 * Found the primary channel for this sub-channel and
		 * move on.
		 *
		 * XXX refcnt prichan
		 */
	}
	mtx_unlock(&sc->vmbus_prichan_lock);

	/*
	 * This is a sub-channel; link it with the primary channel.
	 */
	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
	    ("new channel is not sub-channel"));
	KASSERT(prichan != NULL, ("no primary channel"));

	newchan->ch_prichan = prichan;
	newchan->ch_dev = prichan->ch_dev;

	mtx_lock(&prichan->ch_subchan_lock);
	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
	/*
	 * Bump up sub-channel count and notify anyone that is
	 * interested in this sub-channel, after this sub-channel
	 * is setup.
	 */
	prichan->ch_subchan_cnt++;
	mtx_unlock(&prichan->ch_subchan_lock);
	wakeup(prichan);

	return 0;
}
1137
1138void
1139vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1140{
1141	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1142
1143	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1144	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1145		/* Only cpu0 is supported */
1146		cpu = 0;
1147	}
1148
1149	chan->target_cpu = cpu;
1150	chan->target_vcpu = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1151
1152	if (bootverbose) {
1153		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1154		    chan->ch_id,
1155		    chan->target_cpu, chan->target_vcpu);
1156	}
1157}
1158
1159void
1160vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
1161{
1162	static uint32_t vmbus_chan_nextcpu;
1163	int cpu;
1164
1165	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1166	vmbus_channel_cpu_set(chan, cpu);
1167}
1168
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{
	/*
	 * Unless the device driver asks for a specific binding through
	 * vmbus_channel_cpu_{set,rr}(), every channel is pinned to cpu0.
	 */
	vmbus_channel_cpu_set(chan, 0);
}
1179
/*
 * Handle a CHOFFER message from the host: allocate a channel, fill it
 * in from the offer, register it, and — for a primary channel — attach
 * a child device for it.
 */
static void
vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_choffer *offer;
	struct hv_vmbus_channel *chan;
	int error;

	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;

	chan = vmbus_chan_alloc(sc);
	if (chan == NULL) {
		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
		    offer->chm_chanid);
		return;
	}

	/* Identity of the channel, as assigned by the host. */
	chan->ch_id = offer->chm_chanid;
	chan->ch_subidx = offer->chm_subidx;
	chan->ch_guid_type = offer->chm_chtype;
	chan->ch_guid_inst = offer->chm_chinst;

	/* Batch reading is on by default */
	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;

	/* WS2008 hosts do not supply a per-channel connection id. */
	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
		chan->ch_monprm->mp_connid = offer->chm_connid;

	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
		/*
		 * Setup MNF stuffs.
		 */
		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
			panic("invalid monitor trigger %u", offer->chm_montrig);
		chan->ch_montrig_mask =
		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
	}

	/* Select default cpu for this channel. */
	vmbus_chan_cpu_default(chan);

	error = vmbus_chan_add(chan);
	if (error) {
		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
		    chan->ch_id, error);
		vmbus_chan_free(chan);
		return;
	}

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Add device for this primary channel.
		 *
		 * NOTE:
		 * Error is ignored here; don't have much to do if error
		 * really happens.
		 */
		vmbus_add_child(chan);
	}
}
1243
1244/*
1245 * XXX pretty broken; need rework.
1246 */
1247static void
1248vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1249    const struct vmbus_message *msg)
1250{
1251	const struct vmbus_chanmsg_chrescind *note;
1252	struct hv_vmbus_channel *chan;
1253
1254	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1255	if (note->chm_chanid > VMBUS_CHAN_MAX) {
1256		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1257		    note->chm_chanid);
1258		return;
1259	}
1260
1261	if (bootverbose) {
1262		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1263		    note->chm_chanid);
1264	}
1265
1266	chan = sc->vmbus_chmap[note->chm_chanid];
1267	if (chan == NULL)
1268		return;
1269	sc->vmbus_chmap[note->chm_chanid] = NULL;
1270
1271	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1272}
1273
/*
 * Taskqueue handler performing the sleepable part of channel teardown.
 * Primary channels: detach the child device (the channel itself is kept).
 * Sub-channels: tell the host to free the channel, unlink it from the
 * primary channel and free it.
 */
static void
vmbus_chan_detach_task(void *xchan, int pending __unused)
{
	struct hv_vmbus_channel *chan = xchan;

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/* Only primary channel owns the device */
		vmbus_delete_child(chan);
		/* NOTE: DO NOT free primary channel for now */
	} else {
		struct vmbus_softc *sc = chan->vmbus_sc;
		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
		struct vmbus_chanmsg_chfree *req;
		struct vmbus_msghc *mh;
		int error;

		/* Ask the host to release the channel (CHFREE). */
		mh = vmbus_msghc_get(sc, sizeof(*req));
		if (mh == NULL) {
			device_printf(sc->vmbus_dev,
			    "can not get msg hypercall for chfree(chan%u)\n",
			    chan->ch_id);
			goto remove;
		}

		req = vmbus_msghc_dataptr(mh);
		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
		req->chm_chanid = chan->ch_id;

		error = vmbus_msghc_exec_noresult(mh);
		vmbus_msghc_put(sc, mh);

		if (error) {
			device_printf(sc->vmbus_dev,
			    "chfree(chan%u) failed: %d",
			    chan->ch_id, error);
			/* NOTE: Move on! */
		} else {
			if (bootverbose) {
				device_printf(sc->vmbus_dev, "chan%u freed\n",
				    chan->ch_id);
			}
		}
remove:
		/*
		 * Unlink from the primary channel and wake up anyone
		 * sleeping in vmbus_drain_subchan()/vmbus_get_subchan().
		 */
		mtx_lock(&pri_chan->ch_subchan_lock);
		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
		KASSERT(pri_chan->ch_subchan_cnt > 0,
		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
		pri_chan->ch_subchan_cnt--;
		mtx_unlock(&pri_chan->ch_subchan_lock);
		wakeup(pri_chan);

		vmbus_chan_free(chan);
	}
}
1328
1329/*
1330 * Detach all devices and destroy the corresponding primary channels.
1331 */
void
vmbus_chan_destroy_all(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	mtx_lock(&sc->vmbus_prichan_lock);
	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
		/*
		 * Drop the lock around the teardown work:
		 * vmbus_delete_child() may sleep.  The channel is already
		 * off the list, so nobody else can reach it.
		 */
		mtx_unlock(&sc->vmbus_prichan_lock);

		vmbus_delete_child(chan);
		vmbus_chan_free(chan);

		mtx_lock(&sc->vmbus_prichan_lock);
	}
	/* Clear the chan-id -> channel map. */
	bzero(sc->vmbus_chmap,
	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
	mtx_unlock(&sc->vmbus_prichan_lock);
}
1352
1353/**
1354 * @brief Select the best outgoing channel
1355 *
 * The channel whose vcpu binding is closest to the current vcpu will
1357 * be selected.
1358 * If no multi-channel, always select primary channel
1359 *
1360 * @param primary - primary channel
1361 */
1362struct hv_vmbus_channel *
1363vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
1364{
1365	hv_vmbus_channel *new_channel = NULL;
1366	hv_vmbus_channel *outgoing_channel = primary;
1367	int old_cpu_distance = 0;
1368	int new_cpu_distance = 0;
1369	int cur_vcpu = 0;
1370	int smp_pro_id = PCPU_GET(cpuid);
1371
1372	if (TAILQ_EMPTY(&primary->ch_subchans)) {
1373		return outgoing_channel;
1374	}
1375
1376	if (smp_pro_id >= MAXCPU) {
1377		return outgoing_channel;
1378	}
1379
1380	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);
1381
1382	/* XXX need lock */
1383	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
1384		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
1385			continue;
1386		}
1387
1388		if (new_channel->target_vcpu == cur_vcpu){
1389			return new_channel;
1390		}
1391
1392		old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
1393		    (outgoing_channel->target_vcpu - cur_vcpu) :
1394		    (cur_vcpu - outgoing_channel->target_vcpu));
1395
1396		new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
1397		    (new_channel->target_vcpu - cur_vcpu) :
1398		    (cur_vcpu - new_channel->target_vcpu));
1399
1400		if (old_cpu_distance < new_cpu_distance) {
1401			continue;
1402		}
1403
1404		outgoing_channel = new_channel;
1405	}
1406
1407	return(outgoing_channel);
1408}
1409
1410struct hv_vmbus_channel **
1411vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1412{
1413	struct hv_vmbus_channel **ret, *chan;
1414	int i;
1415
1416	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1417	    M_WAITOK);
1418
1419	mtx_lock(&pri_chan->ch_subchan_lock);
1420
1421	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1422		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1423
1424	i = 0;
1425	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1426		/* TODO: refcnt chan */
1427		ret[i] = chan;
1428
1429		++i;
1430		if (i == subchan_cnt)
1431			break;
1432	}
1433	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1434	    pri_chan->ch_subchan_cnt, subchan_cnt));
1435
1436	mtx_unlock(&pri_chan->ch_subchan_lock);
1437
1438	return ret;
1439}
1440
void
vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
{

	/* Release the pointer array returned by vmbus_get_subchan(). */
	free(subchan, M_TEMP);
}
1447
void
vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan)
{
	/*
	 * Sleep until all of this primary channel's sub-channels have
	 * been detached; vmbus_chan_detach_task() wakes us up as it
	 * decrements ch_subchan_cnt.
	 */
	mtx_lock(&pri_chan->ch_subchan_lock);
	while (pri_chan->ch_subchan_cnt > 0)
		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
	mtx_unlock(&pri_chan->ch_subchan_lock);
}
1456
1457void
1458vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1459{
1460	vmbus_chanmsg_proc_t msg_proc;
1461	uint32_t msg_type;
1462
1463	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1464	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1465	    ("invalid message type %u", msg_type));
1466
1467	msg_proc = vmbus_chan_msgprocs[msg_type];
1468	if (msg_proc != NULL)
1469		msg_proc(sc, msg);
1470}
1471