vmbus_chan.c revision 302872
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 302872 2016-07-15 05:51:58Z sephe $");
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/systm.h>
36#include <sys/mbuf.h>
37#include <sys/lock.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40
41#include <machine/atomic.h>
42#include <machine/bus.h>
43
44#include <vm/vm.h>
45#include <vm/vm_param.h>
46#include <vm/pmap.h>
47
48#include <dev/hyperv/include/hyperv_busdma.h>
49#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
50#include <dev/hyperv/vmbus/hyperv_var.h>
51#include <dev/hyperv/vmbus/vmbus_reg.h>
52#include <dev/hyperv/vmbus/vmbus_var.h>
53
54static void 	vmbus_chan_send_event(hv_vmbus_channel* channel);
55static void	vmbus_chan_update_evtflagcnt(struct vmbus_softc *,
56		    const struct hv_vmbus_channel *);
57
58static void	vmbus_chan_task(void *, int);
59static void	vmbus_chan_task_nobatch(void *, int);
60static void	vmbus_chan_detach_task(void *, int);
61
62static void	vmbus_chan_msgproc_choffer(struct vmbus_softc *,
63		    const struct vmbus_message *);
64static void	vmbus_chan_msgproc_chrescind(struct vmbus_softc *,
65		    const struct vmbus_message *);
66
67/*
68 * Vmbus channel message processing.
69 */
70static const vmbus_chanmsg_proc_t
71vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
72	VMBUS_CHANMSG_PROC(CHOFFER,	vmbus_chan_msgproc_choffer),
73	VMBUS_CHANMSG_PROC(CHRESCIND,	vmbus_chan_msgproc_chrescind),
74
75	VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP),
76	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP),
77	VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP)
78};
79
80/**
81 *  @brief Trigger an event notification on the specified channel
82 */
83static void
84vmbus_chan_send_event(hv_vmbus_channel *channel)
85{
86	struct vmbus_softc *sc = channel->vmbus_sc;
87	uint32_t chanid = channel->ch_id;
88
89	atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT],
90	    1UL << (chanid & VMBUS_EVTFLAG_MASK));
91
92	if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) {
93		atomic_set_int(
94		&sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending,
95		channel->ch_montrig_mask);
96	} else {
97		hypercall_signal_event(channel->ch_monprm_dma.hv_paddr);
98	}
99}
100
101static int
102vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
103{
104	struct hv_vmbus_channel *chan = arg1;
105	int alloc = 0;
106
107	if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF)
108		alloc = 1;
109	return sysctl_handle_int(oidp, &alloc, 0, req);
110}
111
112static void
113vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
114{
115	device_t dev;
116	struct sysctl_oid *devch_sysctl;
117	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
118	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
119	struct sysctl_ctx_list *ctx;
120	uint32_t ch_id;
121	uint16_t sub_ch_id;
122	char name[16];
123
124	hv_vmbus_channel* primary_ch = channel->ch_prichan;
125
126	if (primary_ch == NULL) {
127		dev = channel->ch_dev;
128		ch_id = channel->ch_id;
129	} else {
130		dev = primary_ch->ch_dev;
131		ch_id = primary_ch->ch_id;
132		sub_ch_id = channel->ch_subidx;
133	}
134	ctx = &channel->ch_sysctl_ctx;
135	sysctl_ctx_init(ctx);
136	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
137	devch_sysctl = SYSCTL_ADD_NODE(ctx,
138		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
139		    OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
140	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
141	snprintf(name, sizeof(name), "%d", ch_id);
142	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
143	    	    SYSCTL_CHILDREN(devch_sysctl),
144	    	    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
145
146	if (primary_ch != NULL) {
147		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
148			SYSCTL_CHILDREN(devch_id_sysctl),
149			OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
150		snprintf(name, sizeof(name), "%d", sub_ch_id);
151		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
152			SYSCTL_CHILDREN(devch_sub_sysctl),
153			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
154
155		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
156		    OID_AUTO, "chanid", CTLFLAG_RD,
157		    &channel->ch_id, 0, "channel id");
158	}
159	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
160	    "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
161	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
162	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
163	    channel, 0, vmbus_channel_sysctl_monalloc, "I",
164	    "is monitor allocated to this channel");
165
166	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
167                    SYSCTL_CHILDREN(devch_id_sysctl),
168                    OID_AUTO,
169		    "in",
170		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
171	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
172                    SYSCTL_CHILDREN(devch_id_sysctl),
173                    OID_AUTO,
174		    "out",
175		    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
176	hv_ring_buffer_stat(ctx,
177		SYSCTL_CHILDREN(devch_id_in_sysctl),
178		&(channel->inbound),
179		"inbound ring buffer stats");
180	hv_ring_buffer_stat(ctx,
181		SYSCTL_CHILDREN(devch_id_out_sysctl),
182		&(channel->outbound),
183		"outbound ring buffer stats");
184}
185
186/**
187 * @brief Open the specified channel
188 */
189int
190hv_vmbus_channel_open(
191	hv_vmbus_channel*		new_channel,
192	uint32_t			send_ring_buffer_size,
193	uint32_t			recv_ring_buffer_size,
194	void*				user_data,
195	uint32_t			user_data_len,
196	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
197	void* 				context)
198{
199	struct vmbus_softc *sc = new_channel->vmbus_sc;
200	const struct vmbus_chanmsg_chopen_resp *resp;
201	const struct vmbus_message *msg;
202	struct vmbus_chanmsg_chopen *req;
203	struct vmbus_msghc *mh;
204	uint32_t status;
205	int ret = 0;
206	uint8_t *br;
207
208	if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) {
209		device_printf(sc->vmbus_dev,
210		    "invalid udata len %u for chan%u\n",
211		    user_data_len, new_channel->ch_id);
212		return EINVAL;
213	}
214	KASSERT((send_ring_buffer_size & PAGE_MASK) == 0,
215	    ("send bufring size is not multiple page"));
216	KASSERT((recv_ring_buffer_size & PAGE_MASK) == 0,
217	    ("recv bufring size is not multiple page"));
218
219	if (atomic_testandset_int(&new_channel->ch_stflags,
220	    VMBUS_CHAN_ST_OPENED_SHIFT))
221		panic("double-open chan%u", new_channel->ch_id);
222
223	new_channel->on_channel_callback = pfn_on_channel_callback;
224	new_channel->channel_callback_context = context;
225
226	vmbus_chan_update_evtflagcnt(sc, new_channel);
227
228	new_channel->rxq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq,
229	    new_channel->target_cpu);
230	if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) {
231		TASK_INIT(&new_channel->channel_task, 0,
232		    vmbus_chan_task, new_channel);
233	} else {
234		TASK_INIT(&new_channel->channel_task, 0,
235		    vmbus_chan_task_nobatch, new_channel);
236	}
237
238	/*
239	 * Allocate the TX+RX bufrings.
240	 * XXX should use ch_dev dtag
241	 */
242	br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
243	    PAGE_SIZE, 0, send_ring_buffer_size + recv_ring_buffer_size,
244	    &new_channel->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
245	if (br == NULL) {
246		device_printf(sc->vmbus_dev, "bufring allocation failed\n");
247		ret = ENOMEM;
248		goto failed;
249	}
250	new_channel->ch_bufring = br;
251
252	/* TX bufring comes first */
253	hv_vmbus_ring_buffer_init(&new_channel->outbound,
254	    br, send_ring_buffer_size);
255	/* RX bufring immediately follows TX bufring */
256	hv_vmbus_ring_buffer_init(&new_channel->inbound,
257	    br + send_ring_buffer_size, recv_ring_buffer_size);
258
259	/* Create sysctl tree for this channel */
260	vmbus_channel_sysctl_create(new_channel);
261
262	/*
263	 * Connect the bufrings, both RX and TX, to this channel.
264	 */
265	ret = vmbus_chan_gpadl_connect(new_channel,
266		new_channel->ch_bufring_dma.hv_paddr,
267		send_ring_buffer_size + recv_ring_buffer_size,
268		&new_channel->ch_bufring_gpadl);
269	if (ret != 0) {
270		device_printf(sc->vmbus_dev,
271		    "failed to connect bufring GPADL to chan%u\n",
272		    new_channel->ch_id);
273		goto failed;
274	}
275
276	/*
277	 * Open channel w/ the bufring GPADL on the target CPU.
278	 */
279	mh = vmbus_msghc_get(sc, sizeof(*req));
280	if (mh == NULL) {
281		device_printf(sc->vmbus_dev,
282		    "can not get msg hypercall for chopen(chan%u)\n",
283		    new_channel->ch_id);
284		ret = ENXIO;
285		goto failed;
286	}
287
288	req = vmbus_msghc_dataptr(mh);
289	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN;
290	req->chm_chanid = new_channel->ch_id;
291	req->chm_openid = new_channel->ch_id;
292	req->chm_gpadl = new_channel->ch_bufring_gpadl;
293	req->chm_vcpuid = new_channel->target_vcpu;
294	req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT;
295	if (user_data_len)
296		memcpy(req->chm_udata, user_data, user_data_len);
297
298	ret = vmbus_msghc_exec(sc, mh);
299	if (ret != 0) {
300		device_printf(sc->vmbus_dev,
301		    "chopen(chan%u) msg hypercall exec failed: %d\n",
302		    new_channel->ch_id, ret);
303		vmbus_msghc_put(sc, mh);
304		goto failed;
305	}
306
307	msg = vmbus_msghc_wait_result(sc, mh);
308	resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data;
309	status = resp->chm_status;
310
311	vmbus_msghc_put(sc, mh);
312
313	if (status == 0) {
314		if (bootverbose) {
315			device_printf(sc->vmbus_dev, "chan%u opened\n",
316			    new_channel->ch_id);
317		}
318		return 0;
319	}
320
321	device_printf(sc->vmbus_dev, "failed to open chan%u\n",
322	    new_channel->ch_id);
323	ret = ENXIO;
324
325failed:
326	if (new_channel->ch_bufring_gpadl) {
327		hv_vmbus_channel_teardown_gpdal(new_channel,
328		    new_channel->ch_bufring_gpadl);
329		new_channel->ch_bufring_gpadl = 0;
330	}
331	if (new_channel->ch_bufring != NULL) {
332		hyperv_dmamem_free(&new_channel->ch_bufring_dma,
333		    new_channel->ch_bufring);
334		new_channel->ch_bufring = NULL;
335	}
336	atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED);
337	return ret;
338}
339
340/**
341 * @brief Establish a GPADL for the specified buffer
342 */
343int
344hv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel,
345    void *contig_buffer, uint32_t size, uint32_t *gpadl)
346{
347	return vmbus_chan_gpadl_connect(channel,
348	    hv_get_phys_addr(contig_buffer), size, gpadl);
349}
350
/*
 * Connect the GPA range [paddr, paddr + size) to 'chan' as a GPADL.
 *
 * The GPADL id is allocated here and returned through 'gpadl0'.
 * Since one channel message holds only a limited number of page
 * addresses, the connection may require a GPADL_CONN message followed
 * by several GPADL_SUBCONN messages; the host's status response is
 * waited for after all messages have been sent.
 *
 * Returns 0 on success, EOPNOTSUPP if the range does not fit into a
 * single GPA range, or EIO on message/hypercall failure.
 */
int
vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr,
    int size, uint32_t *gpadl0)
{
	struct vmbus_softc *sc = chan->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_gpadl_conn *req;
	const struct vmbus_message *msg;
	size_t reqsz;
	uint32_t gpadl, status;
	int page_count, range_len, i, cnt, error;
	uint64_t page_id;

	/*
	 * Preliminary checks.
	 */

	KASSERT((size & PAGE_MASK) == 0,
	    ("invalid GPA size %d, not multiple page size", size));
	page_count = size >> PAGE_SHIFT;

	KASSERT((paddr & PAGE_MASK) == 0,
	    ("GPA is not page aligned %jx", (uintmax_t)paddr));
	page_id = paddr >> PAGE_SHIFT;

	range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]);
	/*
	 * We don't support multiple GPA ranges.
	 */
	if (range_len > UINT16_MAX) {
		device_printf(sc->vmbus_dev, "GPA too large, %d pages\n",
		    page_count);
		return EOPNOTSUPP;
	}

	/*
	 * Allocate GPADL id.
	 */
	gpadl = vmbus_gpadl_alloc(sc);
	*gpadl0 = gpadl;

	/*
	 * Connect this GPADL to the target channel.
	 *
	 * NOTE:
	 * Since each message can only hold small set of page
	 * addresses, several messages may be required to
	 * complete the connection.
	 */
	if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX)
		cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX;
	else
		cnt = page_count;
	page_count -= cnt;

	/* Size the initial CONN message for 'cnt' page addresses. */
	reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn,
	    chm_range.gpa_page[cnt]);
	mh = vmbus_msghc_get(sc, reqsz);
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for gpadl->chan%u\n",
		    chan->ch_id);
		return EIO;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN;
	req->chm_chanid = chan->ch_id;
	req->chm_gpadl = gpadl;
	req->chm_range_len = range_len;
	req->chm_range_cnt = 1;
	req->chm_range.gpa_len = size;
	req->chm_range.gpa_ofs = 0;
	for (i = 0; i < cnt; ++i)
		req->chm_range.gpa_page[i] = page_id++;

	error = vmbus_msghc_exec(sc, mh);
	if (error) {
		device_printf(sc->vmbus_dev,
		    "gpadl->chan%u msg hypercall exec failed: %d\n",
		    chan->ch_id, error);
		vmbus_msghc_put(sc, mh);
		return error;
	}

	/* Send the remaining page addresses in SUBCONN messages. */
	while (page_count > 0) {
		struct vmbus_chanmsg_gpadl_subconn *subreq;

		if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX)
			cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX;
		else
			cnt = page_count;
		page_count -= cnt;

		/* Reuse the same hypercall context for each SUBCONN. */
		reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn,
		    chm_gpa_page[cnt]);
		vmbus_msghc_reset(mh, reqsz);

		subreq = vmbus_msghc_dataptr(mh);
		subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN;
		subreq->chm_gpadl = gpadl;
		for (i = 0; i < cnt; ++i)
			subreq->chm_gpa_page[i] = page_id++;

		vmbus_msghc_exec_noresult(mh);
	}
	KASSERT(page_count == 0, ("invalid page count %d", page_count));

	/* Wait for the host's CONNRESP status. */
	msg = vmbus_msghc_wait_result(sc, mh);
	status = ((const struct vmbus_chanmsg_gpadl_connresp *)
	    msg->msg_data)->chm_status;

	vmbus_msghc_put(sc, mh);

	if (status != 0) {
		device_printf(sc->vmbus_dev, "gpadl->chan%u failed: "
		    "status %u\n", chan->ch_id, status);
		return EIO;
	} else {
		if (bootverbose) {
			device_printf(sc->vmbus_dev, "gpadl->chan%u "
			    "succeeded\n", chan->ch_id);
		}
	}
	return 0;
}
477
478/*
479 * Disconnect the GPA from the target channel
480 */
481int
482hv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl)
483{
484	struct vmbus_softc *sc = chan->vmbus_sc;
485	struct vmbus_msghc *mh;
486	struct vmbus_chanmsg_gpadl_disconn *req;
487	int error;
488
489	mh = vmbus_msghc_get(sc, sizeof(*req));
490	if (mh == NULL) {
491		device_printf(sc->vmbus_dev,
492		    "can not get msg hypercall for gpa x->chan%u\n",
493		    chan->ch_id);
494		return EBUSY;
495	}
496
497	req = vmbus_msghc_dataptr(mh);
498	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN;
499	req->chm_chanid = chan->ch_id;
500	req->chm_gpadl = gpadl;
501
502	error = vmbus_msghc_exec(sc, mh);
503	if (error) {
504		device_printf(sc->vmbus_dev,
505		    "gpa x->chan%u msg hypercall exec failed: %d\n",
506		    chan->ch_id, error);
507		vmbus_msghc_put(sc, mh);
508		return error;
509	}
510
511	vmbus_msghc_wait_result(sc, mh);
512	/* Discard result; no useful information */
513	vmbus_msghc_put(sc, mh);
514
515	return 0;
516}
517
/*
 * Close a single channel: stop RX processing, send CHCLOSE to the
 * host, tear down the bufring GPADL and release the bufrings.
 *
 * NOTE(review): if the CHCLOSE message can not be obtained or fails to
 * execute, this function returns early and the bufring GPADL and
 * bufring memory are left allocated — verify whether that is intended.
 */
static void
hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
{
	struct vmbus_softc *sc = channel->vmbus_sc;
	struct vmbus_msghc *mh;
	struct vmbus_chanmsg_chclose *req;
	struct taskqueue *rxq = channel->rxq;
	int error;

	/* TODO: stringent check */
	atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED);

	sysctl_ctx_free(&channel->ch_sysctl_ctx);

	/*
	 * set rxq to NULL to avoid more requests be scheduled
	 */
	channel->rxq = NULL;
	/* Wait for any in-flight RX task to finish before clearing cb. */
	taskqueue_drain(rxq, &channel->channel_task);
	channel->on_channel_callback = NULL;

	/**
	 * Send a closing message
	 */

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for chclose(chan%u)\n",
		    channel->ch_id);
		return;
	}

	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE;
	req->chm_chanid = channel->ch_id;

	/* CHCLOSE is not acknowledged by the host; fire and forget. */
	error = vmbus_msghc_exec_noresult(mh);
	vmbus_msghc_put(sc, mh);

	if (error) {
		device_printf(sc->vmbus_dev,
		    "chclose(chan%u) msg hypercall exec failed: %d\n",
		    channel->ch_id, error);
		return;
	} else if (bootverbose) {
		device_printf(sc->vmbus_dev, "close chan%u\n",
		    channel->ch_id);
	}

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ch_bufring_gpadl) {
		hv_vmbus_channel_teardown_gpdal(channel,
		    channel->ch_bufring_gpadl);
		channel->ch_bufring_gpadl = 0;
	}

	/* TODO: Send a msg to release the childRelId */

	/* cleanup the ring buffers for this channel */
	hv_ring_buffer_cleanup(&channel->outbound);
	hv_ring_buffer_cleanup(&channel->inbound);

	if (channel->ch_bufring != NULL) {
		hyperv_dmamem_free(&channel->ch_bufring_dma,
		    channel->ch_bufring);
		channel->ch_bufring = NULL;
	}
}
587
588/*
589 * Caller should make sure that all sub-channels have
590 * been added to 'chan' and all to-be-closed channels
591 * are not being opened.
592 */
593void
594hv_vmbus_channel_close(struct hv_vmbus_channel *chan)
595{
596	int subchan_cnt;
597
598	if (!VMBUS_CHAN_ISPRIMARY(chan)) {
599		/*
600		 * Sub-channel is closed when its primary channel
601		 * is closed; done.
602		 */
603		return;
604	}
605
606	/*
607	 * Close all sub-channels, if any.
608	 */
609	subchan_cnt = chan->ch_subchan_cnt;
610	if (subchan_cnt > 0) {
611		struct hv_vmbus_channel **subchan;
612		int i;
613
614		subchan = vmbus_get_subchan(chan, subchan_cnt);
615		for (i = 0; i < subchan_cnt; ++i)
616			hv_vmbus_channel_close_internal(subchan[i]);
617		vmbus_rel_subchan(subchan, subchan_cnt);
618	}
619
620	/* Then close the primary channel. */
621	hv_vmbus_channel_close_internal(chan);
622}
623
624/**
625 * @brief Send the specified buffer on the given channel
626 */
627int
628hv_vmbus_channel_send_packet(
629	hv_vmbus_channel*	channel,
630	void*			buffer,
631	uint32_t		buffer_len,
632	uint64_t		request_id,
633	hv_vmbus_packet_type	type,
634	uint32_t		flags)
635{
636	int			ret = 0;
637	hv_vm_packet_descriptor	desc;
638	uint32_t		packet_len;
639	uint64_t		aligned_data;
640	uint32_t		packet_len_aligned;
641	boolean_t		need_sig;
642	struct iovec		iov[3];
643
644	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
645	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
646	aligned_data = 0;
647
648	/* Setup the descriptor */
649	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
650	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
651			    /* in 8-bytes granularity */
652	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
653	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
654	desc.transaction_id = request_id;
655
656	iov[0].iov_base = &desc;
657	iov[0].iov_len = sizeof(hv_vm_packet_descriptor);
658
659	iov[1].iov_base = buffer;
660	iov[1].iov_len = buffer_len;
661
662	iov[2].iov_base = &aligned_data;
663	iov[2].iov_len = packet_len_aligned - packet_len;
664
665	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
666
667	/* TODO: We should determine if this is optional */
668	if (ret == 0 && need_sig)
669		vmbus_chan_send_event(channel);
670
671	return (ret);
672}
673
674/**
675 * @brief Send a range of single-page buffer packets using
676 * a GPADL Direct packet type
677 */
678int
679hv_vmbus_channel_send_packet_pagebuffer(
680	hv_vmbus_channel*	channel,
681	hv_vmbus_page_buffer	page_buffers[],
682	uint32_t		page_count,
683	void*			buffer,
684	uint32_t		buffer_len,
685	uint64_t		request_id)
686{
687
688	int					ret = 0;
689	boolean_t				need_sig;
690	uint32_t				packet_len;
691	uint32_t				page_buflen;
692	uint32_t				packetLen_aligned;
693	struct iovec				iov[4];
694	hv_vmbus_channel_packet_page_buffer	desc;
695	uint32_t				descSize;
696	uint64_t				alignedData = 0;
697
698	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
699		return (EINVAL);
700
701	/*
702	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
703	 *  is the largest size we support
704	 */
705	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
706	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
707	packet_len = descSize + page_buflen + buffer_len;
708	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
709
710	/* Setup the descriptor */
711	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
712	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
713	/* in 8-bytes granularity */
714	desc.data_offset8 = (descSize + page_buflen) >> 3;
715	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
716	desc.transaction_id = request_id;
717	desc.range_count = page_count;
718
719	iov[0].iov_base = &desc;
720	iov[0].iov_len = descSize;
721
722	iov[1].iov_base = page_buffers;
723	iov[1].iov_len = page_buflen;
724
725	iov[2].iov_base = buffer;
726	iov[2].iov_len = buffer_len;
727
728	iov[3].iov_base = &alignedData;
729	iov[3].iov_len = packetLen_aligned - packet_len;
730
731	ret = hv_ring_buffer_write(&channel->outbound, iov, 4, &need_sig);
732
733	/* TODO: We should determine if this is optional */
734	if (ret == 0 && need_sig)
735		vmbus_chan_send_event(channel);
736
737	return (ret);
738}
739
740/**
741 * @brief Send a multi-page buffer packet using a GPADL Direct packet type
742 */
743int
744hv_vmbus_channel_send_packet_multipagebuffer(
745	hv_vmbus_channel*		channel,
746	hv_vmbus_multipage_buffer*	multi_page_buffer,
747	void*				buffer,
748	uint32_t			buffer_len,
749	uint64_t			request_id)
750{
751
752	int			ret = 0;
753	uint32_t		desc_size;
754	boolean_t		need_sig;
755	uint32_t		packet_len;
756	uint32_t		packet_len_aligned;
757	uint32_t		pfn_count;
758	uint64_t		aligned_data = 0;
759	struct iovec		iov[3];
760	hv_vmbus_channel_packet_multipage_buffer desc;
761
762	pfn_count =
763	    HV_NUM_PAGES_SPANNED(
764		    multi_page_buffer->offset,
765		    multi_page_buffer->length);
766
767	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
768	    return (EINVAL);
769	/*
770	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
771	 * is the largest size we support
772	 */
773	desc_size =
774	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
775		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
776			sizeof(uint64_t));
777	packet_len = desc_size + buffer_len;
778	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
779
780	/*
781	 * Setup the descriptor
782	 */
783	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
784	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
785	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
786	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
787	desc.transaction_id = request_id;
788	desc.range_count = 1;
789
790	desc.range.length = multi_page_buffer->length;
791	desc.range.offset = multi_page_buffer->offset;
792
793	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
794		pfn_count * sizeof(uint64_t));
795
796	iov[0].iov_base = &desc;
797	iov[0].iov_len = desc_size;
798
799	iov[1].iov_base = buffer;
800	iov[1].iov_len = buffer_len;
801
802	iov[2].iov_base = &aligned_data;
803	iov[2].iov_len = packet_len_aligned - packet_len;
804
805	ret = hv_ring_buffer_write(&channel->outbound, iov, 3, &need_sig);
806
807	/* TODO: We should determine if this is optional */
808	if (ret == 0 && need_sig)
809		vmbus_chan_send_event(channel);
810
811	return (ret);
812}
813
814/**
815 * @brief Retrieve the user packet on the specified channel
816 */
817int
818hv_vmbus_channel_recv_packet(
819	hv_vmbus_channel*	channel,
820	void*			Buffer,
821	uint32_t		buffer_len,
822	uint32_t*		buffer_actual_len,
823	uint64_t*		request_id)
824{
825	int			ret;
826	uint32_t		user_len;
827	uint32_t		packet_len;
828	hv_vm_packet_descriptor	desc;
829
830	*buffer_actual_len = 0;
831	*request_id = 0;
832
833	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
834		sizeof(hv_vm_packet_descriptor));
835	if (ret != 0)
836		return (0);
837
838	packet_len = desc.length8 << 3;
839	user_len = packet_len - (desc.data_offset8 << 3);
840
841	*buffer_actual_len = user_len;
842
843	if (user_len > buffer_len)
844		return (EINVAL);
845
846	*request_id = desc.transaction_id;
847
848	/* Copy over the packet to the user buffer */
849	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
850		(desc.data_offset8 << 3));
851
852	return (0);
853}
854
855/**
856 * @brief Retrieve the raw packet on the specified channel
857 */
858int
859hv_vmbus_channel_recv_packet_raw(
860	hv_vmbus_channel*	channel,
861	void*			buffer,
862	uint32_t		buffer_len,
863	uint32_t*		buffer_actual_len,
864	uint64_t*		request_id)
865{
866	int		ret;
867	uint32_t	packetLen;
868	hv_vm_packet_descriptor	desc;
869
870	*buffer_actual_len = 0;
871	*request_id = 0;
872
873	ret = hv_ring_buffer_peek(
874		&channel->inbound, &desc,
875		sizeof(hv_vm_packet_descriptor));
876
877	if (ret != 0)
878	    return (0);
879
880	packetLen = desc.length8 << 3;
881	*buffer_actual_len = packetLen;
882
883	if (packetLen > buffer_len)
884	    return (ENOBUFS);
885
886	*request_id = desc.transaction_id;
887
888	/* Copy over the entire packet to the user buffer */
889	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
890
891	return (0);
892}
893
894static void
895vmbus_chan_task(void *xchan, int pending __unused)
896{
897	struct hv_vmbus_channel *chan = xchan;
898	void (*callback)(void *);
899	void *arg;
900
901	arg = chan->channel_callback_context;
902	callback = chan->on_channel_callback;
903
904	/*
905	 * Optimize host to guest signaling by ensuring:
906	 * 1. While reading the channel, we disable interrupts from
907	 *    host.
908	 * 2. Ensure that we process all posted messages from the host
909	 *    before returning from this callback.
910	 * 3. Once we return, enable signaling from the host. Once this
911	 *    state is set we check to see if additional packets are
912	 *    available to read. In this case we repeat the process.
913	 *
914	 * NOTE: Interrupt has been disabled in the ISR.
915	 */
916	for (;;) {
917		uint32_t left;
918
919		callback(arg);
920
921		left = hv_ring_buffer_read_end(&chan->inbound);
922		if (left == 0) {
923			/* No more data in RX bufring; done */
924			break;
925		}
926		hv_ring_buffer_read_begin(&chan->inbound);
927	}
928}
929
930static void
931vmbus_chan_task_nobatch(void *xchan, int pending __unused)
932{
933	struct hv_vmbus_channel *chan = xchan;
934
935	chan->on_channel_callback(chan->channel_callback_context);
936}
937
/*
 * Scan 'flag_cnt' words of event flags; for each bit set, clear it
 * and schedule the RX task of the corresponding channel.
 */
static __inline void
vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
    int flag_cnt)
{
	int f;

	for (f = 0; f < flag_cnt; ++f) {
		uint32_t chid_base;
		u_long flags;
		int chid_ofs;

		/* Cheap pre-test to skip words with no pending events. */
		if (event_flags[f] == 0)
			continue;

		/* Atomically claim all pending events in this word. */
		flags = atomic_swap_long(&event_flags[f], 0);
		chid_base = f << VMBUS_EVTFLAG_SHIFT;

		while ((chid_ofs = ffsl(flags)) != 0) {
			struct hv_vmbus_channel *channel;

			--chid_ofs; /* NOTE: ffsl is 1-based */
			flags &= ~(1UL << chid_ofs);

			channel = sc->vmbus_chmap[chid_base + chid_ofs];

			/* if channel is closed or closing */
			if (channel == NULL || channel->rxq == NULL)
				continue;

			/* Batched channels mask host signaling up front. */
			if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD)
				hv_ring_buffer_read_begin(&channel->inbound);
			taskqueue_enqueue(channel->rxq, &channel->channel_task);
		}
	}
}
973
974void
975vmbus_event_proc(struct vmbus_softc *sc, int cpu)
976{
977	struct vmbus_evtflags *eventf;
978
979	/*
980	 * On Host with Win8 or above, the event page can be checked directly
981	 * to get the id of the channel that has the pending interrupt.
982	 */
983	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
984	vmbus_event_flags_proc(sc, eventf->evt_flags,
985	    VMBUS_PCPU_GET(sc, event_flags_cnt, cpu));
986}
987
/*
 * Compat event processing for older hosts (cf. vmbus_event_proc(),
 * whose path is for Win8 and above): bit0 of the per-cpu event page
 * only indicates that _some_ channel has a pending event; the real
 * per-channel flags live in sc->vmbus_rx_evtflags.
 */
void
vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
{
	struct vmbus_evtflags *eventf;

	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
	}
}
999
/*
 * Raise the target CPU's event-flag word count so that event
 * processing scans enough words to cover this channel's id.
 *
 * Lock-free: channels targeting the same CPU may race here, so a CAS
 * loop is used; the count only ever grows (monotonic maximum).
 */
static void
vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc,
    const struct hv_vmbus_channel *chan)
{
	volatile int *flag_cnt_ptr;
	int flag_cnt;

	/* Number of words needed to reach this channel's flag bit. */
	flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1;
	flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->target_cpu);

	for (;;) {
		int old_flag_cnt;

		old_flag_cnt = *flag_cnt_ptr;
		/* Already large enough; nothing to do. */
		if (old_flag_cnt >= flag_cnt)
			break;
		if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) {
			if (bootverbose) {
				device_printf(sc->vmbus_dev,
				    "channel%u update cpu%d flag_cnt to %d\n",
				    chan->ch_id,
				    chan->target_cpu, flag_cnt);
			}
			break;
		}
	}
}
1027
/*
 * Allocate and minimally initialize a channel object.
 *
 * The monitor parameter block is allocated from DMA'able memory
 * because its physical address is handed to the signal-event
 * hypercall (see vmbus_chan_send_event()).
 *
 * Returns NULL if the monitor parameter allocation fails.
 */
static struct hv_vmbus_channel *
vmbus_chan_alloc(struct vmbus_softc *sc)
{
	struct hv_vmbus_channel *chan;

	chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO);

	chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev),
	    HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param),
	    &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (chan->ch_monprm == NULL) {
		device_printf(sc->vmbus_dev, "monprm alloc failed\n");
		free(chan, M_DEVBUF);
		return NULL;
	}

	chan->vmbus_sc = sc;
	mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF);
	TAILQ_INIT(&chan->ch_subchans);
	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);

	return chan;
}
1051
/*
 * Release all resources owned by a channel object; counterpart of
 * vmbus_chan_alloc().
 */
static void
vmbus_chan_free(struct hv_vmbus_channel *chan)
{
	/* TODO: assert sub-channel list is empty */
	/* TODO: assert no longer on the primary channel's sub-channel list */
	/* TODO: assert no longer on the vmbus channel list */
	hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm);
	mtx_destroy(&chan->ch_subchan_lock);
	free(chan, M_DEVBUF);
}
1062
/*
 * Register a newly offered channel: enter it into the per-softc id map
 * and, for a sub-channel, link it onto its primary channel's sub-channel
 * list.  Returns 0 on success or EINVAL for a malformed or duplicated
 * offer.
 */
static int
vmbus_chan_add(struct hv_vmbus_channel *newchan)
{
	struct vmbus_softc *sc = newchan->vmbus_sc;
	struct hv_vmbus_channel *prichan;

	if (newchan->ch_id == 0) {
		/*
		 * XXX
		 * Chan0 will neither be processed nor should be offered;
		 * skip it.
		 */
		device_printf(sc->vmbus_dev, "got chan0 offer, discard\n");
		return EINVAL;
	} else if (newchan->ch_id >= VMBUS_CHAN_MAX) {
		device_printf(sc->vmbus_dev, "invalid chan%u offer\n",
		    newchan->ch_id);
		return EINVAL;
	}
	/* Make the channel reachable by id, e.g. for rescind handling. */
	sc->vmbus_chmap[newchan->ch_id] = newchan;

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n",
		    newchan->ch_id, newchan->ch_subidx);
	}

	mtx_lock(&sc->vmbus_prichan_lock);
	TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) {
		/*
		 * Sub-channel will have the same type GUID and instance
		 * GUID as its primary channel.
		 */
		if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type,
		    sizeof(struct hyperv_guid)) == 0 &&
		    memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst,
		    sizeof(struct hyperv_guid)) == 0)
			break;
	}
	if (VMBUS_CHAN_ISPRIMARY(newchan)) {
		if (prichan == NULL) {
			/* Install the new primary channel */
			TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan,
			    ch_prilink);
			mtx_unlock(&sc->vmbus_prichan_lock);
			return 0;
		} else {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "duplicated primary "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
	} else { /* Sub-channel */
		if (prichan == NULL) {
			mtx_unlock(&sc->vmbus_prichan_lock);
			device_printf(sc->vmbus_dev, "no primary chan for "
			    "chan%u\n", newchan->ch_id);
			return EINVAL;
		}
		/*
		 * Found the primary channel for this sub-channel and
		 * move on.
		 *
		 * XXX refcnt prichan
		 */
	}
	mtx_unlock(&sc->vmbus_prichan_lock);

	/*
	 * This is a sub-channel; link it with the primary channel.
	 */
	KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan),
	    ("new channel is not sub-channel"));
	KASSERT(prichan != NULL, ("no primary channel"));

	/* The sub-channel shares the primary channel's device. */
	newchan->ch_prichan = prichan;
	newchan->ch_dev = prichan->ch_dev;

	mtx_lock(&prichan->ch_subchan_lock);
	TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink);
	/*
	 * Bump up sub-channel count and notify anyone that is
	 * interested in this sub-channel, after this sub-channel
	 * is setup.
	 */
	prichan->ch_subchan_cnt++;
	mtx_unlock(&prichan->ch_subchan_lock);
	/* Wake up vmbus_get_subchan() waiters. */
	wakeup(prichan);

	return 0;
}
1153
1154void
1155vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
1156{
1157	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
1158
1159	if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1160	    chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) {
1161		/* Only cpu0 is supported */
1162		cpu = 0;
1163	}
1164
1165	chan->target_cpu = cpu;
1166	chan->target_vcpu = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
1167
1168	if (bootverbose) {
1169		printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
1170		    chan->ch_id,
1171		    chan->target_cpu, chan->target_vcpu);
1172	}
1173}
1174
1175void
1176vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
1177{
1178	static uint32_t vmbus_chan_nextcpu;
1179	int cpu;
1180
1181	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
1182	vmbus_channel_cpu_set(chan, cpu);
1183}
1184
static void
vmbus_chan_cpu_default(struct hv_vmbus_channel *chan)
{
	/*
	 * By default, pin the channel to cpu0.  Devices having
	 * special channel-cpu mapping requirement should call
	 * vmbus_channel_cpu_{set,rr}().
	 */
	vmbus_channel_cpu_set(chan, 0);
}
1195
/*
 * Channel offer message handler: allocate a channel object, populate it
 * from the host's offer and register it with the bus.  For a primary
 * channel a child device is attached as well.
 */
static void
vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
    const struct vmbus_message *msg)
{
	const struct vmbus_chanmsg_choffer *offer;
	struct hv_vmbus_channel *chan;
	int error;

	offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data;

	chan = vmbus_chan_alloc(sc);
	if (chan == NULL) {
		device_printf(sc->vmbus_dev, "allocate chan%u failed\n",
		    offer->chm_chanid);
		return;
	}

	/* Identity: channel id, sub-channel index, type/instance GUIDs. */
	chan->ch_id = offer->chm_chanid;
	chan->ch_subidx = offer->chm_subidx;
	chan->ch_guid_type = offer->chm_chtype;
	chan->ch_guid_inst = offer->chm_chinst;

	/* Batch reading is on by default */
	chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD;

	/* WS2008 hosts only use the global event connection id. */
	chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT;
	if (sc->vmbus_version != VMBUS_VERSION_WS2008)
		chan->ch_monprm->mp_connid = offer->chm_connid;

	if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
		/*
		 * Setup MNF stuffs.
		 */
		chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF;
		chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN;
		if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX)
			panic("invalid monitor trigger %u", offer->chm_montrig);
		chan->ch_montrig_mask =
		    1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
	}

	/* Select default cpu for this channel. */
	vmbus_chan_cpu_default(chan);

	error = vmbus_chan_add(chan);
	if (error) {
		device_printf(sc->vmbus_dev, "add chan%u failed: %d\n",
		    chan->ch_id, error);
		vmbus_chan_free(chan);
		return;
	}

	if (VMBUS_CHAN_ISPRIMARY(chan)) {
		/*
		 * Add device for this primary channel.
		 *
		 * NOTE:
		 * Error is ignored here; don't have much to do if error
		 * really happens.
		 */
		vmbus_add_child(chan);
	}
}
1259
1260/*
1261 * XXX pretty broken; need rework.
1262 */
1263static void
1264vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc,
1265    const struct vmbus_message *msg)
1266{
1267	const struct vmbus_chanmsg_chrescind *note;
1268	struct hv_vmbus_channel *chan;
1269
1270	note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data;
1271	if (note->chm_chanid > VMBUS_CHAN_MAX) {
1272		device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n",
1273		    note->chm_chanid);
1274		return;
1275	}
1276
1277	if (bootverbose) {
1278		device_printf(sc->vmbus_dev, "chan%u rescinded\n",
1279		    note->chm_chanid);
1280	}
1281
1282	chan = sc->vmbus_chmap[note->chm_chanid];
1283	if (chan == NULL)
1284		return;
1285	sc->vmbus_chmap[note->chm_chanid] = NULL;
1286
1287	taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task);
1288}
1289
1290static void
1291vmbus_chan_detach_task(void *xchan, int pending __unused)
1292{
1293	struct hv_vmbus_channel *chan = xchan;
1294
1295	if (VMBUS_CHAN_ISPRIMARY(chan)) {
1296		/* Only primary channel owns the device */
1297		vmbus_delete_child(chan);
1298		/* NOTE: DO NOT free primary channel for now */
1299	} else {
1300		struct vmbus_softc *sc = chan->vmbus_sc;
1301		struct hv_vmbus_channel *pri_chan = chan->ch_prichan;
1302		struct vmbus_chanmsg_chfree *req;
1303		struct vmbus_msghc *mh;
1304		int error;
1305
1306		mh = vmbus_msghc_get(sc, sizeof(*req));
1307		if (mh == NULL) {
1308			device_printf(sc->vmbus_dev,
1309			    "can not get msg hypercall for chfree(chan%u)\n",
1310			    chan->ch_id);
1311			goto remove;
1312		}
1313
1314		req = vmbus_msghc_dataptr(mh);
1315		req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE;
1316		req->chm_chanid = chan->ch_id;
1317
1318		error = vmbus_msghc_exec_noresult(mh);
1319		vmbus_msghc_put(sc, mh);
1320
1321		if (error) {
1322			device_printf(sc->vmbus_dev,
1323			    "chfree(chan%u) failed: %d",
1324			    chan->ch_id, error);
1325			/* NOTE: Move on! */
1326		} else {
1327			if (bootverbose) {
1328				device_printf(sc->vmbus_dev, "chan%u freed\n",
1329				    chan->ch_id);
1330			}
1331		}
1332remove:
1333		mtx_lock(&pri_chan->ch_subchan_lock);
1334		TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink);
1335		KASSERT(pri_chan->ch_subchan_cnt > 0,
1336		    ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt));
1337		pri_chan->ch_subchan_cnt--;
1338		mtx_unlock(&pri_chan->ch_subchan_lock);
1339		wakeup(pri_chan);
1340
1341		vmbus_chan_free(chan);
1342	}
1343}
1344
1345/*
1346 * Detach all devices and destroy the corresponding primary channels.
1347 */
1348void
1349vmbus_chan_destroy_all(struct vmbus_softc *sc)
1350{
1351	struct hv_vmbus_channel *chan;
1352
1353	mtx_lock(&sc->vmbus_prichan_lock);
1354	while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) {
1355		KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
1356		TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink);
1357		mtx_unlock(&sc->vmbus_prichan_lock);
1358
1359		vmbus_delete_child(chan);
1360		vmbus_chan_free(chan);
1361
1362		mtx_lock(&sc->vmbus_prichan_lock);
1363	}
1364	bzero(sc->vmbus_chmap,
1365	    sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX);
1366	mtx_unlock(&sc->vmbus_prichan_lock);
1367}
1368
1369/**
1370 * @brief Select the best outgoing channel
1371 *
1372 * The channel whose vcpu binding is closest to the currect vcpu will
1373 * be selected.
1374 * If no multi-channel, always select primary channel
1375 *
1376 * @param primary - primary channel
1377 */
1378struct hv_vmbus_channel *
1379vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
1380{
1381	hv_vmbus_channel *new_channel = NULL;
1382	hv_vmbus_channel *outgoing_channel = primary;
1383	int old_cpu_distance = 0;
1384	int new_cpu_distance = 0;
1385	int cur_vcpu = 0;
1386	int smp_pro_id = PCPU_GET(cpuid);
1387
1388	if (TAILQ_EMPTY(&primary->ch_subchans)) {
1389		return outgoing_channel;
1390	}
1391
1392	if (smp_pro_id >= MAXCPU) {
1393		return outgoing_channel;
1394	}
1395
1396	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id);
1397
1398	/* XXX need lock */
1399	TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) {
1400		if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) {
1401			continue;
1402		}
1403
1404		if (new_channel->target_vcpu == cur_vcpu){
1405			return new_channel;
1406		}
1407
1408		old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
1409		    (outgoing_channel->target_vcpu - cur_vcpu) :
1410		    (cur_vcpu - outgoing_channel->target_vcpu));
1411
1412		new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
1413		    (new_channel->target_vcpu - cur_vcpu) :
1414		    (cur_vcpu - new_channel->target_vcpu));
1415
1416		if (old_cpu_distance < new_cpu_distance) {
1417			continue;
1418		}
1419
1420		outgoing_channel = new_channel;
1421	}
1422
1423	return(outgoing_channel);
1424}
1425
1426struct hv_vmbus_channel **
1427vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
1428{
1429	struct hv_vmbus_channel **ret, *chan;
1430	int i;
1431
1432	ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
1433	    M_WAITOK);
1434
1435	mtx_lock(&pri_chan->ch_subchan_lock);
1436
1437	while (pri_chan->ch_subchan_cnt < subchan_cnt)
1438		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0);
1439
1440	i = 0;
1441	TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) {
1442		/* TODO: refcnt chan */
1443		ret[i] = chan;
1444
1445		++i;
1446		if (i == subchan_cnt)
1447			break;
1448	}
1449	KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
1450	    pri_chan->ch_subchan_cnt, subchan_cnt));
1451
1452	mtx_unlock(&pri_chan->ch_subchan_lock);
1453
1454	return ret;
1455}
1456
/*
 * Release an array obtained from vmbus_get_subchan().  Only the
 * pointer array is freed; the channels themselves are untouched.
 */
void
vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
{

	free(subchan, M_TEMP);
}
1463
/*
 * Block until all sub-channels of @pri_chan have been detached;
 * the wakeup comes from vmbus_chan_detach_task().
 */
void
vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan)
{
	mtx_lock(&pri_chan->ch_subchan_lock);
	while (pri_chan->ch_subchan_cnt > 0)
		mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0);
	mtx_unlock(&pri_chan->ch_subchan_lock);
}
1472
1473void
1474vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1475{
1476	vmbus_chanmsg_proc_t msg_proc;
1477	uint32_t msg_type;
1478
1479	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
1480	KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX,
1481	    ("invalid message type %u", msg_type));
1482
1483	msg_proc = vmbus_chan_msgprocs[msg_type];
1484	if (msg_proc != NULL)
1485		msg_proc(sc, msg);
1486}
1487