vmbus_chan.c revision 296290
1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hv_channel.c 296290 2016-03-02 01:33:30Z sephe $");
31
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/malloc.h>
35#include <sys/systm.h>
36#include <sys/mbuf.h>
37#include <sys/lock.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40#include <machine/bus.h>
41#include <vm/vm.h>
42#include <vm/vm_param.h>
43#include <vm/pmap.h>
44
45#include "hv_vmbus_priv.h"
46
47static int 	vmbus_channel_create_gpadl_header(
48			/* must be phys and virt contiguous*/
49			void*				contig_buffer,
50			/* page-size multiple */
51			uint32_t 			size,
52			hv_vmbus_channel_msg_info**	msg_info,
53			uint32_t*			message_count);
54
55static void 	vmbus_channel_set_event(hv_vmbus_channel* channel);
56static void	VmbusProcessChannelEvent(void* channel, int pending);
57
58/**
59 *  @brief Trigger an event notification on the specified channel
60 */
61static void
62vmbus_channel_set_event(hv_vmbus_channel *channel)
63{
64	hv_vmbus_monitor_page *monitor_page;
65
66	if (channel->offer_msg.monitor_allocated) {
67		/* Each uint32_t represents 32 channels */
68		synch_set_bit((channel->offer_msg.child_rel_id & 31),
69			((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
70				+ ((channel->offer_msg.child_rel_id >> 5))));
71
72		monitor_page = (hv_vmbus_monitor_page *)
73			hv_vmbus_g_connection.monitor_page_2;
74
75		synch_set_bit(channel->monitor_bit,
76			(uint32_t *)&monitor_page->
77				trigger_group[channel->monitor_group].u.pending);
78	} else {
79		hv_vmbus_set_event(channel);
80	}
81
82}
83
84static int
85vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
86{
87	struct hv_vmbus_channel *chan = arg1;
88	int alloc = 0;
89
90	if (chan->offer_msg.monitor_allocated)
91		alloc = 1;
92	return sysctl_handle_int(oidp, &alloc, 0, req);
93}
94
/*
 * Build the per-channel sysctl tree:
 *
 *   dev.DEVNAME.DEVUNIT.channel.CHANID[.sub.SUBID]
 *       .chanid              (sub-channels only)
 *       .cpu                 owner CPU id
 *       .monitor_allocated   monitor allocation flag
 *       .in / .out           ring buffer statistics
 *
 * For a sub-channel the nodes hang off the primary channel's device and
 * channel id; for a primary channel they hang off its own device.
 */
static void
vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
{
	device_t dev;
	struct sysctl_oid *devch_sysctl;
	struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
	struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
	struct sysctl_ctx_list *ctx;
	uint32_t ch_id;
	uint16_t sub_ch_id;	/* only set/used when primary_ch != NULL */
	char name[16];

	hv_vmbus_channel* primary_ch = channel->primary_channel;

	if (primary_ch == NULL) {
		dev = channel->device->device;
		ch_id = channel->offer_msg.child_rel_id;
	} else {
		dev = primary_ch->device->device;
		ch_id = primary_ch->offer_msg.child_rel_id;
		sub_ch_id = channel->offer_msg.offer.sub_channel_index;
	}
	ctx = device_get_sysctl_ctx(dev);
	/* This creates dev.DEVNAME.DEVUNIT.channel tree */
	devch_sysctl = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "channel", CTLFLAG_RD, 0, "");
	/* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
	snprintf(name, sizeof(name), "%d", ch_id);
	devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
	    	    SYSCTL_CHILDREN(devch_sysctl),
	    	    OID_AUTO, name, CTLFLAG_RD, 0, "");

	if (primary_ch != NULL) {
		/*
		 * Sub-channel: add a ".sub.SUBID" level and re-point
		 * devch_id_sysctl at it so the common attributes below
		 * land under the sub-channel node.
		 */
		devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_id_sysctl),
			OID_AUTO, "sub", CTLFLAG_RD, 0, "");
		snprintf(name, sizeof(name), "%d", sub_ch_id);
		devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
			SYSCTL_CHILDREN(devch_sub_sysctl),
			OID_AUTO, name, CTLFLAG_RD, 0, "");

		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
		    OID_AUTO, "chanid", CTLFLAG_RD,
		    &channel->offer_msg.child_rel_id, 0, "channel id");
	}
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
	    "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD, channel, 0,
	    vmbus_channel_sysctl_monalloc, "I",
	    "is monitor allocated to this channel");

	/* Ring buffer statistics nodes for both directions. */
	devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "in",
		    CTLFLAG_RD, 0, "");
	devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
                    SYSCTL_CHILDREN(devch_id_sysctl),
                    OID_AUTO,
		    "out",
		    CTLFLAG_RD, 0, "");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_in_sysctl),
		&(channel->inbound),
		"inbound ring buffer stats");
	hv_ring_buffer_stat(ctx,
		SYSCTL_CHILDREN(devch_id_out_sysctl),
		&(channel->outbound),
		"outbound ring buffer stats");
}
167
168/**
169 * @brief Open the specified channel
170 */
171int
172hv_vmbus_channel_open(
173	hv_vmbus_channel*		new_channel,
174	uint32_t			send_ring_buffer_size,
175	uint32_t			recv_ring_buffer_size,
176	void*				user_data,
177	uint32_t			user_data_len,
178	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
179	void* 				context)
180{
181
182	int ret = 0;
183	void *in, *out;
184	hv_vmbus_channel_open_channel*	open_msg;
185	hv_vmbus_channel_msg_info* 	open_info;
186
187	mtx_lock(&new_channel->sc_lock);
188	if (new_channel->state == HV_CHANNEL_OPEN_STATE) {
189	    new_channel->state = HV_CHANNEL_OPENING_STATE;
190	} else {
191	    mtx_unlock(&new_channel->sc_lock);
192	    if(bootverbose)
193		printf("VMBUS: Trying to open channel <%p> which in "
194		    "%d state.\n", new_channel, new_channel->state);
195	    return (EINVAL);
196	}
197	mtx_unlock(&new_channel->sc_lock);
198
199	new_channel->on_channel_callback = pfn_on_channel_callback;
200	new_channel->channel_callback_context = context;
201
202	new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu];
203	TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel);
204
205	/* Allocate the ring buffer */
206	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
207	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
208	KASSERT(out != NULL,
209	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
210	if (out == NULL)
211		return (ENOMEM);
212
213	in = ((uint8_t *) out + send_ring_buffer_size);
214
215	new_channel->ring_buffer_pages = out;
216	new_channel->ring_buffer_page_count = (send_ring_buffer_size +
217	    recv_ring_buffer_size) >> PAGE_SHIFT;
218	new_channel->ring_buffer_size = send_ring_buffer_size +
219	    recv_ring_buffer_size;
220
221	hv_vmbus_ring_buffer_init(
222		&new_channel->outbound,
223		out,
224		send_ring_buffer_size);
225
226	hv_vmbus_ring_buffer_init(
227		&new_channel->inbound,
228		in,
229		recv_ring_buffer_size);
230
231	/* Create sysctl tree for this channel */
232	vmbus_channel_sysctl_create(new_channel);
233
234	/**
235	 * Establish the gpadl for the ring buffer
236	 */
237	new_channel->ring_buffer_gpadl_handle = 0;
238
239	ret = hv_vmbus_channel_establish_gpadl(new_channel,
240		new_channel->outbound.ring_buffer,
241		send_ring_buffer_size + recv_ring_buffer_size,
242		&new_channel->ring_buffer_gpadl_handle);
243
244	/**
245	 * Create and init the channel open message
246	 */
247	open_info = (hv_vmbus_channel_msg_info*) malloc(
248		sizeof(hv_vmbus_channel_msg_info) +
249			sizeof(hv_vmbus_channel_open_channel),
250		M_DEVBUF,
251		M_NOWAIT);
252	KASSERT(open_info != NULL,
253	    ("Error VMBUS: malloc failed to allocate Open Channel message!"));
254
255	if (open_info == NULL)
256		return (ENOMEM);
257
258	sema_init(&open_info->wait_sema, 0, "Open Info Sema");
259
260	open_msg = (hv_vmbus_channel_open_channel*) open_info->msg;
261	open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL;
262	open_msg->open_id = new_channel->offer_msg.child_rel_id;
263	open_msg->child_rel_id = new_channel->offer_msg.child_rel_id;
264	open_msg->ring_buffer_gpadl_handle =
265		new_channel->ring_buffer_gpadl_handle;
266	open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
267		>> PAGE_SHIFT;
268	open_msg->target_vcpu = new_channel->target_vcpu;
269
270	if (user_data_len)
271		memcpy(open_msg->user_data, user_data, user_data_len);
272
273	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
274	TAILQ_INSERT_TAIL(
275		&hv_vmbus_g_connection.channel_msg_anchor,
276		open_info,
277		msg_list_entry);
278	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
279
280	ret = hv_vmbus_post_message(
281		open_msg, sizeof(hv_vmbus_channel_open_channel));
282
283	if (ret != 0)
284	    goto cleanup;
285
286	ret = sema_timedwait(&open_info->wait_sema, 5 * hz); /* KYS 5 seconds */
287
288	if (ret) {
289	    if(bootverbose)
290		printf("VMBUS: channel <%p> open timeout.\n", new_channel);
291	    goto cleanup;
292	}
293
294	if (open_info->response.open_result.status == 0) {
295	    new_channel->state = HV_CHANNEL_OPENED_STATE;
296	    if(bootverbose)
297		printf("VMBUS: channel <%p> open success.\n", new_channel);
298	} else {
299	    if(bootverbose)
300		printf("Error VMBUS: channel <%p> open failed - %d!\n",
301			new_channel, open_info->response.open_result.status);
302	}
303
304	cleanup:
305	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
306	TAILQ_REMOVE(
307		&hv_vmbus_g_connection.channel_msg_anchor,
308		open_info,
309		msg_list_entry);
310	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
311	sema_destroy(&open_info->wait_sema);
312	free(open_info, M_DEVBUF);
313
314	return (ret);
315}
316
317/**
318 * @brief Create a gpadl for the specified buffer
319 */
320static int
321vmbus_channel_create_gpadl_header(
322	void*				contig_buffer,
323	uint32_t			size,	/* page-size multiple */
324	hv_vmbus_channel_msg_info**	msg_info,
325	uint32_t*			message_count)
326{
327	int				i;
328	int				page_count;
329	unsigned long long 		pfn;
330	uint32_t			msg_size;
331	hv_vmbus_channel_gpadl_header*	gpa_header;
332	hv_vmbus_channel_gpadl_body*	gpadl_body;
333	hv_vmbus_channel_msg_info*	msg_header;
334	hv_vmbus_channel_msg_info*	msg_body;
335
336	int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize;
337
338	page_count = size >> PAGE_SHIFT;
339	pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT;
340
341	/*do we need a gpadl body msg */
342	pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
343	    - sizeof(hv_vmbus_channel_gpadl_header)
344	    - sizeof(hv_gpa_range);
345	pfnCount = pfnSize / sizeof(uint64_t);
346
347	if (page_count > pfnCount) { /* if(we need a gpadl body)	*/
348	    /* fill in the header		*/
349	    msg_size = sizeof(hv_vmbus_channel_msg_info)
350		+ sizeof(hv_vmbus_channel_gpadl_header)
351		+ sizeof(hv_gpa_range)
352		+ pfnCount * sizeof(uint64_t);
353	    msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
354	    KASSERT(
355		msg_header != NULL,
356		("Error VMBUS: malloc failed to allocate Gpadl Message!"));
357	    if (msg_header == NULL)
358		return (ENOMEM);
359
360	    TAILQ_INIT(&msg_header->sub_msg_list_anchor);
361	    msg_header->message_size = msg_size;
362
363	    gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
364	    gpa_header->range_count = 1;
365	    gpa_header->range_buf_len = sizeof(hv_gpa_range)
366		+ page_count * sizeof(uint64_t);
367	    gpa_header->range[0].byte_offset = 0;
368	    gpa_header->range[0].byte_count = size;
369	    for (i = 0; i < pfnCount; i++) {
370		gpa_header->range[0].pfn_array[i] = pfn + i;
371	    }
372	    *msg_info = msg_header;
373	    *message_count = 1;
374
375	    pfnSum = pfnCount;
376	    pfnLeft = page_count - pfnCount;
377
378	    /*
379	     *  figure out how many pfns we can fit
380	     */
381	    pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
382		- sizeof(hv_vmbus_channel_gpadl_body);
383	    pfnCount = pfnSize / sizeof(uint64_t);
384
385	    /*
386	     * fill in the body
387	     */
388	    while (pfnLeft) {
389		if (pfnLeft > pfnCount) {
390		    pfnCurr = pfnCount;
391		} else {
392		    pfnCurr = pfnLeft;
393		}
394
395		msg_size = sizeof(hv_vmbus_channel_msg_info) +
396		    sizeof(hv_vmbus_channel_gpadl_body) +
397		    pfnCurr * sizeof(uint64_t);
398		msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
399		KASSERT(
400		    msg_body != NULL,
401		    ("Error VMBUS: malloc failed to allocate Gpadl msg_body!"));
402		if (msg_body == NULL)
403		    return (ENOMEM);
404
405		msg_body->message_size = msg_size;
406		(*message_count)++;
407		gpadl_body =
408		    (hv_vmbus_channel_gpadl_body*) msg_body->msg;
409		/*
410		 * gpadl_body->gpadl = kbuffer;
411		 */
412		for (i = 0; i < pfnCurr; i++) {
413		    gpadl_body->pfn[i] = pfn + pfnSum + i;
414		}
415
416		TAILQ_INSERT_TAIL(
417		    &msg_header->sub_msg_list_anchor,
418		    msg_body,
419		    msg_list_entry);
420		pfnSum += pfnCurr;
421		pfnLeft -= pfnCurr;
422	    }
423	} else { /* else everything fits in a header */
424
425	    msg_size = sizeof(hv_vmbus_channel_msg_info) +
426		sizeof(hv_vmbus_channel_gpadl_header) +
427		sizeof(hv_gpa_range) +
428		page_count * sizeof(uint64_t);
429	    msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
430	    KASSERT(
431		msg_header != NULL,
432		("Error VMBUS: malloc failed to allocate Gpadl Message!"));
433	    if (msg_header == NULL)
434		return (ENOMEM);
435
436	    msg_header->message_size = msg_size;
437
438	    gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
439	    gpa_header->range_count = 1;
440	    gpa_header->range_buf_len = sizeof(hv_gpa_range) +
441		page_count * sizeof(uint64_t);
442	    gpa_header->range[0].byte_offset = 0;
443	    gpa_header->range[0].byte_count = size;
444	    for (i = 0; i < page_count; i++) {
445		gpa_header->range[0].pfn_array[i] = pfn + i;
446	    }
447
448	    *msg_info = msg_header;
449	    *message_count = 1;
450	}
451
452	return (0);
453}
454
455/**
456 * @brief Establish a GPADL for the specified buffer
457 */
/**
 * @brief Establish a GPADL for the specified buffer
 *
 * Allocates a new GPADL id, builds the header (and, if needed, body)
 * messages for the buffer, posts them to the host and waits up to 5
 * seconds for the host's "GPADL created" response.
 *
 * @param channel	channel the GPADL is established for
 * @param contig_buffer	physically and virtually contiguous buffer
 * @param size		buffer size, a page-size multiple
 * @param gpadl_handle	out: host-acknowledged GPADL id
 * @return 0 on success, errno on failure
 */
int
hv_vmbus_channel_establish_gpadl(
	hv_vmbus_channel*	channel,
	void*			contig_buffer,
	uint32_t		size, /* page-size multiple */
	uint32_t*		gpadl_handle)

{
	int ret = 0;
	hv_vmbus_channel_gpadl_header*	gpadl_msg;
	hv_vmbus_channel_gpadl_body*	gpadl_body;
	hv_vmbus_channel_msg_info*	msg_info;
	hv_vmbus_channel_msg_info*	sub_msg_info;
	uint32_t			msg_count;
	hv_vmbus_channel_msg_info*	curr;
	uint32_t			next_gpadl_handle;

	/* Reserve a GPADL id; see the XXX note below about failure. */
	next_gpadl_handle = atomic_fetchadd_int(
	    &hv_vmbus_g_connection.next_gpadl_handle, 1);

	ret = vmbus_channel_create_gpadl_header(
		contig_buffer, size, &msg_info, &msg_count);

	if(ret != 0) {
		/*
		 * XXX
		 * We can _not_ even revert the above incremental,
		 * if multiple GPADL establishments are running
		 * parallelly, decrement the global next_gpadl_handle
		 * is calling for _big_ trouble.  A better solution
		 * is to have a 0-based GPADL id bitmap ...
		 */
		return ret;
	}

	sema_init(&msg_info->wait_sema, 0, "Open Info Sema");
	gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg;
	gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER;
	gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id;
	gpadl_msg->gpadl = next_gpadl_handle;

	/*
	 * Queue the request so the response dispatcher can find it and
	 * post to wait_sema when the host replies.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_INSERT_TAIL(
		&hv_vmbus_g_connection.channel_msg_anchor,
		msg_info,
		msg_list_entry);

	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	/* Post the header; the payload excludes the msg_info bookkeeping. */
	ret = hv_vmbus_post_message(
		gpadl_msg,
		msg_info->message_size -
		    (uint32_t) sizeof(hv_vmbus_channel_msg_info));

	if (ret != 0)
	    goto cleanup;

	if (msg_count > 1) {
	    /* Post each GPADL body message carrying the remaining PFNs. */
	    TAILQ_FOREACH(curr,
		    &msg_info->sub_msg_list_anchor, msg_list_entry) {
		sub_msg_info = curr;
		gpadl_body =
		    (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg;

		gpadl_body->header.message_type =
		    HV_CHANNEL_MESSAGE_GPADL_BODY;
		gpadl_body->gpadl = next_gpadl_handle;

		ret = hv_vmbus_post_message(
			gpadl_body,
			sub_msg_info->message_size
			    - (uint32_t) sizeof(hv_vmbus_channel_msg_info));
		 /* if (the post message failed) give up and clean up */
		if(ret != 0)
		    goto cleanup;
	    }
	}

	/* Wait for the host to acknowledge GPADL creation. */
	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds*/
	if (ret != 0)
	    goto cleanup;

	*gpadl_handle = gpadl_msg->gpadl;

cleanup:

	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
		msg_info, msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	/*
	 * NOTE(review): only the header msg_info is freed here; body
	 * messages on sub_msg_list_anchor appear to be freed elsewhere
	 * (by the response dispatcher?) — confirm, else they leak.
	 */
	sema_destroy(&msg_info->wait_sema);
	free(msg_info, M_DEVBUF);

	return (ret);
}
554
555/**
556 * @brief Teardown the specified GPADL handle
557 */
558int
559hv_vmbus_channel_teardown_gpdal(
560	hv_vmbus_channel*	channel,
561	uint32_t		gpadl_handle)
562{
563	int					ret = 0;
564	hv_vmbus_channel_gpadl_teardown*	msg;
565	hv_vmbus_channel_msg_info*		info;
566
567	info = (hv_vmbus_channel_msg_info *)
568		malloc(	sizeof(hv_vmbus_channel_msg_info) +
569			sizeof(hv_vmbus_channel_gpadl_teardown),
570				M_DEVBUF, M_NOWAIT);
571	KASSERT(info != NULL,
572	    ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!"));
573	if (info == NULL) {
574	    ret = ENOMEM;
575	    goto cleanup;
576	}
577
578	sema_init(&info->wait_sema, 0, "Open Info Sema");
579
580	msg = (hv_vmbus_channel_gpadl_teardown*) info->msg;
581
582	msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN;
583	msg->child_rel_id = channel->offer_msg.child_rel_id;
584	msg->gpadl = gpadl_handle;
585
586	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
587	TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
588			info, msg_list_entry);
589	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
590
591	ret = hv_vmbus_post_message(msg,
592			sizeof(hv_vmbus_channel_gpadl_teardown));
593	if (ret != 0)
594	    goto cleanup;
595
596	ret = sema_timedwait(&info->wait_sema, 5 * hz); /* KYS 5 seconds */
597
598cleanup:
599	/*
600	 * Received a torndown response
601	 */
602	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
603	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
604			info, msg_list_entry);
605	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
606	sema_destroy(&info->wait_sema);
607	free(info, M_DEVBUF);
608
609	return (ret);
610}
611
/*
 * Tear down one opened channel: stop event delivery, notify the host,
 * release the ring-buffer GPADL and free the ring-buffer memory.
 * Leaves the channel in HV_CHANNEL_OPEN_STATE.
 */
static void
hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
{
	int ret = 0;
	struct taskqueue *rxq = channel->rxq;
	hv_vmbus_channel_close_channel* msg;
	hv_vmbus_channel_msg_info* info;

	channel->state = HV_CHANNEL_OPEN_STATE;
	channel->sc_creation_callback = NULL;

	/*
	 * set rxq to NULL to avoid more requests be scheduled,
	 * then drain any event task already queued before clearing
	 * the callback (closes the race with in-flight events)
	 */
	channel->rxq = NULL;
	taskqueue_drain(rxq, &channel->channel_task);
	channel->on_channel_callback = NULL;

	/**
	 * Send a closing message
	 */
	info = (hv_vmbus_channel_msg_info *)
		malloc(	sizeof(hv_vmbus_channel_msg_info) +
			sizeof(hv_vmbus_channel_close_channel),
				M_DEVBUF, M_NOWAIT);
	KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!"));
	if(info == NULL)
	    return;

	msg = (hv_vmbus_channel_close_channel*) info->msg;
	msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL;
	msg->child_rel_id = channel->offer_msg.child_rel_id;

	/*
	 * NOTE(review): the post-message result is assigned but never
	 * checked ('ret' unused afterwards) — teardown proceeds
	 * unconditionally.
	 */
	ret = hv_vmbus_post_message(
		msg, sizeof(hv_vmbus_channel_close_channel));

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ring_buffer_gpadl_handle) {
		hv_vmbus_channel_teardown_gpdal(channel,
			channel->ring_buffer_gpadl_handle);
	}

	/* TODO: Send a msg to release the childRelId */

	/* cleanup the ring buffers for this channel */
	hv_ring_buffer_cleanup(&channel->outbound);
	hv_ring_buffer_cleanup(&channel->inbound);

	contigfree(channel->ring_buffer_pages, channel->ring_buffer_size,
	    M_DEVBUF);

	free(info, M_DEVBUF);
}
665
666/**
667 * @brief Close the specified channel
668 */
669void
670hv_vmbus_channel_close(hv_vmbus_channel *channel)
671{
672	hv_vmbus_channel*	sub_channel;
673
674	if (channel->primary_channel != NULL) {
675		/*
676		 * We only close multi-channels when the primary is
677		 * closed.
678		 */
679		return;
680	}
681
682	/*
683	 * Close all multi-channels first.
684	 */
685	TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor,
686	    sc_list_entry) {
687		if (sub_channel->state != HV_CHANNEL_OPENED_STATE)
688			continue;
689		hv_vmbus_channel_close_internal(sub_channel);
690	}
691	/*
692	 * Then close the primary channel.
693	 */
694	hv_vmbus_channel_close_internal(channel);
695}
696
697/**
698 * @brief Send the specified buffer on the given channel
699 */
700int
701hv_vmbus_channel_send_packet(
702	hv_vmbus_channel*	channel,
703	void*			buffer,
704	uint32_t		buffer_len,
705	uint64_t		request_id,
706	hv_vmbus_packet_type	type,
707	uint32_t		flags)
708{
709	int			ret = 0;
710	hv_vm_packet_descriptor	desc;
711	uint32_t		packet_len;
712	uint64_t		aligned_data;
713	uint32_t		packet_len_aligned;
714	boolean_t		need_sig;
715	hv_vmbus_sg_buffer_list	buffer_list[3];
716
717	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
718	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
719	aligned_data = 0;
720
721	/* Setup the descriptor */
722	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
723	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
724			    /* in 8-bytes granularity */
725	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
726	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
727	desc.transaction_id = request_id;
728
729	buffer_list[0].data = &desc;
730	buffer_list[0].length = sizeof(hv_vm_packet_descriptor);
731
732	buffer_list[1].data = buffer;
733	buffer_list[1].length = buffer_len;
734
735	buffer_list[2].data = &aligned_data;
736	buffer_list[2].length = packet_len_aligned - packet_len;
737
738	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
739	    &need_sig);
740
741	/* TODO: We should determine if this is optional */
742	if (ret == 0 && need_sig) {
743		vmbus_channel_set_event(channel);
744	}
745
746	return (ret);
747}
748
749/**
750 * @brief Send a range of single-page buffer packets using
751 * a GPADL Direct packet type
752 */
753int
754hv_vmbus_channel_send_packet_pagebuffer(
755	hv_vmbus_channel*	channel,
756	hv_vmbus_page_buffer	page_buffers[],
757	uint32_t		page_count,
758	void*			buffer,
759	uint32_t		buffer_len,
760	uint64_t		request_id)
761{
762
763	int					ret = 0;
764	boolean_t				need_sig;
765	uint32_t				packet_len;
766	uint32_t				page_buflen;
767	uint32_t				packetLen_aligned;
768	hv_vmbus_sg_buffer_list			buffer_list[4];
769	hv_vmbus_channel_packet_page_buffer	desc;
770	uint32_t				descSize;
771	uint64_t				alignedData = 0;
772
773	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
774		return (EINVAL);
775
776	/*
777	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
778	 *  is the largest size we support
779	 */
780	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
781	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
782	packet_len = descSize + page_buflen + buffer_len;
783	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
784
785	/* Setup the descriptor */
786	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
787	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
788	/* in 8-bytes granularity */
789	desc.data_offset8 = (descSize + page_buflen) >> 3;
790	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
791	desc.transaction_id = request_id;
792	desc.range_count = page_count;
793
794	buffer_list[0].data = &desc;
795	buffer_list[0].length = descSize;
796
797	buffer_list[1].data = page_buffers;
798	buffer_list[1].length = page_buflen;
799
800	buffer_list[2].data = buffer;
801	buffer_list[2].length = buffer_len;
802
803	buffer_list[3].data = &alignedData;
804	buffer_list[3].length = packetLen_aligned - packet_len;
805
806	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4,
807	    &need_sig);
808
809	/* TODO: We should determine if this is optional */
810	if (ret == 0 && need_sig) {
811		vmbus_channel_set_event(channel);
812	}
813
814	return (ret);
815}
816
817/**
818 * @brief Send a multi-page buffer packet using a GPADL Direct packet type
819 */
820int
821hv_vmbus_channel_send_packet_multipagebuffer(
822	hv_vmbus_channel*		channel,
823	hv_vmbus_multipage_buffer*	multi_page_buffer,
824	void*				buffer,
825	uint32_t			buffer_len,
826	uint64_t			request_id)
827{
828
829	int			ret = 0;
830	uint32_t		desc_size;
831	boolean_t		need_sig;
832	uint32_t		packet_len;
833	uint32_t		packet_len_aligned;
834	uint32_t		pfn_count;
835	uint64_t		aligned_data = 0;
836	hv_vmbus_sg_buffer_list	buffer_list[3];
837	hv_vmbus_channel_packet_multipage_buffer desc;
838
839	pfn_count =
840	    HV_NUM_PAGES_SPANNED(
841		    multi_page_buffer->offset,
842		    multi_page_buffer->length);
843
844	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
845	    return (EINVAL);
846	/*
847	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
848	 * is the largest size we support
849	 */
850	desc_size =
851	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
852		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
853			sizeof(uint64_t));
854	packet_len = desc_size + buffer_len;
855	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
856
857	/*
858	 * Setup the descriptor
859	 */
860	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
861	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
862	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
863	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
864	desc.transaction_id = request_id;
865	desc.range_count = 1;
866
867	desc.range.length = multi_page_buffer->length;
868	desc.range.offset = multi_page_buffer->offset;
869
870	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
871		pfn_count * sizeof(uint64_t));
872
873	buffer_list[0].data = &desc;
874	buffer_list[0].length = desc_size;
875
876	buffer_list[1].data = buffer;
877	buffer_list[1].length = buffer_len;
878
879	buffer_list[2].data = &aligned_data;
880	buffer_list[2].length = packet_len_aligned - packet_len;
881
882	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
883	    &need_sig);
884
885	/* TODO: We should determine if this is optional */
886	if (ret == 0 && need_sig) {
887	    vmbus_channel_set_event(channel);
888	}
889
890	return (ret);
891}
892
893/**
894 * @brief Retrieve the user packet on the specified channel
895 */
896int
897hv_vmbus_channel_recv_packet(
898	hv_vmbus_channel*	channel,
899	void*			Buffer,
900	uint32_t		buffer_len,
901	uint32_t*		buffer_actual_len,
902	uint64_t*		request_id)
903{
904	int			ret;
905	uint32_t		user_len;
906	uint32_t		packet_len;
907	hv_vm_packet_descriptor	desc;
908
909	*buffer_actual_len = 0;
910	*request_id = 0;
911
912	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
913		sizeof(hv_vm_packet_descriptor));
914	if (ret != 0)
915		return (0);
916
917	packet_len = desc.length8 << 3;
918	user_len = packet_len - (desc.data_offset8 << 3);
919
920	*buffer_actual_len = user_len;
921
922	if (user_len > buffer_len)
923		return (EINVAL);
924
925	*request_id = desc.transaction_id;
926
927	/* Copy over the packet to the user buffer */
928	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
929		(desc.data_offset8 << 3));
930
931	return (0);
932}
933
934/**
935 * @brief Retrieve the raw packet on the specified channel
936 */
937int
938hv_vmbus_channel_recv_packet_raw(
939	hv_vmbus_channel*	channel,
940	void*			buffer,
941	uint32_t		buffer_len,
942	uint32_t*		buffer_actual_len,
943	uint64_t*		request_id)
944{
945	int		ret;
946	uint32_t	packetLen;
947	hv_vm_packet_descriptor	desc;
948
949	*buffer_actual_len = 0;
950	*request_id = 0;
951
952	ret = hv_ring_buffer_peek(
953		&channel->inbound, &desc,
954		sizeof(hv_vm_packet_descriptor));
955
956	if (ret != 0)
957	    return (0);
958
959	packetLen = desc.length8 << 3;
960	*buffer_actual_len = packetLen;
961
962	if (packetLen > buffer_len)
963	    return (ENOBUFS);
964
965	*request_id = desc.transaction_id;
966
967	/* Copy over the entire packet to the user buffer */
968	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
969
970	return (0);
971}
972
973
974/**
975 * Process a channel event notification
976 */
/**
 * Process a channel event notification
 *
 * Taskqueue handler: invokes the channel's registered callback to drain
 * the inbound ring.  With batched reading enabled, the loop repeats
 * until the ring is empty when interrupts are re-enabled, so no host
 * notification is lost.
 */
static void
VmbusProcessChannelEvent(void* context, int pending)
{
	void* arg;
	uint32_t bytes_to_read;
	hv_vmbus_channel* channel = (hv_vmbus_channel*)context;
	boolean_t is_batched_reading;

	if (channel == NULL) {
		return;
	}
	/*
	 * NOTE(review): an earlier comment here claimed the callback is
	 * dispatched "while holding the channel lock" to close the race
	 * with driver unload, but no lock is taken in this function.
	 * The race appears to be closed instead by
	 * hv_vmbus_channel_close_internal() clearing rxq and draining
	 * this task before NULLing on_channel_callback — confirm.
	 */
	if (channel->on_channel_callback != NULL) {
		arg = channel->channel_callback_context;
		is_batched_reading = channel->batched_reading;
		/*
		 * Optimize host to guest signaling by ensuring:
		 * 1. While reading the channel, we disable interrupts from
		 *    host.
		 * 2. Ensure that we process all posted messages from the host
		 *    before returning from this callback.
		 * 3. Once we return, enable signaling from the host. Once this
		 *    state is set we check to see if additional packets are
		 *    available to read. In this case we repeat the process.
		 */
		do {
			if (is_batched_reading)
				hv_ring_buffer_read_begin(&channel->inbound);

			channel->on_channel_callback(arg);

			if (is_batched_reading)
				bytes_to_read =
				    hv_ring_buffer_read_end(&channel->inbound);
			else
				bytes_to_read = 0;
		} while (is_batched_reading && (bytes_to_read != 0));
	}
}
1029