vmbus.c revision 307278
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/dev/hyperv/vmbus/vmbus.c 307278 2016-10-14 07:27:29Z sephe $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/proc.h>
42#include <sys/sysctl.h>
43#include <sys/syslog.h>
44#include <sys/systm.h>
45#include <sys/rtprio.h>
46#include <sys/interrupt.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49#include <sys/mutex.h>
50#include <sys/smp.h>
51
52#include <machine/resource.h>
53#include <sys/rman.h>
54
55#include <machine/stdarg.h>
56#include <machine/intr_machdep.h>
57#include <machine/md_var.h>
58#include <machine/segments.h>
59#include <sys/pcpu.h>
60#include <x86/apicvar.h>
61
62#include <dev/hyperv/include/hyperv.h>
63#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
64#include <dev/hyperv/vmbus/hyperv_reg.h>
65#include <dev/hyperv/vmbus/hyperv_var.h>
66#include <dev/hyperv/vmbus/vmbus_reg.h>
67#include <dev/hyperv/vmbus/vmbus_var.h>
68
69#include <contrib/dev/acpica/include/acpi.h>
70#include "acpi_if.h"
71
/*
 * Hypercall connection IDs.  VMBUS_CONNID_MESSAGE is what
 * vmbus_msghc_get() stores into the post-message Hypercall input.
 *
 * NOTE: DO NOT CHANGE THESE
 */
#define VMBUS_CONNID_MESSAGE	1
#define VMBUS_CONNID_EVENT	2
77
78struct vmbus_msghc {
79	struct hypercall_postmsg_in	*mh_inprm;
80	struct hypercall_postmsg_in	mh_inprm_save;
81	struct hyperv_dma		mh_inprm_dma;
82
83	struct vmbus_message		*mh_resp;
84	struct vmbus_message		mh_resp0;
85};
86
87struct vmbus_msghc_ctx {
88	struct vmbus_msghc		*mhc_free;
89	struct mtx			mhc_free_lock;
90	uint32_t			mhc_flags;
91
92	struct vmbus_msghc		*mhc_active;
93	struct mtx			mhc_active_lock;
94};
95
96#define VMBUS_MSGHC_CTXF_DESTROY	0x0001
97
98static int			vmbus_init(struct vmbus_softc *);
99static int			vmbus_init_contact(struct vmbus_softc *,
100				    uint32_t);
101
102static struct vmbus_msghc_ctx	*vmbus_msghc_ctx_create(bus_dma_tag_t);
103static void			vmbus_msghc_ctx_destroy(
104				    struct vmbus_msghc_ctx *);
105static void			vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *);
106static struct vmbus_msghc	*vmbus_msghc_alloc(bus_dma_tag_t);
107static void			vmbus_msghc_free(struct vmbus_msghc *);
108static struct vmbus_msghc	*vmbus_msghc_get1(struct vmbus_msghc_ctx *,
109				    uint32_t);
110
111struct vmbus_softc	*vmbus_sc;
112
113extern inthand_t IDTVEC(vmbus_isr);
114
115static const uint32_t		vmbus_version[] = {
116	HV_VMBUS_VERSION_WIN8_1,
117	HV_VMBUS_VERSION_WIN8,
118	HV_VMBUS_VERSION_WIN7,
119	HV_VMBUS_VERSION_WS2008
120};
121
122static struct vmbus_msghc *
123vmbus_msghc_alloc(bus_dma_tag_t parent_dtag)
124{
125	struct vmbus_msghc *mh;
126
127	mh = malloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO);
128
129	mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag,
130	    HYPERCALL_POSTMSGIN_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE,
131	    &mh->mh_inprm_dma, BUS_DMA_WAITOK);
132	if (mh->mh_inprm == NULL) {
133		free(mh, M_DEVBUF);
134		return NULL;
135	}
136	return mh;
137}
138
139static void
140vmbus_msghc_free(struct vmbus_msghc *mh)
141{
142	hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm);
143	free(mh, M_DEVBUF);
144}
145
146static void
147vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc)
148{
149	KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall"));
150	KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg"));
151
152	mtx_destroy(&mhc->mhc_free_lock);
153	mtx_destroy(&mhc->mhc_active_lock);
154	free(mhc, M_DEVBUF);
155}
156
157static struct vmbus_msghc_ctx *
158vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag)
159{
160	struct vmbus_msghc_ctx *mhc;
161
162	mhc = malloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO);
163	mtx_init(&mhc->mhc_free_lock, "vmbus msghc free", NULL, MTX_DEF);
164	mtx_init(&mhc->mhc_active_lock, "vmbus msghc act", NULL, MTX_DEF);
165
166	mhc->mhc_free = vmbus_msghc_alloc(parent_dtag);
167	if (mhc->mhc_free == NULL) {
168		vmbus_msghc_ctx_free(mhc);
169		return NULL;
170	}
171	return mhc;
172}
173
174static struct vmbus_msghc *
175vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag)
176{
177	struct vmbus_msghc *mh;
178
179	mtx_lock(&mhc->mhc_free_lock);
180
181	while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL) {
182		mtx_sleep(&mhc->mhc_free, &mhc->mhc_free_lock, 0,
183		    "gmsghc", 0);
184	}
185	if (mhc->mhc_flags & dtor_flag) {
186		/* Being destroyed */
187		mh = NULL;
188	} else {
189		mh = mhc->mhc_free;
190		KASSERT(mh != NULL, ("no free hypercall msg"));
191		KASSERT(mh->mh_resp == NULL,
192		    ("hypercall msg has pending response"));
193		mhc->mhc_free = NULL;
194	}
195
196	mtx_unlock(&mhc->mhc_free_lock);
197
198	return mh;
199}
200
201struct vmbus_msghc *
202vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
203{
204	struct hypercall_postmsg_in *inprm;
205	struct vmbus_msghc *mh;
206
207	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
208		return NULL;
209
210	mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY);
211	if (mh == NULL)
212		return NULL;
213
214	inprm = mh->mh_inprm;
215	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
216	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
217	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
218	inprm->hc_dsize = dsize;
219
220	return mh;
221}
222
223void
224vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh)
225{
226	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
227
228	KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active"));
229	mh->mh_resp = NULL;
230
231	mtx_lock(&mhc->mhc_free_lock);
232	KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg"));
233	mhc->mhc_free = mh;
234	mtx_unlock(&mhc->mhc_free_lock);
235	wakeup(&mhc->mhc_free);
236}
237
238void *
239vmbus_msghc_dataptr(struct vmbus_msghc *mh)
240{
241	return mh->mh_inprm->hc_data;
242}
243
244static void
245vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc)
246{
247	struct vmbus_msghc *mh;
248
249	mtx_lock(&mhc->mhc_free_lock);
250	mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY;
251	mtx_unlock(&mhc->mhc_free_lock);
252	wakeup(&mhc->mhc_free);
253
254	mh = vmbus_msghc_get1(mhc, 0);
255	if (mh == NULL)
256		panic("can't get msghc");
257
258	vmbus_msghc_free(mh);
259	vmbus_msghc_ctx_free(mhc);
260}
261
262int
263vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
264{
265	sbintime_t time = SBT_1MS;
266	int i;
267
268	/*
269	 * Save the input parameter so that we could restore the input
270	 * parameter if the Hypercall failed.
271	 *
272	 * XXX
273	 * Is this really necessary?!  i.e. Will the Hypercall ever
274	 * overwrite the input parameter?
275	 */
276	memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE);
277
278	/*
279	 * In order to cope with transient failures, e.g. insufficient
280	 * resources on host side, we retry the post message Hypercall
281	 * several times.  20 retries seem sufficient.
282	 */
283#define HC_RETRY_MAX	20
284
285	for (i = 0; i < HC_RETRY_MAX; ++i) {
286		uint64_t status;
287
288		status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr);
289		if (status == HYPERCALL_STATUS_SUCCESS)
290			return 0;
291
292		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
293		if (time < SBT_1S * 2)
294			time *= 2;
295
296		/* Restore input parameter and try again */
297		memcpy(mh->mh_inprm, &mh->mh_inprm_save,
298		    HYPERCALL_POSTMSGIN_SIZE);
299	}
300
301#undef HC_RETRY_MAX
302
303	return EIO;
304}
305
306int
307vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh)
308{
309	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
310	int error;
311
312	KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response"));
313
314	mtx_lock(&mhc->mhc_active_lock);
315	KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall"));
316	mhc->mhc_active = mh;
317	mtx_unlock(&mhc->mhc_active_lock);
318
319	error = vmbus_msghc_exec_noresult(mh);
320	if (error) {
321		mtx_lock(&mhc->mhc_active_lock);
322		KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
323		mhc->mhc_active = NULL;
324		mtx_unlock(&mhc->mhc_active_lock);
325	}
326	return error;
327}
328
329const struct vmbus_message *
330vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh)
331{
332	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
333
334	mtx_lock(&mhc->mhc_active_lock);
335
336	KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
337	while (mh->mh_resp == NULL) {
338		mtx_sleep(&mhc->mhc_active, &mhc->mhc_active_lock, 0,
339		    "wmsghc", 0);
340	}
341	mhc->mhc_active = NULL;
342
343	mtx_unlock(&mhc->mhc_active_lock);
344
345	return mh->mh_resp;
346}
347
348void
349vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
350{
351	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
352	struct vmbus_msghc *mh;
353
354	mtx_lock(&mhc->mhc_active_lock);
355
356	mh = mhc->mhc_active;
357	KASSERT(mh != NULL, ("no pending msg hypercall"));
358	memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0));
359	mh->mh_resp = &mh->mh_resp0;
360
361	mtx_unlock(&mhc->mhc_active_lock);
362	wakeup(&mhc->mhc_active);
363}
364
365static int
366vmbus_init_contact(struct vmbus_softc *sc, uint32_t version)
367{
368	struct vmbus_chanmsg_init_contact *req;
369	const struct vmbus_chanmsg_version_resp *resp;
370	const struct vmbus_message *msg;
371	struct vmbus_msghc *mh;
372	int error, supp = 0;
373
374	mh = vmbus_msghc_get(sc, sizeof(*req));
375	if (mh == NULL)
376		return ENXIO;
377
378	req = vmbus_msghc_dataptr(mh);
379	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_INIT_CONTACT;
380	req->chm_ver = version;
381	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
382	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
383	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
384
385	error = vmbus_msghc_exec(sc, mh);
386	if (error) {
387		vmbus_msghc_put(sc, mh);
388		return error;
389	}
390
391	msg = vmbus_msghc_wait_result(sc, mh);
392	resp = (const struct vmbus_chanmsg_version_resp *)msg->msg_data;
393	supp = resp->chm_supp;
394
395	vmbus_msghc_put(sc, mh);
396
397	return (supp ? 0 : EOPNOTSUPP);
398}
399
400static int
401vmbus_init(struct vmbus_softc *sc)
402{
403	int i;
404
405	for (i = 0; i < nitems(vmbus_version); ++i) {
406		int error;
407
408		error = vmbus_init_contact(sc, vmbus_version[i]);
409		if (!error) {
410			hv_vmbus_protocal_version = vmbus_version[i];
411			device_printf(sc->vmbus_dev, "version %u.%u\n",
412			    (hv_vmbus_protocal_version >> 16),
413			    (hv_vmbus_protocal_version & 0xffff));
414			return 0;
415		}
416	}
417	return ENXIO;
418}
419
420static void
421vmbus_msg_task(void *xsc, int pending __unused)
422{
423	struct vmbus_softc *sc = xsc;
424	volatile struct vmbus_message *msg;
425
426	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
427	for (;;) {
428		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
429			/* No message */
430			break;
431		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
432			/* Channel message */
433			vmbus_chan_msgproc(sc,
434			    __DEVOLATILE(const struct vmbus_message *, msg));
435		}
436
437		msg->msg_type = HYPERV_MSGTYPE_NONE;
438		/*
439		 * Make sure the write to msg_type (i.e. set to
440		 * HYPERV_MSGTYPE_NONE) happens before we read the
441		 * msg_flags and EOMing. Otherwise, the EOMing will
442		 * not deliver any more messages since there is no
443		 * empty slot
444		 *
445		 * NOTE:
446		 * mb() is used here, since atomic_thread_fence_seq_cst()
447		 * will become compiler fence on UP kernel.
448		 */
449		mb();
450		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
451			/*
452			 * This will cause message queue rescan to possibly
453			 * deliver another msg from the hypervisor
454			 */
455			wrmsr(MSR_HV_EOM, 0);
456		}
457	}
458}
459
460static __inline int
461vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
462{
463	volatile struct vmbus_message *msg;
464	struct vmbus_message *msg_base;
465
466	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
467
468	/*
469	 * Check event timer.
470	 *
471	 * TODO: move this to independent IDT vector.
472	 */
473	msg = msg_base + VMBUS_SINT_TIMER;
474	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
475		msg->msg_type = HYPERV_MSGTYPE_NONE;
476
477		vmbus_et_intr(frame);
478
479		/*
480		 * Make sure the write to msg_type (i.e. set to
481		 * HYPERV_MSGTYPE_NONE) happens before we read the
482		 * msg_flags and EOMing. Otherwise, the EOMing will
483		 * not deliver any more messages since there is no
484		 * empty slot
485		 *
486		 * NOTE:
487		 * mb() is used here, since atomic_thread_fence_seq_cst()
488		 * will become compiler fence on UP kernel.
489		 */
490		mb();
491		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
492			/*
493			 * This will cause message queue rescan to possibly
494			 * deliver another msg from the hypervisor
495			 */
496			wrmsr(MSR_HV_EOM, 0);
497		}
498	}
499
500	/*
501	 * Check events.  Hot path for network and storage I/O data; high rate.
502	 *
503	 * NOTE:
504	 * As recommended by the Windows guest fellows, we check events before
505	 * checking messages.
506	 */
507	sc->vmbus_event_proc(sc, cpu);
508
509	/*
510	 * Check messages.  Mainly management stuffs; ultra low rate.
511	 */
512	msg = msg_base + VMBUS_SINT_MESSAGE;
513	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
514		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
515		    VMBUS_PCPU_PTR(sc, message_task, cpu));
516	}
517
518	return (FILTER_HANDLED);
519}
520
521void
522vmbus_handle_intr(struct trapframe *trap_frame)
523{
524	struct vmbus_softc *sc = vmbus_get_softc();
525	int cpu = curcpu;
526
527	/*
528	 * Disable preemption.
529	 */
530	critical_enter();
531
532	/*
533	 * Do a little interrupt counting.
534	 */
535	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
536
537	vmbus_handle_intr1(sc, trap_frame, cpu);
538
539	/*
540	 * Enable preemption.
541	 */
542	critical_exit();
543}
544
545static void
546vmbus_synic_setup(void *xsc)
547{
548	struct vmbus_softc *sc = xsc;
549	int cpu = curcpu;
550	uint64_t val, orig;
551	uint32_t sint;
552
553	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
554		/*
555		 * Save virtual processor id.
556		 */
557		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
558	} else {
559		/*
560		 * XXX
561		 * Virtual processoor id is only used by a pretty broken
562		 * channel selection code from storvsc.  It's nothing
563		 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
564		 * moving on.
565		 */
566		VMBUS_PCPU_GET(sc, vcpuid, cpu) = cpu;
567	}
568
569	/*
570	 * Setup the SynIC message.
571	 */
572	orig = rdmsr(MSR_HV_SIMP);
573	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
574	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
575	     MSR_HV_SIMP_PGSHIFT);
576	wrmsr(MSR_HV_SIMP, val);
577
578	/*
579	 * Setup the SynIC event flags.
580	 */
581	orig = rdmsr(MSR_HV_SIEFP);
582	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
583	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
584	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
585	wrmsr(MSR_HV_SIEFP, val);
586
587
588	/*
589	 * Configure and unmask SINT for message and event flags.
590	 */
591	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
592	orig = rdmsr(sint);
593	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
594	    (orig & MSR_HV_SINT_RSVD_MASK);
595	wrmsr(sint, val);
596
597	/*
598	 * Configure and unmask SINT for timer.
599	 */
600	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
601	orig = rdmsr(sint);
602	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
603	    (orig & MSR_HV_SINT_RSVD_MASK);
604	wrmsr(sint, val);
605
606	/*
607	 * All done; enable SynIC.
608	 */
609	orig = rdmsr(MSR_HV_SCONTROL);
610	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
611	wrmsr(MSR_HV_SCONTROL, val);
612}
613
614static void
615vmbus_synic_teardown(void *arg)
616{
617	uint64_t orig;
618	uint32_t sint;
619
620	/*
621	 * Disable SynIC.
622	 */
623	orig = rdmsr(MSR_HV_SCONTROL);
624	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
625
626	/*
627	 * Mask message and event flags SINT.
628	 */
629	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
630	orig = rdmsr(sint);
631	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
632
633	/*
634	 * Mask timer SINT.
635	 */
636	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
637	orig = rdmsr(sint);
638	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
639
640	/*
641	 * Teardown SynIC message.
642	 */
643	orig = rdmsr(MSR_HV_SIMP);
644	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
645
646	/*
647	 * Teardown SynIC event flags.
648	 */
649	orig = rdmsr(MSR_HV_SIEFP);
650	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
651}
652
653static int
654vmbus_dma_alloc(struct vmbus_softc *sc)
655{
656	bus_dma_tag_t parent_dtag;
657	uint8_t *evtflags;
658	int cpu;
659
660	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
661	CPU_FOREACH(cpu) {
662		void *ptr;
663
664		/*
665		 * Per-cpu messages and event flags.
666		 */
667		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
668		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
669		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
670		if (ptr == NULL)
671			return ENOMEM;
672		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
673
674		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
675		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
676		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
677		if (ptr == NULL)
678			return ENOMEM;
679		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
680	}
681
682	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
683	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
684	if (evtflags == NULL)
685		return ENOMEM;
686	sc->vmbus_rx_evtflags = (u_long *)evtflags;
687	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
688	sc->vmbus_evtflags = evtflags;
689
690	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
691	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
692	if (sc->vmbus_mnf1 == NULL)
693		return ENOMEM;
694
695	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
696	    PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
697	if (sc->vmbus_mnf2 == NULL)
698		return ENOMEM;
699
700	return 0;
701}
702
703static void
704vmbus_dma_free(struct vmbus_softc *sc)
705{
706	int cpu;
707
708	if (sc->vmbus_evtflags != NULL) {
709		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
710		sc->vmbus_evtflags = NULL;
711		sc->vmbus_rx_evtflags = NULL;
712		sc->vmbus_tx_evtflags = NULL;
713	}
714	if (sc->vmbus_mnf1 != NULL) {
715		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
716		sc->vmbus_mnf1 = NULL;
717	}
718	if (sc->vmbus_mnf2 != NULL) {
719		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
720		sc->vmbus_mnf2 = NULL;
721	}
722
723	CPU_FOREACH(cpu) {
724		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
725			hyperv_dmamem_free(
726			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
727			    VMBUS_PCPU_GET(sc, message, cpu));
728			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
729		}
730		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
731			hyperv_dmamem_free(
732			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
733			    VMBUS_PCPU_GET(sc, event_flags, cpu));
734			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
735		}
736	}
737}
738
739static int
740vmbus_intr_setup(struct vmbus_softc *sc)
741{
742	int cpu;
743
744	CPU_FOREACH(cpu) {
745		char buf[MAXCOMLEN + 1];
746		cpuset_t cpu_mask;
747
748		/* Allocate an interrupt counter for Hyper-V interrupt */
749		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
750		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
751
752		/*
753		 * Setup taskqueue to handle events.  Task will be per-
754		 * channel.
755		 */
756		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
757		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
758		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
759		CPU_SETOF(cpu, &cpu_mask);
760		taskqueue_start_threads_cpuset(
761		    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask,
762		    "hvevent%d", cpu);
763
764		/*
765		 * Setup tasks and taskqueues to handle messages.
766		 */
767		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
768		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
769		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
770		CPU_SETOF(cpu, &cpu_mask);
771		taskqueue_start_threads_cpuset(
772		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
773		    "hvmsg%d", cpu);
774		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
775		    vmbus_msg_task, sc);
776	}
777
778	/*
779	 * All Hyper-V ISR required resources are setup, now let's find a
780	 * free IDT vector for Hyper-V ISR and set it up.
781	 */
782	sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr));
783	if (sc->vmbus_idtvec < 0) {
784		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
785		return ENXIO;
786	}
787	if(bootverbose) {
788		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
789		    sc->vmbus_idtvec);
790	}
791	return 0;
792}
793
794static void
795vmbus_intr_teardown(struct vmbus_softc *sc)
796{
797	int cpu;
798
799	if (sc->vmbus_idtvec >= 0) {
800		lapic_ipi_free(sc->vmbus_idtvec);
801		sc->vmbus_idtvec = -1;
802	}
803
804	CPU_FOREACH(cpu) {
805		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
806			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
807			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
808		}
809		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
810			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
811			    VMBUS_PCPU_PTR(sc, message_task, cpu));
812			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
813			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
814		}
815	}
816}
817
818static int
819vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
820{
821	struct hv_device *child_dev_ctx = device_get_ivars(child);
822
823	switch (index) {
824	case HV_VMBUS_IVAR_TYPE:
825		*result = (uintptr_t)&child_dev_ctx->class_id;
826		return (0);
827
828	case HV_VMBUS_IVAR_INSTANCE:
829		*result = (uintptr_t)&child_dev_ctx->device_id;
830		return (0);
831
832	case HV_VMBUS_IVAR_DEVCTX:
833		*result = (uintptr_t)child_dev_ctx;
834		return (0);
835
836	case HV_VMBUS_IVAR_NODE:
837		*result = (uintptr_t)child_dev_ctx->device;
838		return (0);
839	}
840	return (ENOENT);
841}
842
843static int
844vmbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
845{
846	switch (index) {
847	case HV_VMBUS_IVAR_TYPE:
848	case HV_VMBUS_IVAR_INSTANCE:
849	case HV_VMBUS_IVAR_DEVCTX:
850	case HV_VMBUS_IVAR_NODE:
851		/* read-only */
852		return (EINVAL);
853	}
854	return (ENOENT);
855}
856
857static int
858vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
859{
860	struct hv_device *dev_ctx = device_get_ivars(child);
861	char guidbuf[HYPERV_GUID_STRLEN];
862
863	if (dev_ctx == NULL)
864		return (0);
865
866	strlcat(buf, "classid=", buflen);
867	hyperv_guid2str(&dev_ctx->class_id, guidbuf, sizeof(guidbuf));
868	strlcat(buf, guidbuf, buflen);
869
870	strlcat(buf, " deviceid=", buflen);
871	hyperv_guid2str(&dev_ctx->device_id, guidbuf, sizeof(guidbuf));
872	strlcat(buf, guidbuf, buflen);
873
874	return (0);
875}
876
877struct hv_device *
878hv_vmbus_child_device_create(hv_guid type, hv_guid instance,
879    hv_vmbus_channel *channel)
880{
881	hv_device *child_dev;
882
883	/*
884	 * Allocate the new child device
885	 */
886	child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_WAITOK | M_ZERO);
887
888	child_dev->channel = channel;
889	memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
890	memcpy(&child_dev->device_id, &instance, sizeof(hv_guid));
891
892	return (child_dev);
893}
894
895int
896hv_vmbus_child_device_register(struct hv_device *child_dev)
897{
898	device_t child, parent;
899
900	parent = vmbus_get_device();
901	if (bootverbose) {
902		char name[HYPERV_GUID_STRLEN];
903
904		hyperv_guid2str(&child_dev->class_id, name, sizeof(name));
905		device_printf(parent, "add device, classid: %s\n", name);
906	}
907
908	child = device_add_child(parent, NULL, -1);
909	child_dev->device = child;
910	device_set_ivars(child, child_dev);
911
912	return (0);
913}
914
915int
916hv_vmbus_child_device_unregister(struct hv_device *child_dev)
917{
918	int ret = 0;
919	/*
920	 * XXXKYS: Ensure that this is the opposite of
921	 * device_add_child()
922	 */
923	mtx_lock(&Giant);
924	ret = device_delete_child(vmbus_get_device(), child_dev->device);
925	mtx_unlock(&Giant);
926	return(ret);
927}
928
929static int
930vmbus_probe(device_t dev)
931{
932	char *id[] = { "VMBUS", NULL };
933
934	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
935	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
936	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
937		return (ENXIO);
938
939	device_set_desc(dev, "Hyper-V Vmbus");
940
941	return (BUS_PROBE_DEFAULT);
942}
943
944/**
945 * @brief Main vmbus driver initialization routine.
946 *
947 * Here, we
948 * - initialize the vmbus driver context
949 * - setup various driver entry points
950 * - invoke the vmbus hv main init routine
951 * - get the irq resource
952 * - invoke the vmbus to add the vmbus root device
953 * - setup the vmbus root device
954 * - retrieve the channel offers
955 */
956static int
957vmbus_bus_init(void)
958{
959	struct vmbus_softc *sc = vmbus_get_softc();
960	int ret;
961
962	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
963		return (0);
964	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
965
966	/*
967	 * Create context for "post message" Hypercalls
968	 */
969	sc->vmbus_msg_hc = vmbus_msghc_ctx_create(
970	    bus_get_dma_tag(sc->vmbus_dev));
971	if (sc->vmbus_msg_hc == NULL) {
972		ret = ENXIO;
973		goto cleanup;
974	}
975
976	/*
977	 * Allocate DMA stuffs.
978	 */
979	ret = vmbus_dma_alloc(sc);
980	if (ret != 0)
981		goto cleanup;
982
983	/*
984	 * Setup interrupt.
985	 */
986	ret = vmbus_intr_setup(sc);
987	if (ret != 0)
988		goto cleanup;
989
990	/*
991	 * Setup SynIC.
992	 */
993	if (bootverbose)
994		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
995	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
996	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
997
998	/*
999	 * Connect to VMBus in the root partition
1000	 */
1001	ret = hv_vmbus_connect(sc);
1002	if (ret != 0)
1003		goto cleanup;
1004
1005	ret = vmbus_init(sc);
1006	if (ret != 0)
1007		goto cleanup;
1008
1009	if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
1010	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)
1011		sc->vmbus_event_proc = vmbus_event_proc_compat;
1012	else
1013		sc->vmbus_event_proc = vmbus_event_proc;
1014
1015	hv_vmbus_request_channel_offers();
1016
1017	vmbus_scan();
1018	bus_generic_attach(sc->vmbus_dev);
1019	device_printf(sc->vmbus_dev, "device scan, probe and attach done\n");
1020
1021	return (ret);
1022
1023cleanup:
1024	vmbus_intr_teardown(sc);
1025	vmbus_dma_free(sc);
1026	if (sc->vmbus_msg_hc != NULL) {
1027		vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc);
1028		sc->vmbus_msg_hc = NULL;
1029	}
1030
1031	return (ret);
1032}
1033
1034static void
1035vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1036{
1037}
1038
1039static int
1040vmbus_attach(device_t dev)
1041{
1042	vmbus_sc = device_get_softc(dev);
1043	vmbus_sc->vmbus_dev = dev;
1044	vmbus_sc->vmbus_idtvec = -1;
1045
1046	/*
1047	 * Event processing logic will be configured:
1048	 * - After the vmbus protocol version negotiation.
1049	 * - Before we request channel offers.
1050	 */
1051	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1052
1053#ifndef EARLY_AP_STARTUP
1054	/*
1055	 * If the system has already booted and thread
1056	 * scheduling is possible indicated by the global
1057	 * cold set to zero, we just call the driver
1058	 * initialization directly.
1059	 */
1060	if (!cold)
1061#endif
1062		vmbus_bus_init();
1063
1064	bus_generic_probe(dev);
1065	return (0);
1066}
1067
1068static void
1069vmbus_sysinit(void *arg __unused)
1070{
1071	if (vm_guest != VM_GUEST_HV || vmbus_get_softc() == NULL)
1072		return;
1073
1074#ifndef EARLY_AP_STARTUP
1075	/*
1076	 * If the system has already booted and thread
1077	 * scheduling is possible, as indicated by the
1078	 * global cold set to zero, we just call the driver
1079	 * initialization directly.
1080	 */
1081	if (!cold)
1082#endif
1083		vmbus_bus_init();
1084}
1085
1086static int
1087vmbus_detach(device_t dev)
1088{
1089	struct vmbus_softc *sc = device_get_softc(dev);
1090
1091	hv_vmbus_release_unattached_channels();
1092	hv_vmbus_disconnect();
1093
1094	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1095		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1096		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1097	}
1098
1099	vmbus_intr_teardown(sc);
1100	vmbus_dma_free(sc);
1101
1102	if (sc->vmbus_msg_hc != NULL) {
1103		vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc);
1104		sc->vmbus_msg_hc = NULL;
1105	}
1106
1107	return (0);
1108}
1109
1110static device_method_t vmbus_methods[] = {
1111	/* Device interface */
1112	DEVMETHOD(device_probe,			vmbus_probe),
1113	DEVMETHOD(device_attach,		vmbus_attach),
1114	DEVMETHOD(device_detach,		vmbus_detach),
1115	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
1116	DEVMETHOD(device_suspend,		bus_generic_suspend),
1117	DEVMETHOD(device_resume,		bus_generic_resume),
1118
1119	/* Bus interface */
1120	DEVMETHOD(bus_add_child,		bus_generic_add_child),
1121	DEVMETHOD(bus_print_child,		bus_generic_print_child),
1122	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
1123	DEVMETHOD(bus_write_ivar,		vmbus_write_ivar),
1124	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
1125
1126	DEVMETHOD_END
1127};
1128
1129static driver_t vmbus_driver = {
1130	"vmbus",
1131	vmbus_methods,
1132	sizeof(struct vmbus_softc)
1133};
1134
1135static devclass_t vmbus_devclass;
1136
1137DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
1138MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
1139MODULE_VERSION(vmbus, 1);
1140
1141#ifndef EARLY_AP_STARTUP
1142/*
1143 * NOTE:
1144 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1145 * initialized.
1146 */
1147SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1148#endif
1149