vmbus.c revision 307164
1/*-
2 * Copyright (c) 2009-2012,2016 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * VM Bus Driver Implementation
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus.c 307164 2016-10-13 02:28:40Z sephe $");
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/mutex.h>
42#include <sys/proc.h>
43#include <sys/smp.h>
44#include <sys/sysctl.h>
45#include <sys/systm.h>
46#include <sys/taskqueue.h>
47
48#include <machine/intr_machdep.h>
49#include <machine/apicvar.h>
50#include <machine/md_var.h>
51
52#include <contrib/dev/acpica/include/acpi.h>
53
54#include <dev/hyperv/include/hyperv.h>
55#include <dev/hyperv/include/vmbus_xact.h>
56#include <dev/hyperv/vmbus/hyperv_reg.h>
57#include <dev/hyperv/vmbus/hyperv_var.h>
58#include <dev/hyperv/vmbus/vmbus_reg.h>
59#include <dev/hyperv/vmbus/vmbus_var.h>
60#include <dev/hyperv/vmbus/vmbus_chanvar.h>
61
62#include "acpi_if.h"
63#include "vmbus_if.h"
64
/* Initial value of the softc's GPADL counter; see vmbus_gpadl_alloc(). */
#define VMBUS_GPADL_START		0xe1e10

/*
 * "Post message" Hypercall context.  It lives in the private area of a
 * vmbus transaction (see vmbus_msghc_get()).
 */
struct vmbus_msghc {
	/* Transaction that carries the Hypercall input parameter. */
	struct vmbus_xact		*mh_xact;
	/* Copy of the input parameter, restored before each retry. */
	struct hypercall_postmsg_in	mh_inprm_save;
};
71
/* Device, bus and vmbus interface methods (see vmbus_methods[]). */
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static int			vmbus_read_ivar(device_t, device_t, int,
				    uintptr_t *);
static int			vmbus_child_pnpinfo_str(device_t, device_t,
				    char *, size_t);
static uint32_t			vmbus_get_version_method(device_t, device_t);
static int			vmbus_probe_guid_method(device_t, device_t,
				    const struct hyperv_guid *);

/* Internal helpers. */
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_connect(struct vmbus_softc *, uint32_t);
static int			vmbus_req_channels(struct vmbus_softc *sc);
static void			vmbus_disconnect(struct vmbus_softc *);
static int			vmbus_scan(struct vmbus_softc *);
static void			vmbus_scan_wait(struct vmbus_softc *);
static void			vmbus_scan_newchan(struct vmbus_softc *);
static void			vmbus_scan_newdev(struct vmbus_softc *);
static void			vmbus_scan_done(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_chanmsg_handle(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_msg_task(void *, int);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static int			vmbus_doattach(struct vmbus_softc *);
static void			vmbus_event_proc_dummy(struct vmbus_softc *,
				    int);

/* The softc recorded by vmbus_attach(); read through vmbus_get_softc(). */
static struct vmbus_softc	*vmbus_sc;

/* IDT entry points: the reserved-vector stub and the vmbus ISR. */
extern inthand_t IDTVEC(rsvd), IDTVEC(vmbus_isr);
110
/*
 * Protocol versions offered to the hypervisor; vmbus_init() tries them
 * in array order and keeps the first one that is accepted.
 */
static const uint32_t		vmbus_version[] = {
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};

/*
 * Bus-level channel message handlers, indexed by channel message type
 * (see vmbus_chanmsg_handle()).  Types without an entry are NULL and
 * get no bus-level processing.
 */
static const vmbus_chanmsg_proc_t
vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
};
123
/* newbus method table for the vmbus driver. */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,		bus_generic_add_child),
	DEVMETHOD(bus_print_child,		bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),

	/* Vmbus interface */
	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),

	DEVMETHOD_END
};

/* Driver description; the per-device softc is a struct vmbus_softc. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* Register vmbus as a child driver of the acpi bus. */
DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
157
158static __inline struct vmbus_softc *
159vmbus_get_softc(void)
160{
161	return vmbus_sc;
162}
163
164void
165vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
166{
167	struct hypercall_postmsg_in *inprm;
168
169	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
170		panic("invalid data size %zu", dsize);
171
172	inprm = vmbus_xact_req_data(mh->mh_xact);
173	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
174	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
175	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
176	inprm->hc_dsize = dsize;
177}
178
179struct vmbus_msghc *
180vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
181{
182	struct vmbus_msghc *mh;
183	struct vmbus_xact *xact;
184
185	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
186		panic("invalid data size %zu", dsize);
187
188	xact = vmbus_xact_get(sc->vmbus_xc,
189	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
190	if (xact == NULL)
191		return (NULL);
192
193	mh = vmbus_xact_priv(xact, sizeof(*mh));
194	mh->mh_xact = xact;
195
196	vmbus_msghc_reset(mh, dsize);
197	return (mh);
198}
199
200void
201vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
202{
203
204	vmbus_xact_put(mh->mh_xact);
205}
206
207void *
208vmbus_msghc_dataptr(struct vmbus_msghc *mh)
209{
210	struct hypercall_postmsg_in *inprm;
211
212	inprm = vmbus_xact_req_data(mh->mh_xact);
213	return (inprm->hc_data);
214}
215
216int
217vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
218{
219	sbintime_t time = SBT_1MS;
220	struct hypercall_postmsg_in *inprm;
221	bus_addr_t inprm_paddr;
222	int i;
223
224	inprm = vmbus_xact_req_data(mh->mh_xact);
225	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
226
227	/*
228	 * Save the input parameter so that we could restore the input
229	 * parameter if the Hypercall failed.
230	 *
231	 * XXX
232	 * Is this really necessary?!  i.e. Will the Hypercall ever
233	 * overwrite the input parameter?
234	 */
235	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
236
237	/*
238	 * In order to cope with transient failures, e.g. insufficient
239	 * resources on host side, we retry the post message Hypercall
240	 * several times.  20 retries seem sufficient.
241	 */
242#define HC_RETRY_MAX	20
243
244	for (i = 0; i < HC_RETRY_MAX; ++i) {
245		uint64_t status;
246
247		status = hypercall_post_message(inprm_paddr);
248		if (status == HYPERCALL_STATUS_SUCCESS)
249			return 0;
250
251		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
252		if (time < SBT_1S * 2)
253			time *= 2;
254
255		/* Restore input parameter and try again */
256		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
257	}
258
259#undef HC_RETRY_MAX
260
261	return EIO;
262}
263
264int
265vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
266{
267	int error;
268
269	vmbus_xact_activate(mh->mh_xact);
270	error = vmbus_msghc_exec_noresult(mh);
271	if (error)
272		vmbus_xact_deactivate(mh->mh_xact);
273	return error;
274}
275
276const struct vmbus_message *
277vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
278{
279	size_t resp_len;
280
281	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
282}
283
284void
285vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
286{
287
288	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
289}
290
291uint32_t
292vmbus_gpadl_alloc(struct vmbus_softc *sc)
293{
294	return atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
295}
296
297static int
298vmbus_connect(struct vmbus_softc *sc, uint32_t version)
299{
300	struct vmbus_chanmsg_connect *req;
301	const struct vmbus_message *msg;
302	struct vmbus_msghc *mh;
303	int error, done = 0;
304
305	mh = vmbus_msghc_get(sc, sizeof(*req));
306	if (mh == NULL)
307		return ENXIO;
308
309	req = vmbus_msghc_dataptr(mh);
310	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
311	req->chm_ver = version;
312	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
313	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
314	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
315
316	error = vmbus_msghc_exec(sc, mh);
317	if (error) {
318		vmbus_msghc_put(sc, mh);
319		return error;
320	}
321
322	msg = vmbus_msghc_wait_result(sc, mh);
323	done = ((const struct vmbus_chanmsg_connect_resp *)
324	    msg->msg_data)->chm_done;
325
326	vmbus_msghc_put(sc, mh);
327
328	return (done ? 0 : EOPNOTSUPP);
329}
330
331static int
332vmbus_init(struct vmbus_softc *sc)
333{
334	int i;
335
336	for (i = 0; i < nitems(vmbus_version); ++i) {
337		int error;
338
339		error = vmbus_connect(sc, vmbus_version[i]);
340		if (!error) {
341			sc->vmbus_version = vmbus_version[i];
342			device_printf(sc->vmbus_dev, "version %u.%u\n",
343			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
344			    VMBUS_VERSION_MINOR(sc->vmbus_version));
345			return 0;
346		}
347	}
348	return ENXIO;
349}
350
351static void
352vmbus_disconnect(struct vmbus_softc *sc)
353{
354	struct vmbus_chanmsg_disconnect *req;
355	struct vmbus_msghc *mh;
356	int error;
357
358	mh = vmbus_msghc_get(sc, sizeof(*req));
359	if (mh == NULL) {
360		device_printf(sc->vmbus_dev,
361		    "can not get msg hypercall for disconnect\n");
362		return;
363	}
364
365	req = vmbus_msghc_dataptr(mh);
366	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
367
368	error = vmbus_msghc_exec_noresult(mh);
369	vmbus_msghc_put(sc, mh);
370
371	if (error) {
372		device_printf(sc->vmbus_dev,
373		    "disconnect msg hypercall failed\n");
374	}
375}
376
377static int
378vmbus_req_channels(struct vmbus_softc *sc)
379{
380	struct vmbus_chanmsg_chrequest *req;
381	struct vmbus_msghc *mh;
382	int error;
383
384	mh = vmbus_msghc_get(sc, sizeof(*req));
385	if (mh == NULL)
386		return ENXIO;
387
388	req = vmbus_msghc_dataptr(mh);
389	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
390
391	error = vmbus_msghc_exec_noresult(mh);
392	vmbus_msghc_put(sc, mh);
393
394	return error;
395}
396
397static void
398vmbus_scan_newchan(struct vmbus_softc *sc)
399{
400	mtx_lock(&sc->vmbus_scan_lock);
401	if ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0)
402		sc->vmbus_scan_chcnt++;
403	mtx_unlock(&sc->vmbus_scan_lock);
404}
405
406static void
407vmbus_scan_done(struct vmbus_softc *sc,
408    const struct vmbus_message *msg __unused)
409{
410	mtx_lock(&sc->vmbus_scan_lock);
411	sc->vmbus_scan_chcnt |= VMBUS_SCAN_CHCNT_DONE;
412	mtx_unlock(&sc->vmbus_scan_lock);
413	wakeup(&sc->vmbus_scan_chcnt);
414}
415
416static void
417vmbus_scan_newdev(struct vmbus_softc *sc)
418{
419	mtx_lock(&sc->vmbus_scan_lock);
420	sc->vmbus_scan_devcnt++;
421	mtx_unlock(&sc->vmbus_scan_lock);
422	wakeup(&sc->vmbus_scan_devcnt);
423}
424
425static void
426vmbus_scan_wait(struct vmbus_softc *sc)
427{
428	uint32_t chancnt;
429
430	mtx_lock(&sc->vmbus_scan_lock);
431	while ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) {
432		mtx_sleep(&sc->vmbus_scan_chcnt, &sc->vmbus_scan_lock, 0,
433		    "waitch", 0);
434	}
435	chancnt = sc->vmbus_scan_chcnt & ~VMBUS_SCAN_CHCNT_DONE;
436
437	while (sc->vmbus_scan_devcnt != chancnt) {
438		mtx_sleep(&sc->vmbus_scan_devcnt, &sc->vmbus_scan_lock, 0,
439		    "waitdev", 0);
440	}
441	mtx_unlock(&sc->vmbus_scan_lock);
442}
443
444static int
445vmbus_scan(struct vmbus_softc *sc)
446{
447	int error;
448
449	/*
450	 * Start vmbus scanning.
451	 */
452	error = vmbus_req_channels(sc);
453	if (error) {
454		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
455		    error);
456		return error;
457	}
458
459	/*
460	 * Wait for all devices are added to vmbus.
461	 */
462	vmbus_scan_wait(sc);
463
464	/*
465	 * Identify, probe and attach.
466	 */
467	bus_generic_probe(sc->vmbus_dev);
468	bus_generic_attach(sc->vmbus_dev);
469
470	if (bootverbose) {
471		device_printf(sc->vmbus_dev, "device scan, probe and attach "
472		    "done\n");
473	}
474	return 0;
475}
476
477static void
478vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
479{
480	vmbus_chanmsg_proc_t msg_proc;
481	uint32_t msg_type;
482
483	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
484	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
485		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
486		    msg_type);
487		return;
488	}
489
490	msg_proc = vmbus_chanmsg_handlers[msg_type];
491	if (msg_proc != NULL)
492		msg_proc(sc, msg);
493
494	/* Channel specific processing */
495	vmbus_chan_msgproc(sc, msg);
496}
497
498static void
499vmbus_msg_task(void *xsc, int pending __unused)
500{
501	struct vmbus_softc *sc = xsc;
502	volatile struct vmbus_message *msg;
503
504	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
505	for (;;) {
506		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
507			/* No message */
508			break;
509		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
510			/* Channel message */
511			vmbus_chanmsg_handle(sc,
512			    __DEVOLATILE(const struct vmbus_message *, msg));
513		}
514
515		msg->msg_type = HYPERV_MSGTYPE_NONE;
516		/*
517		 * Make sure the write to msg_type (i.e. set to
518		 * HYPERV_MSGTYPE_NONE) happens before we read the
519		 * msg_flags and EOMing. Otherwise, the EOMing will
520		 * not deliver any more messages since there is no
521		 * empty slot
522		 *
523		 * NOTE:
524		 * mb() is used here, since atomic_thread_fence_seq_cst()
525		 * will become compiler fence on UP kernel.
526		 */
527		mb();
528		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
529			/*
530			 * This will cause message queue rescan to possibly
531			 * deliver another msg from the hypervisor
532			 */
533			wrmsr(MSR_HV_EOM, 0);
534		}
535	}
536}
537
538static __inline int
539vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
540{
541	volatile struct vmbus_message *msg;
542	struct vmbus_message *msg_base;
543
544	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
545
546	/*
547	 * Check event timer.
548	 *
549	 * TODO: move this to independent IDT vector.
550	 */
551	msg = msg_base + VMBUS_SINT_TIMER;
552	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
553		msg->msg_type = HYPERV_MSGTYPE_NONE;
554
555		vmbus_et_intr(frame);
556
557		/*
558		 * Make sure the write to msg_type (i.e. set to
559		 * HYPERV_MSGTYPE_NONE) happens before we read the
560		 * msg_flags and EOMing. Otherwise, the EOMing will
561		 * not deliver any more messages since there is no
562		 * empty slot
563		 *
564		 * NOTE:
565		 * mb() is used here, since atomic_thread_fence_seq_cst()
566		 * will become compiler fence on UP kernel.
567		 */
568		mb();
569		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
570			/*
571			 * This will cause message queue rescan to possibly
572			 * deliver another msg from the hypervisor
573			 */
574			wrmsr(MSR_HV_EOM, 0);
575		}
576	}
577
578	/*
579	 * Check events.  Hot path for network and storage I/O data; high rate.
580	 *
581	 * NOTE:
582	 * As recommended by the Windows guest fellows, we check events before
583	 * checking messages.
584	 */
585	sc->vmbus_event_proc(sc, cpu);
586
587	/*
588	 * Check messages.  Mainly management stuffs; ultra low rate.
589	 */
590	msg = msg_base + VMBUS_SINT_MESSAGE;
591	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
592		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
593		    VMBUS_PCPU_PTR(sc, message_task, cpu));
594	}
595
596	return (FILTER_HANDLED);
597}
598
599void
600vmbus_handle_intr(struct trapframe *trap_frame)
601{
602	struct vmbus_softc *sc = vmbus_get_softc();
603	int cpu = curcpu;
604
605	/*
606	 * Disable preemption.
607	 */
608	critical_enter();
609
610	/*
611	 * Do a little interrupt counting.
612	 */
613	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
614
615	vmbus_handle_intr1(sc, trap_frame, cpu);
616
617	/*
618	 * Enable preemption.
619	 */
620	critical_exit();
621}
622
623static void
624vmbus_synic_setup(void *xsc)
625{
626	struct vmbus_softc *sc = xsc;
627	int cpu = curcpu;
628	uint64_t val, orig;
629	uint32_t sint;
630
631	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
632		/* Save virtual processor id. */
633		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
634	} else {
635		/* Set virtual processor id to 0 for compatibility. */
636		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
637	}
638
639	/*
640	 * Setup the SynIC message.
641	 */
642	orig = rdmsr(MSR_HV_SIMP);
643	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
644	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
645	     MSR_HV_SIMP_PGSHIFT);
646	wrmsr(MSR_HV_SIMP, val);
647
648	/*
649	 * Setup the SynIC event flags.
650	 */
651	orig = rdmsr(MSR_HV_SIEFP);
652	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
653	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
654	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
655	wrmsr(MSR_HV_SIEFP, val);
656
657
658	/*
659	 * Configure and unmask SINT for message and event flags.
660	 */
661	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
662	orig = rdmsr(sint);
663	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
664	    (orig & MSR_HV_SINT_RSVD_MASK);
665	wrmsr(sint, val);
666
667	/*
668	 * Configure and unmask SINT for timer.
669	 */
670	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
671	orig = rdmsr(sint);
672	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
673	    (orig & MSR_HV_SINT_RSVD_MASK);
674	wrmsr(sint, val);
675
676	/*
677	 * All done; enable SynIC.
678	 */
679	orig = rdmsr(MSR_HV_SCONTROL);
680	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
681	wrmsr(MSR_HV_SCONTROL, val);
682}
683
684static void
685vmbus_synic_teardown(void *arg)
686{
687	uint64_t orig;
688	uint32_t sint;
689
690	/*
691	 * Disable SynIC.
692	 */
693	orig = rdmsr(MSR_HV_SCONTROL);
694	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
695
696	/*
697	 * Mask message and event flags SINT.
698	 */
699	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
700	orig = rdmsr(sint);
701	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
702
703	/*
704	 * Mask timer SINT.
705	 */
706	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
707	orig = rdmsr(sint);
708	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
709
710	/*
711	 * Teardown SynIC message.
712	 */
713	orig = rdmsr(MSR_HV_SIMP);
714	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
715
716	/*
717	 * Teardown SynIC event flags.
718	 */
719	orig = rdmsr(MSR_HV_SIEFP);
720	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
721}
722
723static int
724vmbus_dma_alloc(struct vmbus_softc *sc)
725{
726	bus_dma_tag_t parent_dtag;
727	uint8_t *evtflags;
728	int cpu;
729
730	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
731	CPU_FOREACH(cpu) {
732		void *ptr;
733
734		/*
735		 * Per-cpu messages and event flags.
736		 */
737		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
738		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
739		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
740		if (ptr == NULL)
741			return ENOMEM;
742		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
743
744		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
745		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
746		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
747		if (ptr == NULL)
748			return ENOMEM;
749		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
750	}
751
752	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
753	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
754	if (evtflags == NULL)
755		return ENOMEM;
756	sc->vmbus_rx_evtflags = (u_long *)evtflags;
757	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
758	sc->vmbus_evtflags = evtflags;
759
760	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
761	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
762	if (sc->vmbus_mnf1 == NULL)
763		return ENOMEM;
764
765	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
766	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
767	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
768	if (sc->vmbus_mnf2 == NULL)
769		return ENOMEM;
770
771	return 0;
772}
773
774static void
775vmbus_dma_free(struct vmbus_softc *sc)
776{
777	int cpu;
778
779	if (sc->vmbus_evtflags != NULL) {
780		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
781		sc->vmbus_evtflags = NULL;
782		sc->vmbus_rx_evtflags = NULL;
783		sc->vmbus_tx_evtflags = NULL;
784	}
785	if (sc->vmbus_mnf1 != NULL) {
786		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
787		sc->vmbus_mnf1 = NULL;
788	}
789	if (sc->vmbus_mnf2 != NULL) {
790		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
791		sc->vmbus_mnf2 = NULL;
792	}
793
794	CPU_FOREACH(cpu) {
795		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
796			hyperv_dmamem_free(
797			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
798			    VMBUS_PCPU_GET(sc, message, cpu));
799			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
800		}
801		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
802			hyperv_dmamem_free(
803			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
804			    VMBUS_PCPU_GET(sc, event_flags, cpu));
805			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
806		}
807	}
808}
809
810/**
811 * @brief Find a free IDT slot and setup the interrupt handler.
812 */
813static int
814vmbus_vector_alloc(void)
815{
816	int vector;
817	uintptr_t func;
818	struct gate_descriptor *ip;
819
820	/*
821	 * Search backwards form the highest IDT vector available for use
822	 * as vmbus channel callback vector. We install 'vmbus_isr'
823	 * handler at that vector and use it to interrupt vcpus.
824	 */
825	vector = APIC_SPURIOUS_INT;
826	while (--vector >= APIC_IPI_INTS) {
827		ip = &idt[vector];
828		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
829		if (func == (uintptr_t)&IDTVEC(rsvd)) {
830#ifdef __i386__
831			setidt(vector , IDTVEC(vmbus_isr), SDT_SYS386IGT,
832			    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
833#else
834			setidt(vector , IDTVEC(vmbus_isr), SDT_SYSIGT,
835			    SEL_KPL, 0);
836#endif
837
838			return (vector);
839		}
840	}
841	return (0);
842}
843
844/**
845 * @brief Restore the IDT slot to rsvd.
846 */
847static void
848vmbus_vector_free(int vector)
849{
850	uintptr_t func;
851	struct gate_descriptor *ip;
852
853	if (vector == 0)
854		return;
855
856	KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
857	    ("invalid vector %d", vector));
858
859	ip = &idt[vector];
860	func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
861	KASSERT(func == (uintptr_t)&IDTVEC(vmbus_isr),
862	    ("invalid vector %d", vector));
863
864	setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
865}
866
867static void
868vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
869{
870	cpuset_t *mask = xmask;
871	int error;
872
873	error = cpuset_setthread(curthread->td_tid, mask);
874	if (error) {
875		panic("curthread=%ju: can't pin; error=%d",
876		    (uintmax_t)curthread->td_tid, error);
877	}
878}
879
880static int
881vmbus_intr_setup(struct vmbus_softc *sc)
882{
883	int cpu;
884
885	CPU_FOREACH(cpu) {
886		struct task cpuset_task;
887		char buf[MAXCOMLEN + 1];
888		cpuset_t cpu_mask;
889
890		/* Allocate an interrupt counter for Hyper-V interrupt */
891		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
892		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
893
894		/*
895		 * Setup taskqueue to handle events.  Task will be per-
896		 * channel.
897		 */
898		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
899		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
900		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
901		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, event_tq, cpu),
902		    1, PI_NET, "hvevent%d", cpu);
903
904		CPU_SETOF(cpu, &cpu_mask);
905		TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
906		    &cpu_mask);
907		taskqueue_enqueue(VMBUS_PCPU_GET(sc, event_tq, cpu),
908		    &cpuset_task);
909		taskqueue_drain(VMBUS_PCPU_GET(sc, event_tq, cpu),
910		    &cpuset_task);
911
912		/*
913		 * Setup tasks and taskqueues to handle messages.
914		 */
915		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
916		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
917		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
918		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, message_tq, cpu), 1,
919		    PI_NET, "hvmsg%d", cpu);
920		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
921		    vmbus_msg_task, sc);
922
923		CPU_SETOF(cpu, &cpu_mask);
924		TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
925		    &cpu_mask);
926		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
927		    &cpuset_task);
928		taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
929		    &cpuset_task);
930	}
931
932	/*
933	 * All Hyper-V ISR required resources are setup, now let's find a
934	 * free IDT vector for Hyper-V ISR and set it up.
935	 */
936	sc->vmbus_idtvec = vmbus_vector_alloc();
937	if (sc->vmbus_idtvec == 0) {
938		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
939		return ENXIO;
940	}
941	if(bootverbose) {
942		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
943		    sc->vmbus_idtvec);
944	}
945	return 0;
946}
947
948static void
949vmbus_intr_teardown(struct vmbus_softc *sc)
950{
951	int cpu;
952
953	vmbus_vector_free(sc->vmbus_idtvec);
954
955	CPU_FOREACH(cpu) {
956		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
957			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
958			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
959		}
960		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
961			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
962			    VMBUS_PCPU_PTR(sc, message_task, cpu));
963			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
964			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
965		}
966	}
967}
968
969static int
970vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
971{
972	return (ENOENT);
973}
974
975static int
976vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
977{
978	const struct vmbus_channel *chan;
979	char guidbuf[HYPERV_GUID_STRLEN];
980
981	chan = vmbus_get_channel(child);
982	if (chan == NULL) {
983		/* Event timer device, which does not belong to a channel */
984		return (0);
985	}
986
987	strlcat(buf, "classid=", buflen);
988	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
989	strlcat(buf, guidbuf, buflen);
990
991	strlcat(buf, " deviceid=", buflen);
992	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
993	strlcat(buf, guidbuf, buflen);
994
995	return (0);
996}
997
998int
999vmbus_add_child(struct vmbus_channel *chan)
1000{
1001	struct vmbus_softc *sc = chan->ch_vmbus;
1002	device_t parent = sc->vmbus_dev;
1003	int error = 0;
1004
1005	/* New channel has been offered */
1006	vmbus_scan_newchan(sc);
1007
1008	chan->ch_dev = device_add_child(parent, NULL, -1);
1009	if (chan->ch_dev == NULL) {
1010		device_printf(parent, "device_add_child for chan%u failed\n",
1011		    chan->ch_id);
1012		error = ENXIO;
1013		goto done;
1014	}
1015	device_set_ivars(chan->ch_dev, chan);
1016
1017done:
1018	/* New device has been/should be added to vmbus. */
1019	vmbus_scan_newdev(sc);
1020	return error;
1021}
1022
1023int
1024vmbus_delete_child(struct vmbus_channel *chan)
1025{
1026	int error;
1027
1028	if (chan->ch_dev == NULL) {
1029		/* Failed to add a device. */
1030		return 0;
1031	}
1032
1033	/*
1034	 * XXXKYS: Ensure that this is the opposite of
1035	 * device_add_child()
1036	 */
1037	mtx_lock(&Giant);
1038	error = device_delete_child(chan->ch_vmbus->vmbus_dev, chan->ch_dev);
1039	mtx_unlock(&Giant);
1040
1041	return error;
1042}
1043
1044static int
1045vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1046{
1047	struct vmbus_softc *sc = arg1;
1048	char verstr[16];
1049
1050	snprintf(verstr, sizeof(verstr), "%u.%u",
1051	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1052	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1053	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1054}
1055
1056static uint32_t
1057vmbus_get_version_method(device_t bus, device_t dev)
1058{
1059	struct vmbus_softc *sc = device_get_softc(bus);
1060
1061	return sc->vmbus_version;
1062}
1063
1064static int
1065vmbus_probe_guid_method(device_t bus, device_t dev,
1066    const struct hyperv_guid *guid)
1067{
1068	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1069
1070	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1071		return 0;
1072	return ENXIO;
1073}
1074
1075static int
1076vmbus_probe(device_t dev)
1077{
1078	char *id[] = { "VMBUS", NULL };
1079
1080	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
1081	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1082	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1083		return (ENXIO);
1084
1085	device_set_desc(dev, "Hyper-V Vmbus");
1086
1087	return (BUS_PROBE_DEFAULT);
1088}
1089
1090/**
1091 * @brief Main vmbus driver initialization routine.
1092 *
1093 * Here, we
1094 * - initialize the vmbus driver context
1095 * - setup various driver entry points
1096 * - invoke the vmbus hv main init routine
1097 * - get the irq resource
1098 * - invoke the vmbus to add the vmbus root device
1099 * - setup the vmbus root device
1100 * - retrieve the channel offers
1101 */
1102static int
1103vmbus_doattach(struct vmbus_softc *sc)
1104{
1105	struct sysctl_oid_list *child;
1106	struct sysctl_ctx_list *ctx;
1107	int ret;
1108
1109	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1110		return (0);
1111	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1112
1113	mtx_init(&sc->vmbus_scan_lock, "vmbus scan", NULL, MTX_DEF);
1114	sc->vmbus_gpadl = VMBUS_GPADL_START;
1115	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1116	TAILQ_INIT(&sc->vmbus_prichans);
1117	sc->vmbus_chmap = malloc(
1118	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1119	    M_WAITOK | M_ZERO);
1120
1121	/*
1122	 * Create context for "post message" Hypercalls
1123	 */
1124	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1125	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1126	    sizeof(struct vmbus_msghc));
1127	if (sc->vmbus_xc == NULL) {
1128		ret = ENXIO;
1129		goto cleanup;
1130	}
1131
1132	/*
1133	 * Allocate DMA stuffs.
1134	 */
1135	ret = vmbus_dma_alloc(sc);
1136	if (ret != 0)
1137		goto cleanup;
1138
1139	/*
1140	 * Setup interrupt.
1141	 */
1142	ret = vmbus_intr_setup(sc);
1143	if (ret != 0)
1144		goto cleanup;
1145
1146	/*
1147	 * Setup SynIC.
1148	 */
1149	if (bootverbose)
1150		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1151	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1152	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1153
1154	/*
1155	 * Initialize vmbus, e.g. connect to Hypervisor.
1156	 */
1157	ret = vmbus_init(sc);
1158	if (ret != 0)
1159		goto cleanup;
1160
1161	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1162	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1163		sc->vmbus_event_proc = vmbus_event_proc_compat;
1164	else
1165		sc->vmbus_event_proc = vmbus_event_proc;
1166
1167	ret = vmbus_scan(sc);
1168	if (ret != 0)
1169		goto cleanup;
1170
1171	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1172	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1173	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1174	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1175	    vmbus_sysctl_version, "A", "vmbus version");
1176
1177	return (ret);
1178
1179cleanup:
1180	vmbus_intr_teardown(sc);
1181	vmbus_dma_free(sc);
1182	if (sc->vmbus_xc != NULL) {
1183		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1184		sc->vmbus_xc = NULL;
1185	}
1186	free(sc->vmbus_chmap, M_DEVBUF);
1187	mtx_destroy(&sc->vmbus_scan_lock);
1188	mtx_destroy(&sc->vmbus_prichan_lock);
1189
1190	return (ret);
1191}
1192
1193static void
1194vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1195{
1196}
1197
1198static int
1199vmbus_attach(device_t dev)
1200{
1201	vmbus_sc = device_get_softc(dev);
1202	vmbus_sc->vmbus_dev = dev;
1203
1204	/*
1205	 * Event processing logic will be configured:
1206	 * - After the vmbus protocol version negotiation.
1207	 * - Before we request channel offers.
1208	 */
1209	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1210
1211	/*
1212	 * If the system has already booted and thread
1213	 * scheduling is possible indicated by the global
1214	 * cold set to zero, we just call the driver
1215	 * initialization directly.
1216	 */
1217	if (!cold)
1218		vmbus_doattach(vmbus_sc);
1219
1220	return (0);
1221}
1222
1223static int
1224vmbus_detach(device_t dev)
1225{
1226	struct vmbus_softc *sc = device_get_softc(dev);
1227
1228	vmbus_chan_destroy_all(sc);
1229
1230	vmbus_disconnect(sc);
1231
1232	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1233		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1234		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1235	}
1236
1237	vmbus_intr_teardown(sc);
1238	vmbus_dma_free(sc);
1239
1240	if (sc->vmbus_xc != NULL) {
1241		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1242		sc->vmbus_xc = NULL;
1243	}
1244
1245	free(sc->vmbus_chmap, M_DEVBUF);
1246	mtx_destroy(&sc->vmbus_scan_lock);
1247	mtx_destroy(&sc->vmbus_prichan_lock);
1248
1249	return (0);
1250}
1251
1252static void
1253vmbus_sysinit(void *arg __unused)
1254{
1255	struct vmbus_softc *sc = vmbus_get_softc();
1256
1257	if (vm_guest != VM_GUEST_HV || sc == NULL)
1258		return;
1259
1260	/*
1261	 * If the system has already booted and thread
1262	 * scheduling is possible, as indicated by the
1263	 * global cold set to zero, we just call the driver
1264	 * initialization directly.
1265	 */
1266	if (!cold)
1267		vmbus_doattach(sc);
1268}
1269/*
1270 * NOTE:
1271 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1272 * initialized.
1273 */
1274SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1275