vmbus.c revision 309313
1283625Sdim/*-
2283625Sdim * Copyright (c) 2009-2012,2016 Microsoft Corp.
3353358Sdim * Copyright (c) 2012 NetApp Inc.
4353358Sdim * Copyright (c) 2012 Citrix Inc.
5353358Sdim * All rights reserved.
6283625Sdim *
7283625Sdim * Redistribution and use in source and binary forms, with or without
8283625Sdim * modification, are permitted provided that the following conditions
9283625Sdim * are met:
10283625Sdim * 1. Redistributions of source code must retain the above copyright
11283625Sdim *    notice unmodified, this list of conditions, and the following
12283625Sdim *    disclaimer.
13283625Sdim * 2. Redistributions in binary form must reproduce the above copyright
14283625Sdim *    notice, this list of conditions and the following disclaimer in the
15283625Sdim *    documentation and/or other materials provided with the distribution.
16283625Sdim *
17283625Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18314564Sdim * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19283625Sdim * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20283625Sdim * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21344779Sdim * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22341825Sdim * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23314564Sdim * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24314564Sdim * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25283625Sdim * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26283625Sdim * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27309124Sdim */
28309124Sdim
29283625Sdim/*
30341825Sdim * VM Bus Driver Implementation
31283625Sdim */
32321369Sdim#include <sys/cdefs.h>
33321369Sdim__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/vmbus/vmbus.c 309313 2016-11-30 06:26:37Z dexuan $");
34353358Sdim
35353358Sdim#include <sys/param.h>
36353358Sdim#include <sys/bus.h>
37353358Sdim#include <sys/kernel.h>
38353358Sdim#include <sys/lock.h>
39353358Sdim#include <sys/malloc.h>
40353358Sdim#include <sys/module.h>
41353358Sdim#include <sys/mutex.h>
42353358Sdim#include <sys/proc.h>
43353358Sdim#include <sys/smp.h>
44353358Sdim#include <sys/sysctl.h>
45353358Sdim#include <sys/systm.h>
46353358Sdim#include <sys/taskqueue.h>
47353358Sdim
48353358Sdim#include <machine/bus.h>
49353358Sdim#include <machine/intr_machdep.h>
50353358Sdim#include <machine/resource.h>
51353358Sdim#include <machine/apicvar.h>
52353358Sdim#include <machine/md_var.h>
53353358Sdim
54353358Sdim#include <contrib/dev/acpica/include/acpi.h>
55353358Sdim#include <dev/acpica/acpivar.h>
56353358Sdim
57353358Sdim#include <dev/hyperv/include/hyperv.h>
58353358Sdim#include <dev/hyperv/include/vmbus_xact.h>
59353358Sdim#include <dev/hyperv/vmbus/hyperv_reg.h>
60327952Sdim#include <dev/hyperv/vmbus/hyperv_var.h>
61353358Sdim#include <dev/hyperv/vmbus/vmbus_reg.h>
62353358Sdim#include <dev/hyperv/vmbus/vmbus_var.h>
63344779Sdim#include <dev/hyperv/vmbus/vmbus_chanvar.h>
64353358Sdim
65353358Sdim#include "acpi_if.h"
66327952Sdim#include "pcib_if.h"
67321369Sdim#include "vmbus_if.h"
68321369Sdim
/*
 * Starting GPADL handle value.
 * NOTE(review): presumably used to seed the softc's GPADL counter at attach
 * time; the user of this constant is not in this part of the file -- confirm.
 */
#define VMBUS_GPADL_START		0xe1e10
70353358Sdim
71353358Sdimstruct vmbus_msghc {
72353358Sdim	struct vmbus_xact		*mh_xact;
73353358Sdim	struct hypercall_postmsg_in	mh_inprm_save;
74353358Sdim};
75353358Sdim
76353358Sdimstatic int			vmbus_probe(device_t);
77353358Sdimstatic int			vmbus_attach(device_t);
78353358Sdimstatic int			vmbus_detach(device_t);
79353358Sdimstatic int			vmbus_read_ivar(device_t, device_t, int,
80353358Sdim				    uintptr_t *);
81353358Sdimstatic int			vmbus_child_pnpinfo_str(device_t, device_t,
82353358Sdim				    char *, size_t);
83353358Sdimstatic struct resource		*vmbus_alloc_resource(device_t dev,
84353358Sdim				    device_t child, int type, int *rid,
85353358Sdim				    rman_res_t start, rman_res_t end,
86353358Sdim				    rman_res_t count, u_int flags);
87353358Sdimstatic int			vmbus_alloc_msi(device_t bus, device_t dev,
88353358Sdim				    int count, int maxcount, int *irqs);
89353358Sdimstatic int			vmbus_release_msi(device_t bus, device_t dev,
90353358Sdim				    int count, int *irqs);
91353358Sdimstatic int			vmbus_alloc_msix(device_t bus, device_t dev,
92353358Sdim				    int *irq);
93353358Sdimstatic int			vmbus_release_msix(device_t bus, device_t dev,
94353358Sdim				    int irq);
95353358Sdimstatic int			vmbus_map_msi(device_t bus, device_t dev,
96353358Sdim				    int irq, uint64_t *addr, uint32_t *data);
97353358Sdimstatic uint32_t			vmbus_get_version_method(device_t, device_t);
98353358Sdimstatic int			vmbus_probe_guid_method(device_t, device_t,
99353358Sdim				    const struct hyperv_guid *);
100353358Sdimstatic uint32_t			vmbus_get_vcpu_id_method(device_t bus,
101353358Sdim				    device_t dev, int cpu);
102353358Sdim
103353358Sdimstatic int			vmbus_init(struct vmbus_softc *);
104341825Sdimstatic int			vmbus_connect(struct vmbus_softc *, uint32_t);
105283625Sdimstatic int			vmbus_req_channels(struct vmbus_softc *sc);
106353358Sdimstatic void			vmbus_disconnect(struct vmbus_softc *);
107283625Sdimstatic int			vmbus_scan(struct vmbus_softc *);
108283625Sdimstatic void			vmbus_scan_teardown(struct vmbus_softc *);
109283625Sdimstatic void			vmbus_scan_done(struct vmbus_softc *,
110283625Sdim				    const struct vmbus_message *);
111283625Sdimstatic void			vmbus_chanmsg_handle(struct vmbus_softc *,
112353358Sdim				    const struct vmbus_message *);
113321369Sdimstatic void			vmbus_msg_task(void *, int);
114344779Sdimstatic void			vmbus_synic_setup(void *);
115283625Sdimstatic void			vmbus_synic_teardown(void *);
116283625Sdimstatic int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
117341825Sdimstatic int			vmbus_dma_alloc(struct vmbus_softc *);
118321369Sdimstatic void			vmbus_dma_free(struct vmbus_softc *);
119321369Sdimstatic int			vmbus_intr_setup(struct vmbus_softc *);
120321369Sdimstatic void			vmbus_intr_teardown(struct vmbus_softc *);
121321369Sdimstatic int			vmbus_doattach(struct vmbus_softc *);
122321369Sdimstatic void			vmbus_event_proc_dummy(struct vmbus_softc *,
123321369Sdim				    int);
124321369Sdim
125321369Sdimstatic struct vmbus_softc	*vmbus_sc;
126321369Sdim
127321369Sdimextern inthand_t IDTVEC(rsvd), IDTVEC(vmbus_isr);
128321369Sdim
129321369Sdimstatic const uint32_t		vmbus_version[] = {
130341825Sdim	VMBUS_VERSION_WIN8_1,
131327952Sdim	VMBUS_VERSION_WIN8,
132327952Sdim	VMBUS_VERSION_WIN7,
133327952Sdim	VMBUS_VERSION_WS2008
134327952Sdim};
135327952Sdim
136353358Sdimstatic const vmbus_chanmsg_proc_t
137327952Sdimvmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
138353358Sdim	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
139327952Sdim	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
140327952Sdim};
141327952Sdim
142341825Sdimstatic device_method_t vmbus_methods[] = {
143309124Sdim	/* Device interface */
144309124Sdim	DEVMETHOD(device_probe,			vmbus_probe),
145309124Sdim	DEVMETHOD(device_attach,		vmbus_attach),
146309124Sdim	DEVMETHOD(device_detach,		vmbus_detach),
147309124Sdim	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
148309124Sdim	DEVMETHOD(device_suspend,		bus_generic_suspend),
149309124Sdim	DEVMETHOD(device_resume,		bus_generic_resume),
150309124Sdim
151309124Sdim	/* Bus interface */
152309124Sdim	DEVMETHOD(bus_add_child,		bus_generic_add_child),
153309124Sdim	DEVMETHOD(bus_print_child,		bus_generic_print_child),
154309124Sdim	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
155309124Sdim	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
156309124Sdim	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
157309124Sdim	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
158309124Sdim	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
159309124Sdim	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
160309124Sdim	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
161309124Sdim	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
162309124Sdim#if __FreeBSD_version >= 1100000
163360784Sdim	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
164360784Sdim#endif
165360784Sdim
166309124Sdim	/* pcib interface */
167309124Sdim	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
168309124Sdim	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
169309124Sdim	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
170309124Sdim	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
171309124Sdim	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),
172309124Sdim
173309124Sdim	/* Vmbus interface */
174309124Sdim	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
175309124Sdim	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
176309124Sdim	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
177309124Sdim
178309124Sdim	DEVMETHOD_END
179309124Sdim};
180309124Sdim
181309124Sdimstatic driver_t vmbus_driver = {
182309124Sdim	"vmbus",
183309124Sdim	vmbus_methods,
184309124Sdim	sizeof(struct vmbus_softc)
185309124Sdim};
186309124Sdim
187309124Sdimstatic devclass_t vmbus_devclass;
188309124Sdim
189309124SdimDRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
190309124SdimMODULE_DEPEND(vmbus, acpi, 1, 1, 1);
191309124SdimMODULE_DEPEND(vmbus, pci, 1, 1, 1);
192309124SdimMODULE_VERSION(vmbus, 1);
193309124Sdim
194309124Sdimstatic __inline struct vmbus_softc *
195309124Sdimvmbus_get_softc(void)
196309124Sdim{
197309124Sdim	return vmbus_sc;
198309124Sdim}
199309124Sdim
200309124Sdimvoid
201309124Sdimvmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
202309124Sdim{
203309124Sdim	struct hypercall_postmsg_in *inprm;
204309124Sdim
205309124Sdim	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
206309124Sdim		panic("invalid data size %zu", dsize);
207309124Sdim
208309124Sdim	inprm = vmbus_xact_req_data(mh->mh_xact);
209309124Sdim	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
210309124Sdim	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
211309124Sdim	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
212309124Sdim	inprm->hc_dsize = dsize;
213309124Sdim}
214309124Sdim
215309124Sdimstruct vmbus_msghc *
216309124Sdimvmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
217309124Sdim{
218309124Sdim	struct vmbus_msghc *mh;
219309124Sdim	struct vmbus_xact *xact;
220309124Sdim
221309124Sdim	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
222309124Sdim		panic("invalid data size %zu", dsize);
223309124Sdim
224321369Sdim	xact = vmbus_xact_get(sc->vmbus_xc,
225353358Sdim	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
226344779Sdim	if (xact == NULL)
227344779Sdim		return (NULL);
228353358Sdim
229283625Sdim	mh = vmbus_xact_priv(xact, sizeof(*mh));
230341825Sdim	mh->mh_xact = xact;
231309124Sdim
232309124Sdim	vmbus_msghc_reset(mh, dsize);
233353358Sdim	return (mh);
234309124Sdim}
235309124Sdim
236309124Sdimvoid
237309124Sdimvmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
238309124Sdim{
239341825Sdim
240283625Sdim	vmbus_xact_put(mh->mh_xact);
241283625Sdim}
242283625Sdim
243309124Sdimvoid *
244309124Sdimvmbus_msghc_dataptr(struct vmbus_msghc *mh)
245283625Sdim{
246283625Sdim	struct hypercall_postmsg_in *inprm;
247341825Sdim
248283625Sdim	inprm = vmbus_xact_req_data(mh->mh_xact);
249283625Sdim	return (inprm->hc_data);
250283625Sdim}
251309124Sdim
252309124Sdimint
253283625Sdimvmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
254283625Sdim{
255341825Sdim	sbintime_t time = SBT_1MS;
256283625Sdim	struct hypercall_postmsg_in *inprm;
257283625Sdim	bus_addr_t inprm_paddr;
258283625Sdim	int i;
259309124Sdim
260309124Sdim	inprm = vmbus_xact_req_data(mh->mh_xact);
261283625Sdim	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
262283625Sdim
263341825Sdim	/*
264309124Sdim	 * Save the input parameter so that we could restore the input
265309124Sdim	 * parameter if the Hypercall failed.
266309124Sdim	 *
267309124Sdim	 * XXX
268309124Sdim	 * Is this really necessary?!  i.e. Will the Hypercall ever
269309124Sdim	 * overwrite the input parameter?
270314564Sdim	 */
271314564Sdim	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
272309124Sdim
273314564Sdim	/*
274314564Sdim	 * In order to cope with transient failures, e.g. insufficient
275314564Sdim	 * resources on host side, we retry the post message Hypercall
276314564Sdim	 * several times.  20 retries seem sufficient.
277314564Sdim	 */
278314564Sdim#define HC_RETRY_MAX	20
279314564Sdim
280314564Sdim	for (i = 0; i < HC_RETRY_MAX; ++i) {
281314564Sdim		uint64_t status;
282321369Sdim
283327952Sdim		status = hypercall_post_message(inprm_paddr);
284314564Sdim		if (status == HYPERCALL_STATUS_SUCCESS)
285314564Sdim			return 0;
286327952Sdim
287327952Sdim		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
288314564Sdim		if (time < SBT_1S * 2)
289321369Sdim			time *= 2;
290321369Sdim
291321369Sdim		/* Restore input parameter and try again */
292321369Sdim		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
293321369Sdim	}
294321369Sdim
295321369Sdim#undef HC_RETRY_MAX
296321369Sdim
297321369Sdim	return EIO;
298321369Sdim}
299321369Sdim
300321369Sdimint
301321369Sdimvmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
302321369Sdim{
303327952Sdim	int error;
304321369Sdim
305321369Sdim	vmbus_xact_activate(mh->mh_xact);
306327952Sdim	error = vmbus_msghc_exec_noresult(mh);
307327952Sdim	if (error)
308321369Sdim		vmbus_xact_deactivate(mh->mh_xact);
309321369Sdim	return error;
310321369Sdim}
311321369Sdim
312321369Sdimconst struct vmbus_message *
313321369Sdimvmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
314321369Sdim{
315321369Sdim	size_t resp_len;
316321369Sdim
317321369Sdim	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
318321369Sdim}
319321369Sdim
320321369Sdimvoid
321321369Sdimvmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
322321369Sdim{
323353358Sdim
324353358Sdim	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
325321369Sdim}
326314564Sdim
327314564Sdimuint32_t
328309124Sdimvmbus_gpadl_alloc(struct vmbus_softc *sc)
329309124Sdim{
330309124Sdim	return atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
331309124Sdim}
332314564Sdim
333314564Sdimstatic int
334314564Sdimvmbus_connect(struct vmbus_softc *sc, uint32_t version)
335314564Sdim{
336314564Sdim	struct vmbus_chanmsg_connect *req;
337314564Sdim	const struct vmbus_message *msg;
338353358Sdim	struct vmbus_msghc *mh;
339353358Sdim	int error, done = 0;
340309124Sdim
341321369Sdim	mh = vmbus_msghc_get(sc, sizeof(*req));
342321369Sdim	if (mh == NULL)
343321369Sdim		return ENXIO;
344321369Sdim
345321369Sdim	req = vmbus_msghc_dataptr(mh);
346321369Sdim	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
347321369Sdim	req->chm_ver = version;
348321369Sdim	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
349321369Sdim	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
350321369Sdim	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
351321369Sdim
352321369Sdim	error = vmbus_msghc_exec(sc, mh);
353321369Sdim	if (error) {
354321369Sdim		vmbus_msghc_put(sc, mh);
355321369Sdim		return error;
356321369Sdim	}
357321369Sdim
358321369Sdim	msg = vmbus_msghc_wait_result(sc, mh);
359321369Sdim	done = ((const struct vmbus_chanmsg_connect_resp *)
360321369Sdim	    msg->msg_data)->chm_done;
361321369Sdim
362321369Sdim	vmbus_msghc_put(sc, mh);
363321369Sdim
364321369Sdim	return (done ? 0 : EOPNOTSUPP);
365321369Sdim}
366321369Sdim
367321369Sdimstatic int
368341825Sdimvmbus_init(struct vmbus_softc *sc)
369341825Sdim{
370341825Sdim	int i;
371321369Sdim
372314564Sdim	for (i = 0; i < nitems(vmbus_version); ++i) {
373314564Sdim		int error;
374309124Sdim
375309124Sdim		error = vmbus_connect(sc, vmbus_version[i]);
376309124Sdim		if (!error) {
377309124Sdim			sc->vmbus_version = vmbus_version[i];
378309124Sdim			device_printf(sc->vmbus_dev, "version %u.%u\n",
379314564Sdim			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
380314564Sdim			    VMBUS_VERSION_MINOR(sc->vmbus_version));
381314564Sdim			return 0;
382314564Sdim		}
383314564Sdim	}
384314564Sdim	return ENXIO;
385314564Sdim}
386309124Sdim
387314564Sdimstatic void
388309124Sdimvmbus_disconnect(struct vmbus_softc *sc)
389309124Sdim{
390309124Sdim	struct vmbus_chanmsg_disconnect *req;
391309124Sdim	struct vmbus_msghc *mh;
392309124Sdim	int error;
393314564Sdim
394314564Sdim	mh = vmbus_msghc_get(sc, sizeof(*req));
395314564Sdim	if (mh == NULL) {
396314564Sdim		device_printf(sc->vmbus_dev,
397314564Sdim		    "can not get msg hypercall for disconnect\n");
398314564Sdim		return;
399341825Sdim	}
400341825Sdim
401309124Sdim	req = vmbus_msghc_dataptr(mh);
402314564Sdim	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
403314564Sdim
404314564Sdim	error = vmbus_msghc_exec_noresult(mh);
405314564Sdim	vmbus_msghc_put(sc, mh);
406341825Sdim
407321369Sdim	if (error) {
408283625Sdim		device_printf(sc->vmbus_dev,
409283625Sdim		    "disconnect msg hypercall failed\n");
410283625Sdim	}
411283625Sdim}
412283625Sdim
413283625Sdimstatic int
414283625Sdimvmbus_req_channels(struct vmbus_softc *sc)
415283625Sdim{
416283625Sdim	struct vmbus_chanmsg_chrequest *req;
417321369Sdim	struct vmbus_msghc *mh;
418321369Sdim	int error;
419283625Sdim
420283625Sdim	mh = vmbus_msghc_get(sc, sizeof(*req));
421283625Sdim	if (mh == NULL)
422283625Sdim		return ENXIO;
423283625Sdim
424283625Sdim	req = vmbus_msghc_dataptr(mh);
425283625Sdim	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
426283625Sdim
427283625Sdim	error = vmbus_msghc_exec_noresult(mh);
428283625Sdim	vmbus_msghc_put(sc, mh);
429283625Sdim
430283625Sdim	return error;
431321369Sdim}
432321369Sdim
433321369Sdimstatic void
434321369Sdimvmbus_scan_done_task(void *xsc, int pending __unused)
435344779Sdim{
436344779Sdim	struct vmbus_softc *sc = xsc;
437344779Sdim
438283625Sdim	mtx_lock(&Giant);
439321369Sdim	sc->vmbus_scandone = true;
440321369Sdim	mtx_unlock(&Giant);
441321369Sdim	wakeup(&sc->vmbus_scandone);
442353358Sdim}
443321369Sdim
444321369Sdimstatic void
445321369Sdimvmbus_scan_done(struct vmbus_softc *sc,
446344779Sdim    const struct vmbus_message *msg __unused)
447344779Sdim{
448344779Sdim
449344779Sdim	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
450344779Sdim}
451344779Sdim
452344779Sdimstatic int
453344779Sdimvmbus_scan(struct vmbus_softc *sc)
454344779Sdim{
455321369Sdim	int error;
456321369Sdim
457309124Sdim	/*
458309124Sdim	 * Identify, probe and attach for non-channel devices.
459309124Sdim	 */
460309124Sdim	bus_generic_probe(sc->vmbus_dev);
461309124Sdim	bus_generic_attach(sc->vmbus_dev);
462309124Sdim
463309124Sdim	/*
464309124Sdim	 * This taskqueue serializes vmbus devices' attach and detach
465309124Sdim	 * for channel offer and rescind messages.
466309124Sdim	 */
467309124Sdim	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
468309124Sdim	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
469309124Sdim	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
470309124Sdim	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
471309124Sdim
472344779Sdim	/*
473309124Sdim	 * This taskqueue handles sub-channel detach, so that vmbus
474341825Sdim	 * device's detach running in vmbus_devtq can drain its sub-
475321369Sdim	 * channels.
476321369Sdim	 */
477321369Sdim	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
478321369Sdim	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
479321369Sdim	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
480321369Sdim
481321369Sdim	/*
482321369Sdim	 * Start vmbus scanning.
483321369Sdim	 */
484321369Sdim	error = vmbus_req_channels(sc);
485341825Sdim	if (error) {
486321369Sdim		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
487321369Sdim		    error);
488321369Sdim		return (error);
489321369Sdim	}
490321369Sdim
491321369Sdim	/*
492321369Sdim	 * Wait for all vmbus devices from the initial channel offers to be
493321369Sdim	 * attached.
494321369Sdim	 */
495321369Sdim	GIANT_REQUIRED;
496321369Sdim	while (!sc->vmbus_scandone)
497321369Sdim		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
498321369Sdim
499341825Sdim	if (bootverbose) {
500321369Sdim		device_printf(sc->vmbus_dev, "device scan, probe and attach "
501321369Sdim		    "done\n");
502321369Sdim	}
503321369Sdim	return (0);
504321369Sdim}
505321369Sdim
506321369Sdimstatic void
507321369Sdimvmbus_scan_teardown(struct vmbus_softc *sc)
508321369Sdim{
509341825Sdim
510321369Sdim	GIANT_REQUIRED;
511321369Sdim	if (sc->vmbus_devtq != NULL) {
512321369Sdim		mtx_unlock(&Giant);
513321369Sdim		taskqueue_free(sc->vmbus_devtq);
514321369Sdim		mtx_lock(&Giant);
515321369Sdim		sc->vmbus_devtq = NULL;
516321369Sdim	}
517321369Sdim	if (sc->vmbus_subchtq != NULL) {
518321369Sdim		mtx_unlock(&Giant);
519341825Sdim		taskqueue_free(sc->vmbus_subchtq);
520321369Sdim		mtx_lock(&Giant);
521321369Sdim		sc->vmbus_subchtq = NULL;
522321369Sdim	}
523321369Sdim}
524321369Sdim
525321369Sdimstatic void
526321369Sdimvmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
527321369Sdim{
528321369Sdim	vmbus_chanmsg_proc_t msg_proc;
529321369Sdim	uint32_t msg_type;
530341825Sdim
531321369Sdim	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
532321369Sdim	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
533321369Sdim		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
534321369Sdim		    msg_type);
535321369Sdim		return;
536321369Sdim	}
537321369Sdim
538321369Sdim	msg_proc = vmbus_chanmsg_handlers[msg_type];
539321369Sdim	if (msg_proc != NULL)
540321369Sdim		msg_proc(sc, msg);
541341825Sdim
542341825Sdim	/* Channel specific processing */
543341825Sdim	vmbus_chan_msgproc(sc, msg);
544341825Sdim}
545341825Sdim
546341825Sdimstatic void
547341825Sdimvmbus_msg_task(void *xsc, int pending __unused)
548341825Sdim{
549341825Sdim	struct vmbus_softc *sc = xsc;
550341825Sdim	volatile struct vmbus_message *msg;
551344779Sdim
552344779Sdim	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
553344779Sdim	for (;;) {
554344779Sdim		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
555344779Sdim			/* No message */
556344779Sdim			break;
557344779Sdim		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
558344779Sdim			/* Channel message */
559344779Sdim			vmbus_chanmsg_handle(sc,
560344779Sdim			    __DEVOLATILE(const struct vmbus_message *, msg));
561344779Sdim		}
562344779Sdim
563341825Sdim		msg->msg_type = HYPERV_MSGTYPE_NONE;
564321369Sdim		/*
565321369Sdim		 * Make sure the write to msg_type (i.e. set to
566321369Sdim		 * HYPERV_MSGTYPE_NONE) happens before we read the
567321369Sdim		 * msg_flags and EOMing. Otherwise, the EOMing will
568321369Sdim		 * not deliver any more messages since there is no
569321369Sdim		 * empty slot
570321369Sdim		 *
571321369Sdim		 * NOTE:
572321369Sdim		 * mb() is used here, since atomic_thread_fence_seq_cst()
573321369Sdim		 * will become compiler fence on UP kernel.
574321369Sdim		 */
575321369Sdim		mb();
576321369Sdim		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
577321369Sdim			/*
578321369Sdim			 * This will cause message queue rescan to possibly
579321369Sdim			 * deliver another msg from the hypervisor
580321369Sdim			 */
581321369Sdim			wrmsr(MSR_HV_EOM, 0);
582321369Sdim		}
583321369Sdim	}
584321369Sdim}
585321369Sdim
586321369Sdimstatic __inline int
587321369Sdimvmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
588321369Sdim{
589321369Sdim	volatile struct vmbus_message *msg;
590321369Sdim	struct vmbus_message *msg_base;
591321369Sdim
592321369Sdim	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
593321369Sdim
594321369Sdim	/*
595321369Sdim	 * Check event timer.
596321369Sdim	 *
597321369Sdim	 * TODO: move this to independent IDT vector.
598321369Sdim	 */
599321369Sdim	msg = msg_base + VMBUS_SINT_TIMER;
600321369Sdim	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
601321369Sdim		msg->msg_type = HYPERV_MSGTYPE_NONE;
602321369Sdim
603321369Sdim		vmbus_et_intr(frame);
604321369Sdim
605321369Sdim		/*
606321369Sdim		 * Make sure the write to msg_type (i.e. set to
607321369Sdim		 * HYPERV_MSGTYPE_NONE) happens before we read the
608321369Sdim		 * msg_flags and EOMing. Otherwise, the EOMing will
609321369Sdim		 * not deliver any more messages since there is no
610321369Sdim		 * empty slot
611321369Sdim		 *
612321369Sdim		 * NOTE:
613321369Sdim		 * mb() is used here, since atomic_thread_fence_seq_cst()
614321369Sdim		 * will become compiler fence on UP kernel.
615321369Sdim		 */
616321369Sdim		mb();
617341825Sdim		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
618321369Sdim			/*
619321369Sdim			 * This will cause message queue rescan to possibly
620321369Sdim			 * deliver another msg from the hypervisor
621321369Sdim			 */
622321369Sdim			wrmsr(MSR_HV_EOM, 0);
623321369Sdim		}
624321369Sdim	}
625321369Sdim
626321369Sdim	/*
627321369Sdim	 * Check events.  Hot path for network and storage I/O data; high rate.
628360784Sdim	 *
629360784Sdim	 * NOTE:
630360784Sdim	 * As recommended by the Windows guest fellows, we check events before
631360784Sdim	 * checking messages.
632360784Sdim	 */
633360784Sdim	sc->vmbus_event_proc(sc, cpu);
634360784Sdim
635360784Sdim	/*
636360784Sdim	 * Check messages.  Mainly management stuffs; ultra low rate.
637360784Sdim	 */
638360784Sdim	msg = msg_base + VMBUS_SINT_MESSAGE;
639360784Sdim	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
640360784Sdim		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
641283625Sdim		    VMBUS_PCPU_PTR(sc, message_task, cpu));
642314564Sdim	}
643314564Sdim
644314564Sdim	return (FILTER_HANDLED);
645344779Sdim}
646344779Sdim
647344779Sdimvoid
648314564Sdimvmbus_handle_intr(struct trapframe *trap_frame)
649344779Sdim{
650344779Sdim	struct vmbus_softc *sc = vmbus_get_softc();
651344779Sdim	int cpu = curcpu;
652314564Sdim
653309124Sdim	/*
654314564Sdim	 * Disable preemption.
655344779Sdim	 */
656314564Sdim	critical_enter();
657283625Sdim
658344779Sdim	/*
659344779Sdim	 * Do a little interrupt counting.
660344779Sdim	 */
661344779Sdim	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
662314564Sdim
663283625Sdim	vmbus_handle_intr1(sc, trap_frame, cpu);
664344779Sdim
665344779Sdim	/*
666344779Sdim	 * Enable preemption.
667321369Sdim	 */
668321369Sdim	critical_exit();
669353358Sdim}
670353358Sdim
671344779Sdimstatic void
672321369Sdimvmbus_synic_setup(void *xsc)
673321369Sdim{
674321369Sdim	struct vmbus_softc *sc = xsc;
675321369Sdim	int cpu = curcpu;
676321369Sdim	uint64_t val, orig;
677321369Sdim	uint32_t sint;
678321369Sdim
679321369Sdim	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
680321369Sdim		/* Save virtual processor id. */
681321369Sdim		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
682321369Sdim	} else {
683321369Sdim		/* Set virtual processor id to 0 for compatibility. */
684321369Sdim		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
685321369Sdim	}
686321369Sdim
687344779Sdim	/*
688344779Sdim	 * Setup the SynIC message.
689321369Sdim	 */
690341825Sdim	orig = rdmsr(MSR_HV_SIMP);
691341825Sdim	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
692321369Sdim	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
693321369Sdim	     MSR_HV_SIMP_PGSHIFT);
694321369Sdim	wrmsr(MSR_HV_SIMP, val);
695321369Sdim
696321369Sdim	/*
697321369Sdim	 * Setup the SynIC event flags.
698321369Sdim	 */
699321369Sdim	orig = rdmsr(MSR_HV_SIEFP);
700321369Sdim	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
701321369Sdim	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
702321369Sdim	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
703321369Sdim	wrmsr(MSR_HV_SIEFP, val);
704321369Sdim
705321369Sdim
706321369Sdim	/*
707321369Sdim	 * Configure and unmask SINT for message and event flags.
708321369Sdim	 */
709321369Sdim	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
710321369Sdim	orig = rdmsr(sint);
711321369Sdim	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
712321369Sdim	    (orig & MSR_HV_SINT_RSVD_MASK);
713321369Sdim	wrmsr(sint, val);
714321369Sdim
715321369Sdim	/*
716321369Sdim	 * Configure and unmask SINT for timer.
717321369Sdim	 */
718321369Sdim	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
719321369Sdim	orig = rdmsr(sint);
720321369Sdim	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
721321369Sdim	    (orig & MSR_HV_SINT_RSVD_MASK);
722321369Sdim	wrmsr(sint, val);
723321369Sdim
724321369Sdim	/*
725321369Sdim	 * All done; enable SynIC.
726283625Sdim	 */
727321369Sdim	orig = rdmsr(MSR_HV_SCONTROL);
728321369Sdim	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
729321369Sdim	wrmsr(MSR_HV_SCONTROL, val);
730321369Sdim}
731321369Sdim
732321369Sdimstatic void
733321369Sdimvmbus_synic_teardown(void *arg)
734321369Sdim{
735321369Sdim	uint64_t orig;
736321369Sdim	uint32_t sint;
737321369Sdim
738321369Sdim	/*
739321369Sdim	 * Disable SynIC.
740321369Sdim	 */
741321369Sdim	orig = rdmsr(MSR_HV_SCONTROL);
742321369Sdim	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
743321369Sdim
744321369Sdim	/*
745321369Sdim	 * Mask message and event flags SINT.
746321369Sdim	 */
747321369Sdim	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
748321369Sdim	orig = rdmsr(sint);
749321369Sdim	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
750321369Sdim
751321369Sdim	/*
752321369Sdim	 * Mask timer SINT.
753321369Sdim	 */
754321369Sdim	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
755321369Sdim	orig = rdmsr(sint);
756321369Sdim	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
757321369Sdim
758321369Sdim	/*
759321369Sdim	 * Teardown SynIC message.
760321369Sdim	 */
761321369Sdim	orig = rdmsr(MSR_HV_SIMP);
762321369Sdim	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
763321369Sdim
764321369Sdim	/*
765321369Sdim	 * Teardown SynIC event flags.
766321369Sdim	 */
767321369Sdim	orig = rdmsr(MSR_HV_SIEFP);
768321369Sdim	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
769285181Sdim}
770321369Sdim
771283625Sdimstatic int
772283625Sdimvmbus_dma_alloc(struct vmbus_softc *sc)
773{
774	bus_dma_tag_t parent_dtag;
775	uint8_t *evtflags;
776	int cpu;
777
778	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
779	CPU_FOREACH(cpu) {
780		void *ptr;
781
782		/*
783		 * Per-cpu messages and event flags.
784		 */
785		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
786		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
787		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
788		if (ptr == NULL)
789			return ENOMEM;
790		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
791
792		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
793		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
794		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
795		if (ptr == NULL)
796			return ENOMEM;
797		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
798	}
799
800	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
801	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
802	if (evtflags == NULL)
803		return ENOMEM;
804	sc->vmbus_rx_evtflags = (u_long *)evtflags;
805	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
806	sc->vmbus_evtflags = evtflags;
807
808	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
809	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
810	if (sc->vmbus_mnf1 == NULL)
811		return ENOMEM;
812
813	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
814	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
815	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
816	if (sc->vmbus_mnf2 == NULL)
817		return ENOMEM;
818
819	return 0;
820}
821
822static void
823vmbus_dma_free(struct vmbus_softc *sc)
824{
825	int cpu;
826
827	if (sc->vmbus_evtflags != NULL) {
828		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
829		sc->vmbus_evtflags = NULL;
830		sc->vmbus_rx_evtflags = NULL;
831		sc->vmbus_tx_evtflags = NULL;
832	}
833	if (sc->vmbus_mnf1 != NULL) {
834		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
835		sc->vmbus_mnf1 = NULL;
836	}
837	if (sc->vmbus_mnf2 != NULL) {
838		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
839		sc->vmbus_mnf2 = NULL;
840	}
841
842	CPU_FOREACH(cpu) {
843		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
844			hyperv_dmamem_free(
845			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
846			    VMBUS_PCPU_GET(sc, message, cpu));
847			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
848		}
849		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
850			hyperv_dmamem_free(
851			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
852			    VMBUS_PCPU_GET(sc, event_flags, cpu));
853			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
854		}
855	}
856}
857
858/**
859 * @brief Find a free IDT slot and setup the interrupt handler.
860 */
861static int
862vmbus_vector_alloc(void)
863{
864	int vector;
865	uintptr_t func;
866	struct gate_descriptor *ip;
867
868	/*
869	 * Search backwards form the highest IDT vector available for use
870	 * as vmbus channel callback vector. We install 'vmbus_isr'
871	 * handler at that vector and use it to interrupt vcpus.
872	 */
873	vector = APIC_SPURIOUS_INT;
874	while (--vector >= APIC_IPI_INTS) {
875		ip = &idt[vector];
876		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
877		if (func == (uintptr_t)&IDTVEC(rsvd)) {
878#ifdef __i386__
879			setidt(vector , IDTVEC(vmbus_isr), SDT_SYS386IGT,
880			    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
881#else
882			setidt(vector , IDTVEC(vmbus_isr), SDT_SYSIGT,
883			    SEL_KPL, 0);
884#endif
885
886			return (vector);
887		}
888	}
889	return (0);
890}
891
892/**
893 * @brief Restore the IDT slot to rsvd.
894 */
895static void
896vmbus_vector_free(int vector)
897{
898	uintptr_t func;
899	struct gate_descriptor *ip;
900
901	if (vector == 0)
902		return;
903
904	KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
905	    ("invalid vector %d", vector));
906
907	ip = &idt[vector];
908	func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
909	KASSERT(func == (uintptr_t)&IDTVEC(vmbus_isr),
910	    ("invalid vector %d", vector));
911
912	setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
913}
914
915static void
916vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
917{
918	cpuset_t *mask = xmask;
919	int error;
920
921	error = cpuset_setthread(curthread->td_tid, mask);
922	if (error) {
923		panic("curthread=%ju: can't pin; error=%d",
924		    (uintmax_t)curthread->td_tid, error);
925	}
926}
927
928static int
929vmbus_intr_setup(struct vmbus_softc *sc)
930{
931	int cpu;
932
933	CPU_FOREACH(cpu) {
934		struct task cpuset_task;
935		char buf[MAXCOMLEN + 1];
936		cpuset_t cpu_mask;
937
938		/* Allocate an interrupt counter for Hyper-V interrupt */
939		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
940		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
941
942		/*
943		 * Setup taskqueue to handle events.  Task will be per-
944		 * channel.
945		 */
946		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
947		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
948		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
949		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, event_tq, cpu),
950		    1, PI_NET, "hvevent%d", cpu);
951
952		CPU_SETOF(cpu, &cpu_mask);
953		TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
954		    &cpu_mask);
955		taskqueue_enqueue(VMBUS_PCPU_GET(sc, event_tq, cpu),
956		    &cpuset_task);
957		taskqueue_drain(VMBUS_PCPU_GET(sc, event_tq, cpu),
958		    &cpuset_task);
959
960		/*
961		 * Setup tasks and taskqueues to handle messages.
962		 */
963		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
964		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
965		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
966		taskqueue_start_threads(VMBUS_PCPU_PTR(sc, message_tq, cpu), 1,
967		    PI_NET, "hvmsg%d", cpu);
968		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
969		    vmbus_msg_task, sc);
970
971		CPU_SETOF(cpu, &cpu_mask);
972		TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task,
973		    &cpu_mask);
974		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
975		    &cpuset_task);
976		taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
977		    &cpuset_task);
978	}
979
980	/*
981	 * All Hyper-V ISR required resources are setup, now let's find a
982	 * free IDT vector for Hyper-V ISR and set it up.
983	 */
984	sc->vmbus_idtvec = vmbus_vector_alloc();
985	if (sc->vmbus_idtvec == 0) {
986		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
987		return ENXIO;
988	}
989	if (bootverbose) {
990		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
991		    sc->vmbus_idtvec);
992	}
993	return 0;
994}
995
996static void
997vmbus_intr_teardown(struct vmbus_softc *sc)
998{
999	int cpu;
1000
1001	vmbus_vector_free(sc->vmbus_idtvec);
1002
1003	CPU_FOREACH(cpu) {
1004		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
1005			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
1006			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
1007		}
1008		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
1009			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1010			    VMBUS_PCPU_PTR(sc, message_task, cpu));
1011			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
1012			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
1013		}
1014	}
1015}
1016
1017static int
1018vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
1019{
1020	return (ENOENT);
1021}
1022
1023static int
1024vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
1025{
1026	const struct vmbus_channel *chan;
1027	char guidbuf[HYPERV_GUID_STRLEN];
1028
1029	chan = vmbus_get_channel(child);
1030	if (chan == NULL) {
1031		/* Event timer device, which does not belong to a channel */
1032		return (0);
1033	}
1034
1035	strlcat(buf, "classid=", buflen);
1036	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1037	strlcat(buf, guidbuf, buflen);
1038
1039	strlcat(buf, " deviceid=", buflen);
1040	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1041	strlcat(buf, guidbuf, buflen);
1042
1043	return (0);
1044}
1045
1046int
1047vmbus_add_child(struct vmbus_channel *chan)
1048{
1049	struct vmbus_softc *sc = chan->ch_vmbus;
1050	device_t parent = sc->vmbus_dev;
1051
1052	mtx_lock(&Giant);
1053
1054	chan->ch_dev = device_add_child(parent, NULL, -1);
1055	if (chan->ch_dev == NULL) {
1056		mtx_unlock(&Giant);
1057		device_printf(parent, "device_add_child for chan%u failed\n",
1058		    chan->ch_id);
1059		return (ENXIO);
1060	}
1061	device_set_ivars(chan->ch_dev, chan);
1062	device_probe_and_attach(chan->ch_dev);
1063
1064	mtx_unlock(&Giant);
1065	return (0);
1066}
1067
1068int
1069vmbus_delete_child(struct vmbus_channel *chan)
1070{
1071	int error = 0;
1072
1073	mtx_lock(&Giant);
1074	if (chan->ch_dev != NULL) {
1075		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1076		    chan->ch_dev);
1077		chan->ch_dev = NULL;
1078	}
1079	mtx_unlock(&Giant);
1080	return (error);
1081}
1082
1083static int
1084vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1085{
1086	struct vmbus_softc *sc = arg1;
1087	char verstr[16];
1088
1089	snprintf(verstr, sizeof(verstr), "%u.%u",
1090	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1091	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1092	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1093}
1094
1095/*
1096 * We need the function to make sure the MMIO resource is allocated from the
1097 * ranges found in _CRS.
1098 *
1099 * For the release function, we can use bus_generic_release_resource().
1100 */
1101static struct resource *
1102vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1103    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1104{
1105	device_t parent = device_get_parent(dev);
1106	struct resource *res;
1107
1108#ifdef NEW_PCIB
1109	if (type == SYS_RES_MEMORY) {
1110		struct vmbus_softc *sc = device_get_softc(dev);
1111
1112		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1113		    rid, start, end, count, flags);
1114	} else
1115#endif
1116	{
1117		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1118		    end, count, flags);
1119	}
1120
1121	return (res);
1122}
1123
1124static device_t
1125get_nexus(device_t vmbus)
1126{
1127	device_t acpi = device_get_parent(vmbus);
1128	device_t nexus = device_get_parent(acpi);
1129	return (nexus);
1130}
1131
1132static int
1133vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1134{
1135	return (PCIB_ALLOC_MSI(get_nexus(bus), dev, count, maxcount, irqs));
1136}
1137
1138static int
1139vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1140{
1141	return (PCIB_RELEASE_MSI(get_nexus(bus), dev, count, irqs));
1142}
1143
1144static int
1145vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1146{
1147	return (PCIB_ALLOC_MSIX(get_nexus(bus), dev, irq));
1148}
1149
1150static int
1151vmbus_release_msix(device_t bus, device_t dev, int irq)
1152{
1153	return (PCIB_RELEASE_MSIX(get_nexus(bus), dev, irq));
1154}
1155
1156static int
1157vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1158	uint32_t *data)
1159{
1160	return (PCIB_MAP_MSI(get_nexus(bus), dev, irq, addr, data));
1161}
1162
1163static uint32_t
1164vmbus_get_version_method(device_t bus, device_t dev)
1165{
1166	struct vmbus_softc *sc = device_get_softc(bus);
1167
1168	return sc->vmbus_version;
1169}
1170
1171static int
1172vmbus_probe_guid_method(device_t bus, device_t dev,
1173    const struct hyperv_guid *guid)
1174{
1175	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1176
1177	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1178		return 0;
1179	return ENXIO;
1180}
1181
1182static uint32_t
1183vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1184{
1185	const struct vmbus_softc *sc = device_get_softc(bus);
1186
1187	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1188}
1189
#ifdef NEW_PCIB
/* Address parse_crs() clips MMIO ranges at, to stay clear of the vTPM. */
#define VTPM_BASE_ADDR 0xfed40000
/* Boundary that decides whether a range belongs to parse_32 or parse_64. */
#define FOUR_GB (1ULL << 32)

/* Which ranges a _CRS walk records: those starting at/above or below 4GB. */
enum parse_pass { parse_64, parse_32 };

/* Context handed to parse_crs() through AcpiWalkResources(). */
struct parse_context {
	device_t vmbus_dev;	/* device whose softc receives the ranges */
	enum parse_pass pass;	/* pass currently being performed */
};
1200
1201static ACPI_STATUS
1202parse_crs(ACPI_RESOURCE *res, void *ctx)
1203{
1204	const struct parse_context *pc = ctx;
1205	device_t vmbus_dev = pc->vmbus_dev;
1206
1207	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1208	UINT64 start, end;
1209
1210	switch (res->Type) {
1211	case ACPI_RESOURCE_TYPE_ADDRESS32:
1212		start = res->Data.Address32.Address.Minimum;
1213		end = res->Data.Address32.Address.Maximum;
1214		break;
1215
1216	case ACPI_RESOURCE_TYPE_ADDRESS64:
1217		start = res->Data.Address64.Address.Minimum;
1218		end = res->Data.Address64.Address.Maximum;
1219		break;
1220
1221	default:
1222		/* Unused types. */
1223		return (AE_OK);
1224	}
1225
1226	/*
1227	 * We don't use <1MB addresses.
1228	 */
1229	if (end < 0x100000)
1230		return (AE_OK);
1231
1232	/* Don't conflict with vTPM. */
1233	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1234		end = VTPM_BASE_ADDR - 1;
1235
1236	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1237	    (pc->pass == parse_64 && start >= FOUR_GB))
1238		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1239		    start, end, 0);
1240
1241	return (AE_OK);
1242}
1243
1244static void
1245vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1246{
1247	struct parse_context pc;
1248	ACPI_STATUS status;
1249
1250	if (bootverbose)
1251		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1252
1253	pc.vmbus_dev = vmbus_dev;
1254	pc.pass = pass;
1255	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1256			parse_crs, &pc);
1257
1258	if (bootverbose && ACPI_FAILURE(status))
1259		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1260}
1261
1262static void
1263vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1264{
1265	device_t acpi0, pcib0 = NULL;
1266	device_t *children;
1267	int i, count;
1268
1269	/* Try to find _CRS on VMBus device */
1270	vmbus_get_crs(dev, dev, pass);
1271
1272	/* Try to find _CRS on VMBus device's parent */
1273	acpi0 = device_get_parent(dev);
1274	vmbus_get_crs(acpi0, dev, pass);
1275
1276	/* Try to locate pcib0 and find _CRS on it */
1277	if (device_get_children(acpi0, &children, &count) != 0)
1278		return;
1279
1280	for (i = 0; i < count; i++) {
1281		if (!device_is_attached(children[i]))
1282			continue;
1283
1284		if (strcmp("pcib0", device_get_nameunit(children[i])))
1285			continue;
1286
1287		pcib0 = children[i];
1288		break;
1289	}
1290
1291	if (pcib0)
1292		vmbus_get_crs(pcib0, dev, pass);
1293
1294	free(children, M_TEMP);
1295}
1296
1297static void
1298vmbus_get_mmio_res(device_t dev)
1299{
1300	struct vmbus_softc *sc = device_get_softc(dev);
1301	/*
1302	 * We walk the resources twice to make sure that: in the resource
1303	 * list, the 32-bit resources appear behind the 64-bit resources.
1304	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1305	 * iterate through the list to find a range for a 64-bit BAR in
1306	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1307	 * ranges first.
1308	 */
1309	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1310
1311	vmbus_get_mmio_res_pass(dev, parse_64);
1312	vmbus_get_mmio_res_pass(dev, parse_32);
1313}
1314
1315static void
1316vmbus_free_mmio_res(device_t dev)
1317{
1318	struct vmbus_softc *sc = device_get_softc(dev);
1319
1320	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1321}
1322#endif	/* NEW_PCIB */
1323
1324static int
1325vmbus_probe(device_t dev)
1326{
1327	char *id[] = { "VMBUS", NULL };
1328
1329	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
1330	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1331	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1332		return (ENXIO);
1333
1334	device_set_desc(dev, "Hyper-V Vmbus");
1335
1336	return (BUS_PROBE_DEFAULT);
1337}
1338
1339/**
1340 * @brief Main vmbus driver initialization routine.
1341 *
1342 * Here, we
1343 * - initialize the vmbus driver context
1344 * - setup various driver entry points
1345 * - invoke the vmbus hv main init routine
1346 * - get the irq resource
1347 * - invoke the vmbus to add the vmbus root device
1348 * - setup the vmbus root device
1349 * - retrieve the channel offers
1350 */
1351static int
1352vmbus_doattach(struct vmbus_softc *sc)
1353{
1354	struct sysctl_oid_list *child;
1355	struct sysctl_ctx_list *ctx;
1356	int ret;
1357
1358	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1359		return (0);
1360
1361#ifdef NEW_PCIB
1362	vmbus_get_mmio_res(sc->vmbus_dev);
1363#endif
1364
1365	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1366
1367	sc->vmbus_gpadl = VMBUS_GPADL_START;
1368	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1369	TAILQ_INIT(&sc->vmbus_prichans);
1370	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1371	TAILQ_INIT(&sc->vmbus_chans);
1372	sc->vmbus_chmap = malloc(
1373	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1374	    M_WAITOK | M_ZERO);
1375
1376	/*
1377	 * Create context for "post message" Hypercalls
1378	 */
1379	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1380	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1381	    sizeof(struct vmbus_msghc));
1382	if (sc->vmbus_xc == NULL) {
1383		ret = ENXIO;
1384		goto cleanup;
1385	}
1386
1387	/*
1388	 * Allocate DMA stuffs.
1389	 */
1390	ret = vmbus_dma_alloc(sc);
1391	if (ret != 0)
1392		goto cleanup;
1393
1394	/*
1395	 * Setup interrupt.
1396	 */
1397	ret = vmbus_intr_setup(sc);
1398	if (ret != 0)
1399		goto cleanup;
1400
1401	/*
1402	 * Setup SynIC.
1403	 */
1404	if (bootverbose)
1405		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1406	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1407	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1408
1409	/*
1410	 * Initialize vmbus, e.g. connect to Hypervisor.
1411	 */
1412	ret = vmbus_init(sc);
1413	if (ret != 0)
1414		goto cleanup;
1415
1416	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1417	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1418		sc->vmbus_event_proc = vmbus_event_proc_compat;
1419	else
1420		sc->vmbus_event_proc = vmbus_event_proc;
1421
1422	ret = vmbus_scan(sc);
1423	if (ret != 0)
1424		goto cleanup;
1425
1426	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1427	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1428	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1429	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1430	    vmbus_sysctl_version, "A", "vmbus version");
1431
1432	return (ret);
1433
1434cleanup:
1435	vmbus_scan_teardown(sc);
1436	vmbus_intr_teardown(sc);
1437	vmbus_dma_free(sc);
1438	if (sc->vmbus_xc != NULL) {
1439		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1440		sc->vmbus_xc = NULL;
1441	}
1442	free(sc->vmbus_chmap, M_DEVBUF);
1443	mtx_destroy(&sc->vmbus_prichan_lock);
1444	mtx_destroy(&sc->vmbus_chan_lock);
1445
1446	return (ret);
1447}
1448
1449static void
1450vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1451{
1452}
1453
1454static int
1455vmbus_attach(device_t dev)
1456{
1457	vmbus_sc = device_get_softc(dev);
1458	vmbus_sc->vmbus_dev = dev;
1459
1460	/*
1461	 * Event processing logic will be configured:
1462	 * - After the vmbus protocol version negotiation.
1463	 * - Before we request channel offers.
1464	 */
1465	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1466
1467	/*
1468	 * If the system has already booted and thread
1469	 * scheduling is possible indicated by the global
1470	 * cold set to zero, we just call the driver
1471	 * initialization directly.
1472	 */
1473	if (!cold)
1474		vmbus_doattach(vmbus_sc);
1475
1476	return (0);
1477}
1478
1479static int
1480vmbus_detach(device_t dev)
1481{
1482	struct vmbus_softc *sc = device_get_softc(dev);
1483
1484	bus_generic_detach(dev);
1485	vmbus_chan_destroy_all(sc);
1486
1487	vmbus_scan_teardown(sc);
1488
1489	vmbus_disconnect(sc);
1490
1491	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1492		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1493		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1494	}
1495
1496	vmbus_intr_teardown(sc);
1497	vmbus_dma_free(sc);
1498
1499	if (sc->vmbus_xc != NULL) {
1500		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1501		sc->vmbus_xc = NULL;
1502	}
1503
1504	free(sc->vmbus_chmap, M_DEVBUF);
1505	mtx_destroy(&sc->vmbus_prichan_lock);
1506	mtx_destroy(&sc->vmbus_chan_lock);
1507
1508#ifdef NEW_PCIB
1509	vmbus_free_mmio_res(dev);
1510#endif
1511
1512	return (0);
1513}
1514
1515static void
1516vmbus_sysinit(void *arg __unused)
1517{
1518	struct vmbus_softc *sc = vmbus_get_softc();
1519
1520	if (vm_guest != VM_GUEST_HV || sc == NULL)
1521		return;
1522
1523	/*
1524	 * If the system has already booted and thread
1525	 * scheduling is possible, as indicated by the
1526	 * global cold set to zero, we just call the driver
1527	 * initialization directly.
1528	 */
1529	if (!cold)
1530		vmbus_doattach(sc);
1531}
1532/*
1533 * NOTE:
1534 * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1535 * initialized.
1536 */
1537SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1538