1/*      $NetBSD: xpci_xenbus.c,v 1.29 2024/06/23 00:53:34 riastradh Exp $      */
2
3/*
4 * Copyright (c) 2009 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__KERNEL_RCSID(0, "$NetBSD: xpci_xenbus.c,v 1.29 2024/06/23 00:53:34 riastradh Exp $");
29
30#include "opt_xen.h"
31
32#include <sys/types.h>
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/errno.h>
36#include <sys/kernel.h>
37#include <sys/bus.h>
38
39#include <uvm/uvm_extern.h>
40
41#include <machine/bus_private.h>
42
43#include <dev/isa/isareg.h>
44
45#include <xen/hypervisor.h>
46#include <xen/evtchn.h>
47#include <xen/granttables.h>
48#include <xen/include/public/io/pciif.h>
49#include <xen/xenbus.h>
50
51#include "locators.h"
52
53#include <dev/pci/pcivar.h>
54
/*
 * Debug output.  Define XPCI_DEBUG to get DPRINTF(("fmt", args...))
 * forwarded to printf(); note the double parentheses at the call site.
 * Both variants expand to a single statement so that the macro is safe
 * in an unbraced if/else.
 */
#undef XPCI_DEBUG
#ifdef XPCI_DEBUG
#define DPRINTF(x) do { printf x; } while (0)
#else
#define DPRINTF(x) do { } while (0)
#endif
61
62struct xpci_xenbus_softc {
63	device_t sc_dev;
64	struct xenbus_device *sc_xbusd;
65	unsigned int sc_evtchn;
66	int sc_backend_status; /* our status with backend */
67#define XPCI_STATE_DISCONNECTED 0
68#define XPCI_STATE_CONNECTED    1
69#define XPCI_STATE_SUSPENDED    2
70	int sc_shutdown;
71	struct xen_pci_sharedinfo *sc_shared;
72	grant_ref_t sc_shared_gntref;
73};
74#define GRANT_INVALID_REF -1
75
76static int  xpci_xenbus_match(device_t, cfdata_t, void *);
77static void xpci_xenbus_attach(device_t, device_t, void *);
78static int  xpci_xenbus_detach(device_t, int);
79
80static void xpci_backend_changed(void *, XenbusState);
81static int  xpci_xenbus_resume(void *);
82static void xpci_connect(struct xpci_xenbus_softc *);
83static void xpci_attach_pcibus(int, int);
84
85static struct xpci_xenbus_softc *xpci_sc = NULL;
86
87CFATTACH_DECL_NEW(xpci_xenbus, sizeof(struct xpci_xenbus_softc),
88   xpci_xenbus_match, xpci_xenbus_attach, xpci_xenbus_detach, NULL);
89
90struct x86_bus_dma_tag pci_bus_dma_tag = {
91	._tag_needs_free	= 0,
92#if defined(_LP64) || defined(PAE)
93	._bounce_thresh		= PCI32_DMA_BOUNCE_THRESHOLD,
94	._bounce_alloc_lo	= 0,
95	._bounce_alloc_hi	= PCI32_DMA_BOUNCE_THRESHOLD,
96#else
97	._bounce_thresh		= 0,
98	._bounce_alloc_lo	= 0,
99	._bounce_alloc_hi	= 0,
100#endif
101	._may_bounce		= NULL,
102};
103
104#ifdef _LP64
105struct x86_bus_dma_tag pci_bus_dma64_tag = {
106	._tag_needs_free	= 0,
107	._bounce_thresh		= 0,
108	._bounce_alloc_lo	= 0,
109	._bounce_alloc_hi	= 0,
110	._may_bounce		= NULL,
111};
112#endif
113
114static int
115xpci_xenbus_match(device_t parent, cfdata_t match, void *aux)
116{
117	struct xenbusdev_attach_args *xa = aux;
118
119	if (strcmp(xa->xa_type, "pci") != 0)
120		return 0;
121
122	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
123	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
124		return 0;
125
126	return 1;
127}
128
129static void
130xpci_xenbus_attach(device_t parent, device_t self, void *aux)
131{
132	struct xpci_xenbus_softc *sc = device_private(self);
133	struct xenbusdev_attach_args *xa = aux;
134
135	if (xpci_sc != NULL) {
136		aprint_error("Xen PCI frontend already attached\n");
137		return;
138	}
139	xpci_sc = sc;
140	DPRINTF(("xpci_sc %p\n", xpci_sc));
141
142	config_pending_incr(self);
143	aprint_normal(": Xen PCI passthrough Interface\n");
144	sc->sc_dev = self;
145
146	sc->sc_xbusd = xa->xa_xbusd;
147	sc->sc_xbusd->xbusd_otherend_changed = xpci_backend_changed;
148
149	sc->sc_backend_status = XPCI_STATE_DISCONNECTED;
150	sc->sc_shutdown = 1;
151	/* initialise shared structures and tell backend that we are ready */
152	xpci_xenbus_resume(sc);
153}
154
155static int
156xpci_xenbus_detach(device_t dev, int flags)
157{
158	return EBUSY;
159}
160
161static int
162xpci_xenbus_resume(void *p)
163{
164	struct xpci_xenbus_softc *sc = p;
165	struct xenbus_transaction *xbt;
166	int error;
167	struct xen_pci_sharedinfo *shared;
168	paddr_t ma;
169	const char *errmsg;
170
171	sc->sc_shared_gntref = GRANT_INVALID_REF;
172	/* setup device: alloc event channel and shared info structure */
173	sc->sc_shared = shared = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
174		 UVM_KMF_ZERO | UVM_KMF_WIRED);
175	if (shared == NULL)
176		 panic("xpci_xenbus_resume: can't alloc shared info");
177
178	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)shared, &ma);
179	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_shared_gntref);
180	if (error)
181		 return error;
182	DPRINTF(("shared %p ma 0x%jx ref %#x\n", shared, (uintmax_t)ma,
183	    sc->sc_shared_gntref));
184	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
185	if (error)
186		 return error;
187	aprint_verbose_dev(sc->sc_dev, "using event channel %d\n",
188	    sc->sc_evtchn);
189#if 0
190	xen_intr_establish_xname(-1, &xen_pic, pbxi->pbx_evtchn, IST_LEVEL,
191	    IPL_BIO, &xpci_handler, sc, true,
192	    device_xname(sc->sc_dev));
193#endif
194
195again:
196	xbt = xenbus_transaction_start();
197	if (xbt == NULL)
198		 return ENOMEM;
199	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
200	    "pci-op-ref","%u", sc->sc_shared_gntref);
201	if (error) {
202		 errmsg = "writing pci-op-ref";
203		 goto abort_transaction;
204	}
205	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
206	    "event-channel", "%u", sc->sc_evtchn);
207	if (error) {
208		 errmsg = "writing event channel";
209		 goto abort_transaction;
210	}
211	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
212	    "magic", "%s", XEN_PCI_MAGIC);
213	if (error) {
214		 errmsg = "writing magic";
215		 goto abort_transaction;
216	}
217	error = xenbus_switch_state(sc->sc_xbusd, xbt, XenbusStateInitialised);
218	if (error) {
219		 errmsg = "writing frontend XenbusStateInitialised";
220		 goto abort_transaction;
221	}
222	error = xenbus_transaction_end(xbt, 0);
223	if (error == EAGAIN)
224		 goto again;
225	if (error) {
226		 xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
227		 return -1;
228	}
229	return 0;
230
231abort_transaction:
232	xenbus_transaction_end(xbt, 1);
233	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
234	return error;
235}
236
237static void
238xpci_backend_changed(void *arg, XenbusState new_state)
239{
240	struct xpci_xenbus_softc *sc = device_private((device_t)arg);
241	int s;
242	DPRINTF(("%s: new backend state %d\n", device_xname(sc->sc_dev), new_state));
243
244	switch (new_state) {
245	case XenbusStateUnknown:
246	case XenbusStateInitialising:
247	case XenbusStateInitWait:
248	case XenbusStateInitialised:
249		break;
250	case XenbusStateClosing:
251		s = splbio(); /* XXXSMP */
252		sc->sc_shutdown = 1;
253		/* wait for requests to complete */
254#if 0
255		while (sc->sc_backend_status == XPCI_STATE_CONNECTED &&
256		   sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
257			/* XXXSMP */
258			tsleep(xpci_xenbus_detach, PRIBIO, "xpcidetach",
259			   hz/2);
260#endif
261		splx(s);
262		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
263		break;
264	case XenbusStateConnected:
265		/*
266		 * note that xpci_backend_changed() can only be called by
267		 * the xenbus thread.
268		 */
269
270		if (sc->sc_backend_status == XPCI_STATE_CONNECTED)
271			/* already connected */
272			return;
273
274		sc->sc_shutdown = 0;
275		xpci_connect(sc);
276
277		sc->sc_backend_status = XPCI_STATE_CONNECTED;
278
279		/* the devices should be working now */
280		config_pending_decr(sc->sc_dev);
281		break;
282	default:
283		panic("bad backend state %d", new_state);
284	}
285}
286
287static void
288xpci_connect(struct xpci_xenbus_softc *sc)
289{
290	u_long num_roots;
291	int err;
292	char *domain, *bus, *ep;
293	char node[10];
294	u_long busn;
295	int i;
296	int s;
297
298	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
299	   "root_num", &num_roots, 10);
300	if (err == ENOENT) {
301		aprint_error_dev(sc->sc_dev,
302		   "No PCI Roots found, trying 0000:00\n");
303		s = splhigh();
304		xpci_attach_pcibus(0, 0);
305		splx(s);
306		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
307		return;
308	} else if (err) {
309		aprint_error_dev(sc->sc_dev, "can't read root_num: %d\n", err);
310		return;
311	}
312
313	aprint_verbose_dev(sc->sc_dev, "%lu bus%s\n", num_roots,
314	    (num_roots > 1) ? "ses" : "");
315
316	for (i = 0; i < num_roots; i++) {
317		char root[32];
318		snprintf(node, sizeof(node), "root-%d", i);
319		xenbus_read(NULL, sc->sc_xbusd->xbusd_otherend, node,
320		    root, sizeof(root));
321		/* split dddd:bb in 2 strings, a la strtok */
322		domain = dev;
323		root[4] = '\0';
324		bus = &root[5];
325		if (strcmp(domain, "0000") != 0) {
326			aprint_error_dev(sc->sc_dev,
327			   "non-zero PCI domain %s not supported\n", domain);
328		} else {
329			busn = strtoul(bus, &ep, 16);
330			if (*ep != '\0')
331				aprint_error_dev(sc->sc_dev,
332				   "%s is not a number\n", bus);
333			else {
334				s = splhigh();
335				xpci_attach_pcibus(0, busn);
336				splx(s);
337			}
338		}
339	}
340
341	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
342}
343
344static void
345xpci_attach_pcibus(int domain, int busn)
346{
347	struct pcibus_attach_args pba;
348
349	memset(&pba, 0, sizeof(struct pcibus_attach_args));
350	pba.pba_iot = x86_bus_space_io;
351	pba.pba_memt = x86_bus_space_mem;
352	pba.pba_dmat = &pci_bus_dma_tag;
353#ifdef _LP64
354	pba.pba_dmat64 = &pci_bus_dma64_tag;
355#else
356	pba.pba_dmat64 = NULL;
357#endif
358	pba.pba_flags = PCI_FLAGS_MEM_OKAY | PCI_FLAGS_IO_OKAY |
359	    PCI_FLAGS_MRL_OKAY | PCI_FLAGS_MRM_OKAY | PCI_FLAGS_MWI_OKAY;
360	pba.pba_bridgetag = NULL;
361	pba.pba_bus = busn;
362	config_found(xpci_sc->sc_dev, &pba, pcibusprint, CFARGS_NONE);
363}
364
365/* functions required by the MI PCI system */
366
367void
368pci_attach_hook(device_t parent, device_t self, struct pcibus_attach_args *pba)
369{
370	/* nothing */
371}
372
373int
374pci_bus_maxdevs(pci_chipset_tag_t pc, int busno)
375{
376	return 32;
377}
378
379pcitag_t
380pci_make_tag(pci_chipset_tag_t pc, int bus, int device, int function)
381{
382	pcitag_t tag;
383	KASSERT((function & ~0x7) == 0);
384	KASSERT((device & ~0x1f) == 0);
385	KASSERT((bus & ~0xff) == 0);
386	tag.mode1 = (bus << 8) | (device << 3) | (function << 0);
387	return tag;
388}
389
390void
391pci_decompose_tag(pci_chipset_tag_t pc, pcitag_t tag,
392    int *bp, int *dp, int *fp)
393{
394	if (bp != NULL)
395		*bp = (tag.mode1 >> 8) & 0xff;
396	if (dp != NULL)
397		*dp = (tag.mode1 >> 3) & 0x1f;
398	if (fp != NULL)
399		*fp = (tag.mode1 >> 0) & 0x7;
400	return;
401}
402
403static void
404xpci_do_op(struct xen_pci_op *op)
405{
406	struct xen_pci_op *active_op = &xpci_sc->sc_shared->op;
407	static __cpu_simple_lock_t pci_conf_lock = __SIMPLELOCK_UNLOCKED;
408	int s;
409
410	s = splhigh();
411	__cpu_simple_lock(&pci_conf_lock);
412
413	memcpy(active_op, op, sizeof(struct xen_pci_op));
414	x86_sfence();
415	xen_atomic_set_bit(&xpci_sc->sc_shared->flags, _XEN_PCIF_active);
416	hypervisor_notify_via_evtchn(xpci_sc->sc_evtchn);
417	while (xen_atomic_test_bit(&xpci_sc->sc_shared->flags,
418	    _XEN_PCIF_active)) {
419		hypervisor_clear_event(xpci_sc->sc_evtchn);
420		/* HYPERVISOR_yield(); */
421	}
422	memcpy(op, active_op, sizeof(struct xen_pci_op));
423
424	__cpu_simple_unlock(&pci_conf_lock);
425	splx(s);
426}
427
428static int
429xpci_conf_read(pci_chipset_tag_t pc, pcitag_t tag, int reg, int size,
430    pcireg_t *value)
431{
432	int bus, dev, func;
433	struct xen_pci_op op = {
434		.cmd    = XEN_PCI_OP_conf_read,
435		.domain = 0, /* XXX */
436	};
437
438	pci_decompose_tag(pc, tag, &bus, &dev, &func);
439	DPRINTF(("pci_conf_read %d:%d:%d reg 0x%x", bus, dev, func, reg));
440
441	op.bus = bus;
442	op.devfn = (dev << 3) | func;
443	op.offset = reg;
444	op.size = size;
445	xpci_do_op(&op);
446
447	*value = op.value;
448	DPRINTF((" val 0x%x err %d\n", *value, op.err));
449
450	return op.err;
451}
452
453pcireg_t
454pci_conf_read(pci_chipset_tag_t pc, pcitag_t tag, int reg)
455{
456	pcireg_t v;
457
458	xpci_conf_read(pc, tag, reg, 4, &v);
459	return v;
460}
461
462static int
463xpci_conf_write(pci_chipset_tag_t pc, pcitag_t tag, int reg, int size,
464    pcireg_t data)
465{
466	int bus, dev, func;
467	struct xen_pci_op op = {
468		.cmd    = XEN_PCI_OP_conf_write,
469		.domain = 0, /* XXX */
470	};
471
472	pci_decompose_tag(pc, tag, &bus, &dev, &func);
473	DPRINTF(("pci_conf_write %d:%d:%d reg 0x%x val 0x%x", bus, dev, func, reg, data));
474
475	op.bus = bus;
476	op.devfn = (dev << 3) | func;
477	op.offset = reg;
478	op.size = size;
479	op.value = data;
480	xpci_do_op(&op);
481
482	DPRINTF((" err %d\n", op.err));
483
484	return op.err;
485}
486
487void
488pci_conf_write(pci_chipset_tag_t pc, pcitag_t tag, int reg, pcireg_t data)
489{
490	xpci_conf_write(pc, tag, reg, 4, data);
491}
492
493int
494xpci_enumerate_bus1(struct pci_softc *sc, const int *locators,
495    int (*match)(void *, const struct pci_attach_args *), void *cookie,
496    struct pci_attach_args *pap)
497{
498#if 0
499	char *string;
500	char *domain, *bus, *dev, *func, *ep;
501	u_long busn, devn, funcn;
502	char node[10];
503	u_long num_devs;
504	int i;
505	int err;
506	pcitag_t tag;
507	pci_chipset_tag_t pc = sc->sc_pc;
508
509	err = xenbus_read_ul(NULL, xpci_sc->sc_xbusd->xbusd_otherend,
510	   "num_devs", &num_devs, 10);
511	if (err) {
512		aprint_error_dev(xpci_sc->sc_dev,
513		   "can't read num_devs: %d\n", err);
514		return err;
515	}
516	for (i = 0; i < num_devs; i++) {
517		char string[32];
518		snprintf(node, sizeof(node), "dev-%d", i);
519		xenbus_read(NULL, xpci_sc->sc_xbusd->xbusd_otherend,
520		   node, string, sizeof(string));
521		/* split dddd:bb:dd:ff in 4 strings, a la strtok */
522		domain = string;
523		string[4] = '\0';
524		bus = &string[5];
525		string[7] = '\0';
526		dev = &string[8];
527		string[10] = '\0';
528		func = &string[11];
529		if (strcmp(domain, "0000") != 0) {
530			aprint_error_dev(xpci_sc->sc_dev,
531			   "non-zero PCI domain %s not supported\n", domain);
532		} else {
533			busn = strtoul(bus, &ep, 16);
534			if (*ep != '\0') {
535				aprint_error_dev(xpci_sc->sc_dev,
536				   "%s is not a number\n", bus);
537				goto endfor;
538			}
539			devn = strtoul(dev, &ep, 16);
540			if (*ep != '\0') {
541				aprint_error_dev(xpci_sc->sc_dev,
542				   "%s is not a number\n", dev);
543				goto endfor;
544			}
545			funcn = strtoul(func, &ep, 16);
546			if (*ep != '\0') {
547				aprint_error_dev(xpci_sc->sc_dev,
548				   "%s is not a number\n", func);
549				goto endfor;
550			}
551			if (busn != sc->sc_bus)
552				goto endfor;
553			tag = pci_make_tag(pc, busn, devn, funcn);
554			err = pci_probe_device1(sc, tag, match, pap);
555			if (match != NULL && err != 0)
556				return (err);
557		}
558endfor:
559	}
560	return (0);
561#else
562	int devn, funcn;
563	pcitag_t tag;
564	pci_chipset_tag_t pc = sc->sc_pc;
565	int err;
566	/*
567	 * Xen is lacking an important info: the domain:bus:dev:func
568	 * present in dev-xx in the store are real PCI bus:dev:func, and
569	 * xenback may present us fake ones, and unfortunately it's not
570	 * the list of devices is not published in the store with this
571	 * information. So we have to scan all dev/func combination :(
572	 * the MI scan function isn't enough because it doesn't search
573	 * for functions >= 1 if function 0 is not there.
574	 */
575	for (devn = 0; devn < 32; devn++) {
576		for (funcn = 0; funcn < 8; funcn++) {
577			pcireg_t csr, bar;
578			int reg;
579			tag = pci_make_tag(pc, sc->sc_bus, devn, funcn);
580			/* try a READ on device ID. if it fails, no device */
581			if (xpci_conf_read(pc, tag, PCI_ID_REG, 4, &csr) != 0)
582				continue;
583			/* check CSR. linux disable the device, sigh */
584			if (xpci_conf_read(pc, tag, PCI_COMMAND_STATUS_REG, 4,
585			    &csr) != 0) {
586				aprint_error(
587				    "0x%2x:0x%2x:0x%2x failed to read CSR\n",
588				    sc->sc_bus, devn, funcn);
589				continue;
590			}
591			if ((csr &
592			    (PCI_COMMAND_IO_ENABLE|PCI_COMMAND_MEM_ENABLE))
593			    == 0) {
594				/* need to enable the device */
595				for (reg = PCI_MAPREG_START;
596				    reg < PCI_MAPREG_END;
597				    reg += 4) {
598					if (xpci_conf_read(pc, tag, reg, 4,
599					    &bar) != 0) {
600						aprint_error(
601						    "0x%2x:0x%2x:0x%2x "
602						    "failed to read 0x%x\n",
603						    sc->sc_bus, devn, funcn,
604						    reg);
605						goto next;
606
607					}
608					if (bar & PCI_MAPREG_TYPE_IO)
609						csr |= PCI_COMMAND_IO_ENABLE;
610					else if (bar)
611						csr |= PCI_COMMAND_MEM_ENABLE;
612				}
613				DPRINTF(("write CSR 0x%x\n", csr));
614				if (xpci_conf_write(pc, tag,
615				    PCI_COMMAND_STATUS_REG, 4, csr)) {
616					aprint_error(
617					    "0x%2x:0x%2x:0x%2x "
618					    "failed to write CSR\n",
619					     sc->sc_bus, devn, funcn);
620					goto next;
621				}
622			}
623			err = pci_probe_device1(sc, tag, match, pap);
624			if (match != NULL && err != 0)
625				return (err);
626next:
627			continue;
628		}
629	}
630	return 0;
631#endif
632}
633