ntb_hw.c revision 290686
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 290686 2015-11-11 18:56:02Z cem $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/queue.h>
39#include <sys/rman.h>
40#include <sys/sbuf.h>
41#include <sys/sysctl.h>
42#include <vm/vm.h>
43#include <vm/pmap.h>
44#include <machine/bus.h>
45#include <machine/pmap.h>
46#include <machine/resource.h>
47#include <dev/pci/pcireg.h>
48#include <dev/pci/pcivar.h>
49
50#include "ntb_regs.h"
51#include "ntb_hw.h"
52
/*
 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
 * allows you to connect two systems using a PCI-e link.
 *
 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
 * receive messages in the scratch-pad registers.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
64
/* Size interrupt bookkeeping for whichever platform has more doorbells. */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */

/* Fetch this driver's softc from a newbus device handle. */
#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
71
/* Hardware family of the attached bridge; selects the Xeon or Atom paths. */
enum ntb_device_type {
	NTB_XEON = 0,
	NTB_ATOM = 1
};
76
/*
 * Connection topology.  These values are hardware numbers and therefore
 * cannot change.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT	= 0,
	NTB_CONN_B2B		= 1,
	NTB_CONN_RP		= 2,
};
83
/*
 * Which end of a back-to-back link this device is: USD (upstream side) or
 * DSD (downstream side).
 */
enum ntb_b2b_direction {
	NTB_DEV_USD	= 0,
	NTB_DEV_DSD	= 1,
};
88
/*
 * Driver-internal BAR indices into ntb_softc.bar_info[].  ntb_map_pci_bars()
 * binds them to PCI BAR 0, 2, 4 and 5 respectively.
 */
enum ntb_bar {
	NTB_CONFIG_BAR	= 0,	/* register (MMR) BAR */
	NTB_B2B_BAR_1	= 1,
	NTB_B2B_BAR_2	= 2,
	NTB_B2B_BAR_3	= 3,
	NTB_MAX_BARS	= 4	/* count, not a valid index */
};
96
/*
 * Device features and workarounds.
 *
 * True if any bit of 'feature' is set for this device.  Relies on a variable
 * named 'ntb' (the softc pointer) being in scope at the point of use.
 */
#define HAS_FEATURE(feature)	\
	(0 != (ntb->features & (feature)))
100
101struct ntb_hw_info {
102	uint32_t		device_id;
103	const char		*desc;
104	enum ntb_device_type	type;
105	uint32_t		features;
106};
107
108struct ntb_pci_bar_info {
109	bus_space_tag_t		pci_bus_tag;
110	bus_space_handle_t	pci_bus_handle;
111	int			pci_resource_id;
112	struct resource		*pci_resource;
113	vm_paddr_t		pbase;
114	caddr_t			vbase;
115	vm_size_t		size;
116
117	/* Configuration register offsets */
118	uint32_t		psz_off;
119	uint32_t		ssz_off;
120	uint32_t		pbarxlat_off;
121};
122
/* Bookkeeping for one allocated interrupt (MSI-X vector or legacy INTx). */
struct ntb_int_info {
	struct resource	*res;	/* SYS_RES_IRQ resource, NULL if not allocated */
	int		rid;	/* resource id requested for 'res' */
	void		*tag;	/* cookie from bus_setup_intr(), NULL if none */
};
128
/* Argument handed to the per-vector MSI-X handler (ndev_vec_isr). */
struct ntb_vec {
	struct ntb_softc	*ntb;	/* owning device */
	uint32_t		num;	/* vector number */
};
133
134struct ntb_reg {
135	uint32_t	ntb_ctl;
136	uint32_t	lnk_sta;
137	uint8_t		db_size;
138	unsigned	mw_bar[NTB_MAX_BARS];
139};
140
/* Doorbell, doorbell-mask and scratchpad register offsets for one side. */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
146
/* Offsets of the per-BAR base, translation and limit registers. */
struct ntb_xlat_reg {
	/* base address registers */
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	/* translation registers */
	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	/* limit registers */
	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
161
/*
 * Bus addresses used for each BAR in a back-to-back topology.  BAR4 has both
 * a 64-bit entry and a 32-bit entry; the 32-bit BAR4/BAR5 entries are the
 * split-BAR mode ones.
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
169
170struct ntb_softc {
171	device_t		device;
172	enum ntb_device_type	type;
173	uint32_t		features;
174
175	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
176	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
177	uint32_t		allocated_interrupts;
178
179	struct callout		heartbeat_timer;
180	struct callout		lr_timer;
181
182	void			*ntb_ctx;
183	const struct ntb_ctx_ops *ctx_ops;
184	struct ntb_vec		*msix_vec;
185#define CTX_LOCK(sc)		mtx_lock(&(sc)->ctx_lock)
186#define CTX_UNLOCK(sc)		mtx_unlock(&(sc)->ctx_lock)
187#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
188	struct mtx		ctx_lock;
189
190	uint32_t		ppd;
191	enum ntb_conn_type	conn_type;
192	enum ntb_b2b_direction	dev_type;
193
194	/* Offset of peer bar0 in B2B BAR */
195	uint64_t			b2b_off;
196	/* Memory window used to access peer bar0 */
197#define B2B_MW_DISABLED			UINT8_MAX
198	uint8_t				b2b_mw_idx;
199
200	uint8_t				mw_count;
201	uint8_t				spad_count;
202	uint8_t				db_count;
203	uint8_t				db_vec_count;
204	uint8_t				db_vec_shift;
205
206	/* Protects local db_mask. */
207#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
208#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
209#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
210	struct mtx			db_mask_lock;
211
212	volatile uint32_t		ntb_ctl;
213	volatile uint32_t		lnk_sta;
214
215	uint64_t			db_valid_mask;
216	uint64_t			db_link_mask;
217	uint64_t			db_mask;
218
219	int				last_ts;	/* ticks @ last irq */
220
221	const struct ntb_reg		*reg;
222	const struct ntb_alt_reg	*self_reg;
223	const struct ntb_alt_reg	*peer_reg;
224	const struct ntb_xlat_reg	*xlat_reg;
225};
226
#ifdef __i386__
/*
 * 64-bit register accessors for i386 builds, synthesized from two 32-bit
 * accesses.  The word at 'offset' is the low half and the word at
 * 'offset + 4' is the high half.
 *
 * The two reads are done as separate statements so that their order is fixed
 * (low word first, matching bus_space_write_8 below) rather than depending on
 * how the compiler orders the operands of a single expression.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

/* Write 'val' as two 32-bit stores: low word first, then high word. */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, (uint32_t)val);
	bus_space_write_4(tag, handle, offset + 4, (uint32_t)(val >> 32));
}
#endif
246
/*
 * Register access helpers.  SIZE is the access width in bytes (it is pasted
 * onto bus_space_read_/bus_space_write_).  All of them rely on a softc
 * pointer named 'ntb' being in scope at the point of use.
 *
 *   ntb_bar_*  access an arbitrary mapped BAR (enum ntb_bar index)
 *   ntb_reg_*  access the configuration BAR
 *   ntb_mw_*   access the BAR backing memory window ntb->b2b_mw_idx
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))

#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)

#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
261
262static int ntb_probe(device_t device);
263static int ntb_attach(device_t device);
264static int ntb_detach(device_t device);
265static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
266static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
267static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
268    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
269static int ntb_map_pci_bars(struct ntb_softc *ntb);
270static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
271    const char *);
272static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
273static int map_memory_window_bar(struct ntb_softc *ntb,
274    struct ntb_pci_bar_info *bar);
275static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
276static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
277static int ntb_init_isr(struct ntb_softc *ntb);
278static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
279static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
280static void ntb_teardown_interrupts(struct ntb_softc *ntb);
281static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
282static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
283static void ndev_vec_isr(void *arg);
284static void ndev_irq_isr(void *arg);
285static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
286static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
287static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
288static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
289static void ntb_free_msix_vec(struct ntb_softc *ntb);
290static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
291static void ntb_detect_max_mw(struct ntb_softc *ntb);
292static int ntb_detect_xeon(struct ntb_softc *ntb);
293static int ntb_detect_atom(struct ntb_softc *ntb);
294static int ntb_xeon_init_dev(struct ntb_softc *ntb);
295static int ntb_atom_init_dev(struct ntb_softc *ntb);
296static void ntb_teardown_xeon(struct ntb_softc *ntb);
297static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
298static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
299    enum ntb_bar regbar);
300static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
301    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
302static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
303    enum ntb_bar idx);
304static int xeon_setup_b2b_mw(struct ntb_softc *,
305    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
306static inline bool link_is_up(struct ntb_softc *ntb);
307static inline bool atom_link_is_err(struct ntb_softc *ntb);
308static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
309static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
310static void atom_link_hb(void *arg);
311static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
312static void recover_atom_link(void *arg);
313static bool ntb_poll_link(struct ntb_softc *ntb);
314static void save_bar_parameters(struct ntb_pci_bar_info *bar);
315static void ntb_sysctl_init(struct ntb_softc *);
316static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
317static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
318static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
319
320static unsigned g_ntb_hw_debug_level;
321SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
322    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
323#define ntb_printf(lvl, ...) do {				\
324	if ((lvl) <= g_ntb_hw_debug_level) {			\
325		device_printf(ntb->device, __VA_ARGS__);	\
326	}							\
327} while (0)
328
329static struct ntb_hw_info pci_ids[] = {
330	/* XXX: PS/SS IDs left out until they are supported. */
331	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
332		NTB_ATOM, 0 },
333
334	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
335		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
336	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
337		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
338	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
339		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
340		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
341	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
342		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
343		    NTB_SB01BASE_LOCKUP },
344	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
345		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
346		    NTB_SB01BASE_LOCKUP },
347
348	{ 0x00000000, NULL, NTB_ATOM, 0 }
349};
350
351static const struct ntb_reg atom_reg = {
352	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
353	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
354	.db_size = sizeof(uint64_t),
355	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
356};
357
358static const struct ntb_alt_reg atom_pri_reg = {
359	.db_bell = ATOM_PDOORBELL_OFFSET,
360	.db_mask = ATOM_PDBMSK_OFFSET,
361	.spad = ATOM_SPAD_OFFSET,
362};
363
364static const struct ntb_alt_reg atom_b2b_reg = {
365	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
366	.spad = ATOM_B2B_SPAD_OFFSET,
367};
368
369static const struct ntb_xlat_reg atom_sec_xlat = {
370#if 0
371	/* "FIXME" says the Linux driver. */
372	.bar0_base = ATOM_SBAR0BASE_OFFSET,
373	.bar2_base = ATOM_SBAR2BASE_OFFSET,
374	.bar4_base = ATOM_SBAR4BASE_OFFSET,
375
376	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
377	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
378#endif
379
380	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
381	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
382};
383
384static const struct ntb_reg xeon_reg = {
385	.ntb_ctl = XEON_NTBCNTL_OFFSET,
386	.lnk_sta = XEON_LINK_STATUS_OFFSET,
387	.db_size = sizeof(uint16_t),
388	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
389};
390
391static const struct ntb_alt_reg xeon_pri_reg = {
392	.db_bell = XEON_PDOORBELL_OFFSET,
393	.db_mask = XEON_PDBMSK_OFFSET,
394	.spad = XEON_SPAD_OFFSET,
395};
396
397static const struct ntb_alt_reg xeon_b2b_reg = {
398	.db_bell = XEON_B2B_DOORBELL_OFFSET,
399	.spad = XEON_B2B_SPAD_OFFSET,
400};
401
402static const struct ntb_xlat_reg xeon_sec_xlat = {
403	.bar0_base = XEON_SBAR0BASE_OFFSET,
404	.bar2_base = XEON_SBAR2BASE_OFFSET,
405	.bar4_base = XEON_SBAR4BASE_OFFSET,
406	.bar5_base = XEON_SBAR5BASE_OFFSET,
407
408	.bar2_limit = XEON_SBAR2LMT_OFFSET,
409	.bar4_limit = XEON_SBAR4LMT_OFFSET,
410	.bar5_limit = XEON_SBAR5LMT_OFFSET,
411
412	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
413	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
414	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
415};
416
417static struct ntb_b2b_addr xeon_b2b_usd_addr = {
418	.bar0_addr = XEON_B2B_BAR0_USD_ADDR,
419	.bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64,
420	.bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64,
421	.bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32,
422	.bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32,
423};
424
425static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
426	.bar0_addr = XEON_B2B_BAR0_DSD_ADDR,
427	.bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64,
428	.bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64,
429	.bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32,
430	.bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32,
431};
432
433SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
434    "B2B MW segment overrides -- MUST be the same on both sides");
435
436SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
437    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
438    "hardware, use this 64-bit address on the bus between the NTB devices for "
439    "the window at BAR2, on the upstream side of the link.  MUST be the same "
440    "address on both sides.");
441SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
442    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
443SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
444    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
445    "(split-BAR mode).");
446SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
447    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
448    "(split-BAR mode).");
449
450SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
451    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
452    "hardware, use this 64-bit address on the bus between the NTB devices for "
453    "the window at BAR2, on the downstream side of the link.  MUST be the same"
454    " address on both sides.");
455SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
456    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
457SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
458    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
459    "(split-BAR mode).");
460SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
461    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
462    "(split-BAR mode).");
463
464/*
465 * OS <-> Driver interface structures
466 */
467MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
468
469static device_method_t ntb_pci_methods[] = {
470	/* Device interface */
471	DEVMETHOD(device_probe,     ntb_probe),
472	DEVMETHOD(device_attach,    ntb_attach),
473	DEVMETHOD(device_detach,    ntb_detach),
474	DEVMETHOD_END
475};
476
477static driver_t ntb_pci_driver = {
478	"ntb_hw",
479	ntb_pci_methods,
480	sizeof(struct ntb_softc),
481};
482
483static devclass_t ntb_devclass;
484DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
485MODULE_VERSION(ntb_hw, 1);
486
487SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
488
489/*
490 * OS <-> Driver linkage functions
491 */
492static int
493ntb_probe(device_t device)
494{
495	struct ntb_hw_info *p;
496
497	p = ntb_get_device_info(pci_get_devid(device));
498	if (p == NULL)
499		return (ENXIO);
500
501	device_set_desc(device, p->desc);
502	return (0);
503}
504
505static int
506ntb_attach(device_t device)
507{
508	struct ntb_softc *ntb;
509	struct ntb_hw_info *p;
510	int error;
511
512	ntb = DEVICE2SOFTC(device);
513	p = ntb_get_device_info(pci_get_devid(device));
514
515	ntb->device = device;
516	ntb->type = p->type;
517	ntb->features = p->features;
518	ntb->b2b_mw_idx = B2B_MW_DISABLED;
519
520	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
521	callout_init(&ntb->heartbeat_timer, 1);
522	callout_init(&ntb->lr_timer, 1);
523	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
524	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
525
526	if (ntb->type == NTB_ATOM)
527		error = ntb_detect_atom(ntb);
528	else
529		error = ntb_detect_xeon(ntb);
530	if (error != 0)
531		goto out;
532
533	ntb_detect_max_mw(ntb);
534
535	pci_enable_busmaster(ntb->device);
536
537	error = ntb_map_pci_bars(ntb);
538	if (error != 0)
539		goto out;
540	if (ntb->type == NTB_ATOM)
541		error = ntb_atom_init_dev(ntb);
542	else
543		error = ntb_xeon_init_dev(ntb);
544	if (error != 0)
545		goto out;
546
547	ntb_poll_link(ntb);
548
549	ntb_sysctl_init(ntb);
550
551out:
552	if (error != 0)
553		ntb_detach(device);
554	return (error);
555}
556
557static int
558ntb_detach(device_t device)
559{
560	struct ntb_softc *ntb;
561
562	ntb = DEVICE2SOFTC(device);
563
564	if (ntb->self_reg != NULL)
565		ntb_db_set_mask(ntb, ntb->db_valid_mask);
566	callout_drain(&ntb->heartbeat_timer);
567	callout_drain(&ntb->lr_timer);
568	pci_disable_busmaster(ntb->device);
569	if (ntb->type == NTB_XEON)
570		ntb_teardown_xeon(ntb);
571	ntb_teardown_interrupts(ntb);
572
573	mtx_destroy(&ntb->db_mask_lock);
574	mtx_destroy(&ntb->ctx_lock);
575
576	/*
577	 * Redetect total MWs so we unmap properly -- in case we lowered the
578	 * maximum to work around Xeon errata.
579	 */
580	ntb_detect_max_mw(ntb);
581	ntb_unmap_pci_bar(ntb);
582
583	return (0);
584}
585
586/*
587 * Driver internal routines
588 */
589static inline enum ntb_bar
590ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
591{
592
593	KASSERT(mw < ntb->mw_count ||
594	    (mw != B2B_MW_DISABLED && mw == ntb->b2b_mw_idx),
595	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
596	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
597
598	return (ntb->reg->mw_bar[mw]);
599}
600
601static inline bool
602bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
603{
604	/* XXX This assertion could be stronger. */
605	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
606	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
607}
608
609static inline void
610bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
611    uint32_t *xlat, uint32_t *lmt)
612{
613	uint32_t basev, lmtv, xlatv;
614
615	switch (bar) {
616	case NTB_B2B_BAR_1:
617		basev = ntb->xlat_reg->bar2_base;
618		lmtv = ntb->xlat_reg->bar2_limit;
619		xlatv = ntb->xlat_reg->bar2_xlat;
620		break;
621	case NTB_B2B_BAR_2:
622		basev = ntb->xlat_reg->bar4_base;
623		lmtv = ntb->xlat_reg->bar4_limit;
624		xlatv = ntb->xlat_reg->bar4_xlat;
625		break;
626	case NTB_B2B_BAR_3:
627		basev = ntb->xlat_reg->bar5_base;
628		lmtv = ntb->xlat_reg->bar5_limit;
629		xlatv = ntb->xlat_reg->bar5_xlat;
630		break;
631	default:
632		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
633		    ("bad bar"));
634		basev = lmtv = xlatv = 0;
635		break;
636	}
637
638	if (base != NULL)
639		*base = basev;
640	if (xlat != NULL)
641		*xlat = xlatv;
642	if (lmt != NULL)
643		*lmt = lmtv;
644}
645
646static int
647ntb_map_pci_bars(struct ntb_softc *ntb)
648{
649	int rc;
650
651	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
652	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
653	if (rc != 0)
654		goto out;
655
656	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
657	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
658	if (rc != 0)
659		goto out;
660	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
661	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
662	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
663
664	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
665	/* XXX Are shared MW B2Bs write-combining? */
666	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP) && !HAS_FEATURE(NTB_SPLIT_BAR))
667		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
668	else
669		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
670	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
671	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
672	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
673
674	if (!HAS_FEATURE(NTB_SPLIT_BAR))
675		goto out;
676
677	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
678	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
679		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
680	else
681		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
682	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
683	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
684	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
685
686out:
687	if (rc != 0)
688		device_printf(ntb->device,
689		    "unable to allocate pci resource\n");
690	return (rc);
691}
692
693static void
694print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
695    const char *kind)
696{
697
698	device_printf(ntb->device,
699	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
700	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
701	    (char *)bar->vbase + bar->size - 1,
702	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
703	    (uintmax_t)bar->size, kind);
704}
705
706static int
707map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
708{
709
710	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
711	    &bar->pci_resource_id, RF_ACTIVE);
712	if (bar->pci_resource == NULL)
713		return (ENXIO);
714
715	save_bar_parameters(bar);
716	print_map_success(ntb, bar, "mmr");
717	return (0);
718}
719
720static int
721map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
722{
723	int rc;
724	uint8_t bar_size_bits = 0;
725
726	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
727	    &bar->pci_resource_id, RF_ACTIVE);
728
729	if (bar->pci_resource == NULL)
730		return (ENXIO);
731
732	save_bar_parameters(bar);
733	/*
734	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
735	 * hardware issue. To work around this, query the size it should be
736	 * configured to by the device and modify the resource to correspond to
737	 * this new size. The BIOS on systems with this problem is required to
738	 * provide enough address space to allow the driver to make this change
739	 * safely.
740	 *
741	 * Ideally I could have just specified the size when I allocated the
742	 * resource like:
743	 *  bus_alloc_resource(ntb->device,
744	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
745	 *	1ul << bar_size_bits, RF_ACTIVE);
746	 * but the PCI driver does not honor the size in this call, so we have
747	 * to modify it after the fact.
748	 */
749	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
750		if (bar->pci_resource_id == PCIR_BAR(2))
751			bar_size_bits = pci_read_config(ntb->device,
752			    XEON_PBAR23SZ_OFFSET, 1);
753		else
754			bar_size_bits = pci_read_config(ntb->device,
755			    XEON_PBAR45SZ_OFFSET, 1);
756
757		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
758		    bar->pci_resource, bar->pbase,
759		    bar->pbase + (1ul << bar_size_bits) - 1);
760		if (rc != 0) {
761			device_printf(ntb->device,
762			    "unable to resize bar\n");
763			return (rc);
764		}
765
766		save_bar_parameters(bar);
767	}
768
769	/* Mark bar region as write combining to improve performance. */
770	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
771	    VM_MEMATTR_WRITE_COMBINING);
772	print_map_success(ntb, bar, "mw");
773	if (rc == 0)
774		device_printf(ntb->device,
775		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
776		    "WRITE_COMBINING.\n",
777		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
778		    (char *)bar->vbase + bar->size - 1,
779		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1));
780	else
781		device_printf(ntb->device,
782		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
783		    "WRITE_COMBINING: %d\n",
784		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
785		    (char *)bar->vbase + bar->size - 1,
786		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
787		    rc);
788		/* Proceed anyway */
789	return (0);
790}
791
792static void
793ntb_unmap_pci_bar(struct ntb_softc *ntb)
794{
795	struct ntb_pci_bar_info *current_bar;
796	int i;
797
798	for (i = 0; i < NTB_MAX_BARS; i++) {
799		current_bar = &ntb->bar_info[i];
800		if (current_bar->pci_resource != NULL)
801			bus_release_resource(ntb->device, SYS_RES_MEMORY,
802			    current_bar->pci_resource_id,
803			    current_bar->pci_resource);
804	}
805}
806
807static int
808ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
809{
810	uint32_t i;
811	int rc;
812
813	for (i = 0; i < num_vectors; i++) {
814		ntb->int_info[i].rid = i + 1;
815		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
816		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
817		if (ntb->int_info[i].res == NULL) {
818			device_printf(ntb->device,
819			    "bus_alloc_resource failed\n");
820			return (ENOMEM);
821		}
822		ntb->int_info[i].tag = NULL;
823		ntb->allocated_interrupts++;
824		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
825		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
826		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
827		if (rc != 0) {
828			device_printf(ntb->device, "bus_setup_intr failed\n");
829			return (ENXIO);
830		}
831	}
832	return (0);
833}
834
835/*
836 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
837 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
838 * should be okay.  This tunable should enable us to test that hypothesis
839 * when someone gets their hands on some Xeon hardware.
840 */
841static int ntb_force_remap_mode;
842SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
843    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
844    " to a smaller number of ithreads, even if the desired number are "
845    "available");
846
847/*
848 * In case it is NOT ok, give consumers an abort button.
849 */
850static int ntb_prefer_intx;
851SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
852    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
853    "than remapping MSI-X messages over available slots (match Linux driver "
854    "behavior)");
855
856/*
857 * Remap the desired number of MSI-X messages to available ithreads in a simple
858 * round-robin fashion.
859 */
860static int
861ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
862{
863	u_int *vectors;
864	uint32_t i;
865	int rc;
866
867	if (ntb_prefer_intx != 0)
868		return (ENXIO);
869
870	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
871
872	for (i = 0; i < desired; i++)
873		vectors[i] = (i % avail) + 1;
874
875	rc = pci_remap_msix(dev, desired, vectors);
876	free(vectors, M_NTB);
877	return (rc);
878}
879
880static int
881ntb_init_isr(struct ntb_softc *ntb)
882{
883	uint32_t desired_vectors, num_vectors;
884	int rc;
885
886	ntb->allocated_interrupts = 0;
887	ntb->last_ts = ticks;
888
889	/*
890	 * Mask all doorbell interrupts.
891	 */
892	ntb_db_set_mask(ntb, ntb->db_valid_mask);
893
894	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
895	    ntb->db_count);
896	if (desired_vectors >= 1) {
897		rc = pci_alloc_msix(ntb->device, &num_vectors);
898
899		if (ntb_force_remap_mode != 0 && rc == 0 &&
900		    num_vectors == desired_vectors)
901			num_vectors--;
902
903		if (rc == 0 && num_vectors < desired_vectors) {
904			rc = ntb_remap_msix(ntb->device, desired_vectors,
905			    num_vectors);
906			if (rc == 0)
907				num_vectors = desired_vectors;
908			else
909				pci_release_msi(ntb->device);
910		}
911		if (rc != 0)
912			num_vectors = 1;
913	} else
914		num_vectors = 1;
915
916	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
917		ntb->db_vec_count = 1;
918		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
919		rc = ntb_setup_legacy_interrupt(ntb);
920	} else {
921		ntb_create_msix_vec(ntb, num_vectors);
922		rc = ntb_setup_msix(ntb, num_vectors);
923	}
924	if (rc != 0) {
925		device_printf(ntb->device,
926		    "Error allocating interrupts: %d\n", rc);
927		ntb_free_msix_vec(ntb);
928	}
929
930	return (rc);
931}
932
933static int
934ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
935{
936	int rc;
937
938	ntb->int_info[0].rid = 0;
939	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
940	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
941	if (ntb->int_info[0].res == NULL) {
942		device_printf(ntb->device, "bus_alloc_resource failed\n");
943		return (ENOMEM);
944	}
945
946	ntb->int_info[0].tag = NULL;
947	ntb->allocated_interrupts = 1;
948
949	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
950	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
951	    ntb, &ntb->int_info[0].tag);
952	if (rc != 0) {
953		device_printf(ntb->device, "bus_setup_intr failed\n");
954		return (ENXIO);
955	}
956
957	return (0);
958}
959
960static void
961ntb_teardown_interrupts(struct ntb_softc *ntb)
962{
963	struct ntb_int_info *current_int;
964	int i;
965
966	for (i = 0; i < ntb->allocated_interrupts; i++) {
967		current_int = &ntb->int_info[i];
968		if (current_int->tag != NULL)
969			bus_teardown_intr(ntb->device, current_int->res,
970			    current_int->tag);
971
972		if (current_int->res != NULL)
973			bus_release_resource(ntb->device, SYS_RES_IRQ,
974			    rman_get_rid(current_int->res), current_int->res);
975	}
976
977	ntb_free_msix_vec(ntb);
978	pci_release_msi(ntb->device);
979}
980
981/*
982 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
983 * out to make code clearer.
984 */
985static inline uint64_t
986db_ioread(struct ntb_softc *ntb, uint64_t regoff)
987{
988
989	if (ntb->type == NTB_ATOM)
990		return (ntb_reg_read(8, regoff));
991
992	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
993
994	return (ntb_reg_read(2, regoff));
995}
996
997static inline void
998db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
999{
1000
1001	KASSERT((val & ~ntb->db_valid_mask) == 0,
1002	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1003	     (uintmax_t)(val & ~ntb->db_valid_mask),
1004	     (uintmax_t)ntb->db_valid_mask));
1005
1006	if (regoff == ntb->self_reg->db_mask)
1007		DB_MASK_ASSERT(ntb, MA_OWNED);
1008	db_iowrite_raw(ntb, regoff, val);
1009}
1010
1011static inline void
1012db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1013{
1014
1015	if (ntb->type == NTB_ATOM) {
1016		ntb_reg_write(8, regoff, val);
1017		return;
1018	}
1019
1020	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1021	ntb_reg_write(2, regoff, (uint16_t)val);
1022}
1023
1024void
1025ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1026{
1027
1028	DB_MASK_LOCK(ntb);
1029	ntb->db_mask |= bits;
1030	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1031	DB_MASK_UNLOCK(ntb);
1032}
1033
1034void
1035ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1036{
1037
1038	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1039	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1040	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1041	     (uintmax_t)ntb->db_valid_mask));
1042
1043	DB_MASK_LOCK(ntb);
1044	ntb->db_mask &= ~bits;
1045	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1046	DB_MASK_UNLOCK(ntb);
1047}
1048
1049uint64_t
1050ntb_db_read(struct ntb_softc *ntb)
1051{
1052
1053	return (db_ioread(ntb, ntb->self_reg->db_bell));
1054}
1055
1056void
1057ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1058{
1059
1060	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1061	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1062	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1063	     (uintmax_t)ntb->db_valid_mask));
1064
1065	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1066}
1067
1068static inline uint64_t
1069ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1070{
1071	uint64_t shift, mask;
1072
1073	shift = ntb->db_vec_shift;
1074	mask = (1ull << shift) - 1;
1075	return (mask << (shift * db_vector));
1076}
1077
1078static void
1079ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1080{
1081	uint64_t vec_mask;
1082
1083	ntb->last_ts = ticks;
1084	vec_mask = ntb_vec_mask(ntb, vec);
1085
1086	if ((vec_mask & ntb->db_link_mask) != 0) {
1087		if (ntb_poll_link(ntb))
1088			ntb_link_event(ntb);
1089	}
1090
1091	if ((vec_mask & ntb->db_valid_mask) != 0)
1092		ntb_db_event(ntb, vec);
1093}
1094
1095static void
1096ndev_vec_isr(void *arg)
1097{
1098	struct ntb_vec *nvec = arg;
1099
1100	ntb_interrupt(nvec->ntb, nvec->num);
1101}
1102
/*
 * Single-interrupt handler.  `arg' is the softc.  Without MSI-X there is only
 * one vector, so everything is reported as vector 0.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *sc;

	sc = arg;
	ntb_interrupt(sc, 0);
}
1109
1110static int
1111ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1112{
1113	uint32_t i;
1114
1115	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1116	    M_ZERO | M_WAITOK);
1117	for (i = 0; i < num_vectors; i++) {
1118		ntb->msix_vec[i].num = i;
1119		ntb->msix_vec[i].ntb = ntb;
1120	}
1121
1122	return (0);
1123}
1124
1125static void
1126ntb_free_msix_vec(struct ntb_softc *ntb)
1127{
1128
1129	if (ntb->msix_vec == NULL)
1130		return;
1131
1132	free(ntb->msix_vec, M_NTB);
1133	ntb->msix_vec = NULL;
1134}
1135
1136static struct ntb_hw_info *
1137ntb_get_device_info(uint32_t device_id)
1138{
1139	struct ntb_hw_info *ep = pci_ids;
1140
1141	while (ep->device_id) {
1142		if (ep->device_id == device_id)
1143			return (ep);
1144		++ep;
1145	}
1146	return (NULL);
1147}
1148
1149static void
1150ntb_teardown_xeon(struct ntb_softc *ntb)
1151{
1152
1153	if (ntb->reg != NULL)
1154		ntb_link_disable(ntb);
1155}
1156
1157static void
1158ntb_detect_max_mw(struct ntb_softc *ntb)
1159{
1160
1161	if (ntb->type == NTB_ATOM) {
1162		ntb->mw_count = ATOM_MW_COUNT;
1163		return;
1164	}
1165
1166	if (HAS_FEATURE(NTB_SPLIT_BAR))
1167		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1168	else
1169		ntb->mw_count = XEON_SNB_MW_COUNT;
1170}
1171
1172static int
1173ntb_detect_xeon(struct ntb_softc *ntb)
1174{
1175	uint8_t ppd, conn_type;
1176
1177	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1178	ntb->ppd = ppd;
1179
1180	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1181		ntb->dev_type = NTB_DEV_DSD;
1182	else
1183		ntb->dev_type = NTB_DEV_USD;
1184
1185	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1186		ntb->features |= NTB_SPLIT_BAR;
1187
1188	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
1189	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1190		ntb->features |= NTB_SDOORBELL_LOCKUP;
1191
1192	conn_type = ppd & XEON_PPD_CONN_TYPE;
1193	switch (conn_type) {
1194	case NTB_CONN_B2B:
1195		ntb->conn_type = conn_type;
1196		break;
1197	case NTB_CONN_RP:
1198	case NTB_CONN_TRANSPARENT:
1199	default:
1200		device_printf(ntb->device, "Unsupported connection type: %u\n",
1201		    (unsigned)conn_type);
1202		return (ENXIO);
1203	}
1204	return (0);
1205}
1206
1207static int
1208ntb_detect_atom(struct ntb_softc *ntb)
1209{
1210	uint32_t ppd, conn_type;
1211
1212	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1213	ntb->ppd = ppd;
1214
1215	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1216		ntb->dev_type = NTB_DEV_DSD;
1217	else
1218		ntb->dev_type = NTB_DEV_USD;
1219
1220	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1221	switch (conn_type) {
1222	case NTB_CONN_B2B:
1223		ntb->conn_type = conn_type;
1224		break;
1225	default:
1226		device_printf(ntb->device, "Unsupported NTB configuration\n");
1227		return (ENXIO);
1228	}
1229	return (0);
1230}
1231
1232static int
1233ntb_xeon_init_dev(struct ntb_softc *ntb)
1234{
1235	int rc;
1236
1237	ntb->spad_count		= XEON_SPAD_COUNT;
1238	ntb->db_count		= XEON_DB_COUNT;
1239	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1240	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1241	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1242
1243	if (ntb->conn_type != NTB_CONN_B2B) {
1244		device_printf(ntb->device, "Connection type %d not supported\n",
1245		    ntb->conn_type);
1246		return (ENXIO);
1247	}
1248
1249	ntb->reg = &xeon_reg;
1250	ntb->self_reg = &xeon_pri_reg;
1251	ntb->peer_reg = &xeon_b2b_reg;
1252	ntb->xlat_reg = &xeon_sec_xlat;
1253
1254	/*
1255	 * There is a Xeon hardware errata related to writes to SDOORBELL or
1256	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1257	 * which may hang the system.  To workaround this use the second memory
1258	 * window to access the interrupt and scratch pad registers on the
1259	 * remote system.
1260	 */
1261	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
1262		/* Use the last MW for mapping remote spad */
1263		ntb->b2b_mw_idx = ntb->mw_count - 1;
1264	else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1265		/*
1266		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1267		 * mirrored to the remote system.  Shrink the number of bits by one,
1268		 * since bit 14 is the last bit.
1269		 *
1270		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1271		 * anyway.  Nor for non-B2B connection types.
1272		 */
1273		ntb->db_count = XEON_DB_COUNT - 1;
1274
1275	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1276
1277	if (ntb->dev_type == NTB_DEV_USD)
1278		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1279		    &xeon_b2b_usd_addr);
1280	else
1281		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1282		    &xeon_b2b_dsd_addr);
1283	if (rc != 0)
1284		return (rc);
1285
1286	/* Enable Bus Master and Memory Space on the secondary side */
1287	ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1288	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1289
1290	/*
1291	 * Mask all doorbell interrupts.
1292	 */
1293	ntb_db_set_mask(ntb, ntb->db_valid_mask);
1294
1295	rc = ntb_init_isr(ntb);
1296	return (rc);
1297}
1298
1299static int
1300ntb_atom_init_dev(struct ntb_softc *ntb)
1301{
1302	int error;
1303
1304	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1305	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1306
1307	ntb->spad_count		 = ATOM_SPAD_COUNT;
1308	ntb->db_count		 = ATOM_DB_COUNT;
1309	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1310	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1311	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1312
1313	ntb->reg = &atom_reg;
1314	ntb->self_reg = &atom_pri_reg;
1315	ntb->peer_reg = &atom_b2b_reg;
1316	ntb->xlat_reg = &atom_sec_xlat;
1317
1318	/*
1319	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1320	 * resolved.  Mask transaction layer internal parity errors.
1321	 */
1322	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1323
1324	configure_atom_secondary_side_bars(ntb);
1325
1326	/* Enable Bus Master and Memory Space on the secondary side */
1327	ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1328	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1329
1330	error = ntb_init_isr(ntb);
1331	if (error != 0)
1332		return (error);
1333
1334	/* Initiate PCI-E link training */
1335	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1336
1337	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1338
1339	return (0);
1340}
1341
1342/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1343static void
1344configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1345{
1346
1347	if (ntb->dev_type == NTB_DEV_USD) {
1348		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1349		    XEON_B2B_BAR2_DSD_ADDR64);
1350		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1351		    XEON_B2B_BAR4_DSD_ADDR64);
1352		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_USD_ADDR64);
1353		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_USD_ADDR64);
1354	} else {
1355		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1356		    XEON_B2B_BAR2_USD_ADDR64);
1357		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1358		    XEON_B2B_BAR4_USD_ADDR64);
1359		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_DSD_ADDR64);
1360		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_DSD_ADDR64);
1361	}
1362}
1363
1364
1365/*
1366 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1367 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1368 * remains for use by a higher layer.
1369 *
1370 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1371 * MW size is sufficiently large.
1372 */
1373static unsigned int ntb_b2b_mw_share;
1374SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1375    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1376    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1377    "value here.");
1378
1379static void
1380xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1381    enum ntb_bar regbar)
1382{
1383	struct ntb_pci_bar_info *bar;
1384	uint8_t bar_sz;
1385
1386	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1387		return;
1388
1389	bar = &ntb->bar_info[idx];
1390	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1391	if (idx == regbar) {
1392		if (ntb->b2b_off != 0)
1393			bar_sz--;
1394		else
1395			bar_sz = 0;
1396	}
1397	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1398	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1399	(void)bar_sz;
1400}
1401
1402static void
1403xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1404    enum ntb_bar idx, enum ntb_bar regbar)
1405{
1406	uint64_t reg_val;
1407	uint32_t base_reg, lmt_reg;
1408
1409	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1410	if (idx == regbar)
1411		bar_addr += ntb->b2b_off;
1412
1413	if (!bar_is_64bit(ntb, idx)) {
1414		ntb_reg_write(4, base_reg, bar_addr);
1415		reg_val = ntb_reg_read(4, base_reg);
1416		(void)reg_val;
1417
1418		ntb_reg_write(4, lmt_reg, bar_addr);
1419		reg_val = ntb_reg_read(4, lmt_reg);
1420		(void)reg_val;
1421	} else {
1422		ntb_reg_write(8, base_reg, bar_addr);
1423		reg_val = ntb_reg_read(8, base_reg);
1424		(void)reg_val;
1425
1426		ntb_reg_write(8, lmt_reg, bar_addr);
1427		reg_val = ntb_reg_read(8, lmt_reg);
1428		(void)reg_val;
1429	}
1430}
1431
1432static void
1433xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1434{
1435	struct ntb_pci_bar_info *bar;
1436
1437	bar = &ntb->bar_info[idx];
1438	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1439		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1440		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1441	} else {
1442		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1443		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1444	}
1445	(void)base_addr;
1446}
1447
1448static int
1449xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1450    const struct ntb_b2b_addr *peer_addr)
1451{
1452	struct ntb_pci_bar_info *b2b_bar;
1453	vm_size_t bar_size;
1454	uint64_t bar_addr;
1455	enum ntb_bar b2b_bar_num, i;
1456
1457	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1458		b2b_bar = NULL;
1459		b2b_bar_num = NTB_CONFIG_BAR;
1460		ntb->b2b_off = 0;
1461	} else {
1462		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1463		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1464		    ("invalid b2b mw bar"));
1465
1466		b2b_bar = &ntb->bar_info[b2b_bar_num];
1467		bar_size = b2b_bar->size;
1468
1469		if (ntb_b2b_mw_share != 0 &&
1470		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1471			ntb->b2b_off = bar_size >> 1;
1472		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1473			ntb->b2b_off = 0;
1474			ntb->mw_count--;
1475		} else {
1476			device_printf(ntb->device,
1477			    "B2B bar size is too small!\n");
1478			return (EIO);
1479		}
1480	}
1481
1482	/*
1483	 * Reset the secondary bar sizes to match the primary bar sizes.
1484	 * (Except, disable or halve the size of the B2B secondary bar.)
1485	 */
1486	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1487		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1488
1489	bar_addr = 0;
1490	if (b2b_bar_num == NTB_CONFIG_BAR)
1491		bar_addr = addr->bar0_addr;
1492	else if (b2b_bar_num == NTB_B2B_BAR_1)
1493		bar_addr = addr->bar2_addr64;
1494	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1495		bar_addr = addr->bar4_addr64;
1496	else if (b2b_bar_num == NTB_B2B_BAR_2)
1497		bar_addr = addr->bar4_addr32;
1498	else if (b2b_bar_num == NTB_B2B_BAR_3)
1499		bar_addr = addr->bar5_addr32;
1500	else
1501		KASSERT(false, ("invalid bar"));
1502
1503	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1504
1505	/*
1506	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1507	 * register BAR.  The B2B BAR is either disabled above or configured
1508	 * half-size.  It starts at PBAR xlat + offset.
1509	 *
1510	 * Also set up incoming BAR limits == base (zero length window).
1511	 */
1512	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1513	    b2b_bar_num);
1514	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1515		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1516		    NTB_B2B_BAR_2, b2b_bar_num);
1517		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1518		    NTB_B2B_BAR_3, b2b_bar_num);
1519	} else
1520		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1521		    NTB_B2B_BAR_2, b2b_bar_num);
1522
1523	/* Zero incoming translation addrs */
1524	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1525	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1526
1527	/* Zero outgoing translation limits (whole bar size windows) */
1528	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1529	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1530
1531	/* Set outgoing translation offsets */
1532	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1533	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1534		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1535		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1536	} else
1537		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1538
1539	/* Set the translation offset for B2B registers */
1540	bar_addr = 0;
1541	if (b2b_bar_num == NTB_CONFIG_BAR)
1542		bar_addr = peer_addr->bar0_addr;
1543	else if (b2b_bar_num == NTB_B2B_BAR_1)
1544		bar_addr = peer_addr->bar2_addr64;
1545	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1546		bar_addr = peer_addr->bar4_addr64;
1547	else if (b2b_bar_num == NTB_B2B_BAR_2)
1548		bar_addr = peer_addr->bar4_addr32;
1549	else if (b2b_bar_num == NTB_B2B_BAR_3)
1550		bar_addr = peer_addr->bar5_addr32;
1551	else
1552		KASSERT(false, ("invalid bar"));
1553
1554	/*
1555	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1556	 * at a time.
1557	 */
1558	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1559	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1560	return (0);
1561}
1562
1563static inline bool
1564link_is_up(struct ntb_softc *ntb)
1565{
1566
1567	if (ntb->type == NTB_XEON) {
1568		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1569			return (true);
1570		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1571	}
1572
1573	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1574	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1575}
1576
1577static inline bool
1578atom_link_is_err(struct ntb_softc *ntb)
1579{
1580	uint32_t status;
1581
1582	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1583
1584	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1585	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1586		return (true);
1587
1588	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1589	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1590}
1591
1592/* Atom does not have link status interrupt, poll on that platform */
1593static void
1594atom_link_hb(void *arg)
1595{
1596	struct ntb_softc *ntb = arg;
1597	sbintime_t timo, poll_ts;
1598
1599	timo = NTB_HB_TIMEOUT * hz;
1600	poll_ts = ntb->last_ts + timo;
1601
1602	/*
1603	 * Delay polling the link status if an interrupt was received, unless
1604	 * the cached link status says the link is down.
1605	 */
1606	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1607		timo = poll_ts - ticks;
1608		goto out;
1609	}
1610
1611	if (ntb_poll_link(ntb))
1612		ntb_link_event(ntb);
1613
1614	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1615		/* Link is down with error, proceed with recovery */
1616		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1617		return;
1618	}
1619
1620out:
1621	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1622}
1623
1624static void
1625atom_perform_link_restart(struct ntb_softc *ntb)
1626{
1627	uint32_t status;
1628
1629	/* Driver resets the NTB ModPhy lanes - magic! */
1630	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1631	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1632	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1633	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1634
1635	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1636	pause("ModPhy", hz / 10);
1637
1638	/* Clear AER Errors, write to clear */
1639	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1640	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1641	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1642
1643	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1644	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1645	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
1646	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
1647
1648	/* Clear DeSkew Buffer error, write to clear */
1649	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
1650	status |= ATOM_DESKEWSTS_DBERR;
1651	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
1652
1653	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1654	status &= ATOM_IBIST_ERR_OFLOW;
1655	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
1656
1657	/* Releases the NTB state machine to allow the link to retrain */
1658	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1659	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
1660	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
1661}
1662
1663/*
1664 * ntb_set_ctx() - associate a driver context with an ntb device
1665 * @ntb:        NTB device context
1666 * @ctx:        Driver context
1667 * @ctx_ops:    Driver context operations
1668 *
1669 * Associate a driver context and operations with a ntb device.  The context is
1670 * provided by the client driver, and the driver may associate a different
1671 * context with each ntb device.
1672 *
1673 * Return: Zero if the context is associated, otherwise an error number.
1674 */
1675int
1676ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
1677{
1678
1679	if (ctx == NULL || ops == NULL)
1680		return (EINVAL);
1681	if (ntb->ctx_ops != NULL)
1682		return (EINVAL);
1683
1684	CTX_LOCK(ntb);
1685	if (ntb->ctx_ops != NULL) {
1686		CTX_UNLOCK(ntb);
1687		return (EINVAL);
1688	}
1689	ntb->ntb_ctx = ctx;
1690	ntb->ctx_ops = ops;
1691	CTX_UNLOCK(ntb);
1692
1693	return (0);
1694}
1695
1696/*
1697 * It is expected that this will only be used from contexts where the ctx_lock
1698 * is not needed to protect ntb_ctx lifetime.
1699 */
1700void *
1701ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1702{
1703
1704	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1705	if (ops != NULL)
1706		*ops = ntb->ctx_ops;
1707	return (ntb->ntb_ctx);
1708}
1709
1710/*
1711 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1712 * @ntb:        NTB device context
1713 *
1714 * Clear any association that may exist between a driver context and the ntb
1715 * device.
1716 */
1717void
1718ntb_clear_ctx(struct ntb_softc *ntb)
1719{
1720
1721	CTX_LOCK(ntb);
1722	ntb->ntb_ctx = NULL;
1723	ntb->ctx_ops = NULL;
1724	CTX_UNLOCK(ntb);
1725}
1726
1727/*
1728 * ntb_link_event() - notify driver context of a change in link status
1729 * @ntb:        NTB device context
1730 *
1731 * Notify the driver context that the link status may have changed.  The driver
1732 * should call ntb_link_is_up() to get the current status.
1733 */
1734void
1735ntb_link_event(struct ntb_softc *ntb)
1736{
1737
1738	CTX_LOCK(ntb);
1739	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
1740		ntb->ctx_ops->link_event(ntb->ntb_ctx);
1741	CTX_UNLOCK(ntb);
1742}
1743
1744/*
1745 * ntb_db_event() - notify driver context of a doorbell event
1746 * @ntb:        NTB device context
1747 * @vector:     Interrupt vector number
1748 *
1749 * Notify the driver context of a doorbell event.  If hardware supports
1750 * multiple interrupt vectors for doorbells, the vector number indicates which
1751 * vector received the interrupt.  The vector number is relative to the first
1752 * vector used for doorbells, starting at zero, and must be less than
1753 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1754 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1755 * those bits are associated with the vector number.
1756 */
1757static void
1758ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
1759{
1760
1761	CTX_LOCK(ntb);
1762	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
1763		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
1764	CTX_UNLOCK(ntb);
1765}
1766
1767/*
1768 * ntb_link_enable() - enable the link on the secondary side of the ntb
1769 * @ntb:        NTB device context
1770 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1771 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1772 *
1773 * Enable the link on the secondary side of the ntb.  This can only be done
1774 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1775 * should train the link to its maximum speed and width, or the requested speed
1776 * and width, whichever is smaller, if supported.
1777 *
1778 * Return: Zero on success, otherwise an error number.
1779 *
1780 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1781 *      and width input will be ignored.
1782 */
1783int
1784ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
1785    enum ntb_width w __unused)
1786{
1787	uint32_t cntl;
1788
1789	if (ntb->type == NTB_ATOM) {
1790		pci_write_config(ntb->device, NTB_PPD_OFFSET,
1791		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
1792		return (0);
1793	}
1794
1795	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1796		ntb_link_event(ntb);
1797		return (0);
1798	}
1799
1800	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1801	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1802	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1803	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1804	if (HAS_FEATURE(NTB_SPLIT_BAR))
1805		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
1806	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1807	return (0);
1808}
1809
1810/*
1811 * ntb_link_disable() - disable the link on the secondary side of the ntb
1812 * @ntb:        NTB device context
1813 *
1814 * Disable the link on the secondary side of the ntb.  This can only be done
1815 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1816 * should disable the link.  Returning from this call must indicate that a
1817 * barrier has passed, though with no more writes may pass in either direction
1818 * across the link, except if this call returns an error number.
1819 *
1820 * Return: Zero on success, otherwise an error number.
1821 */
1822int
1823ntb_link_disable(struct ntb_softc *ntb)
1824{
1825	uint32_t cntl;
1826
1827	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1828		ntb_link_event(ntb);
1829		return (0);
1830	}
1831
1832	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1833	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1834	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1835	if (HAS_FEATURE(NTB_SPLIT_BAR))
1836		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
1837	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1838	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1839	return (0);
1840}
1841
1842static void
1843recover_atom_link(void *arg)
1844{
1845	struct ntb_softc *ntb = arg;
1846	unsigned speed, width, oldspeed, oldwidth;
1847	uint32_t status32;
1848
1849	atom_perform_link_restart(ntb);
1850
1851	/*
1852	 * There is a potential race between the 2 NTB devices recovering at
1853	 * the same time.  If the times are the same, the link will not recover
1854	 * and the driver will be stuck in this loop forever.  Add a random
1855	 * interval to the recovery time to prevent this race.
1856	 */
1857	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
1858	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
1859
1860	if (atom_link_is_err(ntb))
1861		goto retry;
1862
1863	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
1864	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
1865		goto out;
1866
1867	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
1868	width = NTB_LNK_STA_WIDTH(status32);
1869	speed = status32 & NTB_LINK_SPEED_MASK;
1870
1871	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
1872	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
1873	if (oldwidth != width || oldspeed != speed)
1874		goto retry;
1875
1876out:
1877	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
1878	    ntb);
1879	return;
1880
1881retry:
1882	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
1883	    ntb);
1884}
1885
1886/*
1887 * Polls the HW link status register(s); returns true if something has changed.
1888 */
1889static bool
1890ntb_poll_link(struct ntb_softc *ntb)
1891{
1892	uint32_t ntb_cntl;
1893	uint16_t reg_val;
1894
1895	if (ntb->type == NTB_ATOM) {
1896		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1897		if (ntb_cntl == ntb->ntb_ctl)
1898			return (false);
1899
1900		ntb->ntb_ctl = ntb_cntl;
1901		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
1902	} else {
1903		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
1904
1905		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
1906		if (reg_val == ntb->lnk_sta)
1907			return (false);
1908
1909		ntb->lnk_sta = reg_val;
1910	}
1911	return (true);
1912}
1913
1914static inline enum ntb_speed
1915ntb_link_sta_speed(struct ntb_softc *ntb)
1916{
1917
1918	if (!link_is_up(ntb))
1919		return (NTB_SPEED_NONE);
1920	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
1921}
1922
1923static inline enum ntb_width
1924ntb_link_sta_width(struct ntb_softc *ntb)
1925{
1926
1927	if (!link_is_up(ntb))
1928		return (NTB_WIDTH_NONE);
1929	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
1930}
1931
1932SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
1933    "Driver state, statistics, and HW registers");
1934
1935#define NTB_REGSZ_MASK	(3ul << 30)
1936#define NTB_REG_64	(1ul << 30)
1937#define NTB_REG_32	(2ul << 30)
1938#define NTB_REG_16	(3ul << 30)
1939#define NTB_REG_8	(0ul << 30)
1940
1941#define NTB_DB_READ	(1ul << 29)
1942#define NTB_PCI_REG	(1ul << 28)
1943#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
1944
1945static void
1946ntb_sysctl_init(struct ntb_softc *ntb)
1947{
1948	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
1949	struct sysctl_ctx_list *ctx;
1950	struct sysctl_oid *tree, *tmptree;
1951
1952	ctx = device_get_sysctl_ctx(ntb->device);
1953
1954	tree = SYSCTL_ADD_NODE(ctx,
1955	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
1956	    "debug_info", CTLFLAG_RD, NULL,
1957	    "Driver state, statistics, and HW registers");
1958	tree_par = SYSCTL_CHILDREN(tree);
1959
1960	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
1961	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
1962	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
1963	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
1964
1965	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
1966		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
1967		    &ntb->b2b_mw_idx, 0,
1968		    "Index of the MW used for B2B remote register access");
1969		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
1970		    CTLFLAG_RD, &ntb->b2b_off,
1971		    "If non-zero, offset of B2B register region in shared MW");
1972	}
1973
1974	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
1975	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
1976	    "Features/errata of this NTB device");
1977
1978	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
1979	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
1980	    "NTB CTL register (cached)");
1981	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
1982	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
1983	    "LNK STA register (cached)");
1984
1985	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
1986	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
1987	    "A", "Link status");
1988
1989	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
1990	    &ntb->mw_count, 0, "MW count (excl. non-shared B2B register BAR)");
1991	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
1992	    &ntb->spad_count, 0, "Scratchpad count");
1993	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
1994	    &ntb->db_count, 0, "Doorbell count");
1995	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
1996	    &ntb->db_vec_count, 0, "Doorbell vector count");
1997	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
1998	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
1999
2000	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2001	    &ntb->db_valid_mask, "Doorbell valid mask");
2002	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2003	    &ntb->db_link_mask, "Doorbell link mask");
2004	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2005	    &ntb->db_mask, "Doorbell mask (cached)");
2006
2007	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2008	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
2009	regpar = SYSCTL_CHILDREN(tmptree);
2010
2011	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2012	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2013	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2014	    "NTB Control register");
2015	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2016	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2017	    0x19c, sysctl_handle_register, "IU",
2018	    "NTB Link Capabilities");
2019	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2020	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2021	    0x1a0, sysctl_handle_register, "IU",
2022	    "NTB Link Control register");
2023
2024	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2025	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2026	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2027	    sysctl_handle_register, "QU", "Doorbell mask register");
2028	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2029	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2030	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2031	    sysctl_handle_register, "QU", "Doorbell register");
2032
2033	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2034	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2035	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2036	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2037	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2038		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2039		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2040		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2041		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2042		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2043		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2044		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2045		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2046	} else {
2047		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2048		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2049		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2050		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2051	}
2052
2053	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2054	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2055	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2056	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2057	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2058		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2059		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2060		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2061		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2062		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2063		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2064		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2065		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2066	} else {
2067		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2068		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2069		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2070		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2071	}
2072
2073	if (ntb->type == NTB_ATOM)
2074		return;
2075
2076	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2077	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2078	statpar = SYSCTL_CHILDREN(tmptree);
2079	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2080	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2081	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2082	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2083
2084	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2085	    CTLFLAG_RD, NULL, "Xeon HW errors");
2086	errpar = SYSCTL_CHILDREN(tmptree);
2087
2088	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "devsts",
2089	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2090	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2091	    sysctl_handle_register, "SU", "DEVSTS");
2092	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "lnksts",
2093	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2094	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2095	    sysctl_handle_register, "SU", "LNKSTS");
2096	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2097	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2098	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2099	    sysctl_handle_register, "IU", "UNCERRSTS");
2100	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2101	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2102	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2103	    sysctl_handle_register, "IU", "CORERRSTS");
2104
2105	if (ntb->conn_type != NTB_CONN_B2B)
2106		return;
2107
2108	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2109	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2110	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2111	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2112	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2113		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2114		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2115		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2116		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2117		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2118		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2119		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2120		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2121	} else {
2122		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2123		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2124		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2125		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2126	}
2127
2128	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2129	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2130	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2131	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2132	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2133		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2134		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2135		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2136		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2137		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2138		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2139		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2140		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2141	} else {
2142		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2143		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2144		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2145		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2146	}
2147
2148	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2149	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2150	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2151	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2152	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2153	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2154	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2155	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2156	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2157		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2158		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2159		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2160		    sysctl_handle_register, "IU",
2161		    "Secondary BAR4 base register");
2162		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2163		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2164		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2165		    sysctl_handle_register, "IU",
2166		    "Secondary BAR5 base register");
2167	} else {
2168		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2169		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2170		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2171		    sysctl_handle_register, "QU",
2172		    "Secondary BAR45 base register");
2173	}
2174}
2175
2176static int
2177sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2178{
2179	struct ntb_softc *ntb;
2180	struct sbuf sb;
2181	int error;
2182
2183	error = 0;
2184	ntb = arg1;
2185
2186	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2187
2188	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2189	error = sbuf_finish(&sb);
2190	sbuf_delete(&sb);
2191
2192	if (error || !req->newptr)
2193		return (error);
2194	return (EINVAL);
2195}
2196
2197static int
2198sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2199{
2200	struct ntb_softc *ntb;
2201	struct sbuf sb;
2202	enum ntb_speed speed;
2203	enum ntb_width width;
2204	int error;
2205
2206	error = 0;
2207	ntb = arg1;
2208
2209	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2210
2211	if (ntb_link_is_up(ntb, &speed, &width))
2212		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2213		    (unsigned)speed, (unsigned)width);
2214	else
2215		sbuf_printf(&sb, "down");
2216
2217	error = sbuf_finish(&sb);
2218	sbuf_delete(&sb);
2219
2220	if (error || !req->newptr)
2221		return (error);
2222	return (EINVAL);
2223}
2224
2225static int
2226sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2227{
2228	struct ntb_softc *ntb;
2229	const void *outp;
2230	uintptr_t sz;
2231	uint64_t umv;
2232	char be[sizeof(umv)];
2233	size_t outsz;
2234	uint32_t reg;
2235	bool db, pci;
2236	int error;
2237
2238	ntb = arg1;
2239	reg = arg2 & ~NTB_REGFLAGS_MASK;
2240	sz = arg2 & NTB_REGSZ_MASK;
2241	db = (arg2 & NTB_DB_READ) != 0;
2242	pci = (arg2 & NTB_PCI_REG) != 0;
2243
2244	KASSERT(!(db && pci), ("bogus"));
2245
2246	if (db) {
2247		KASSERT(sz == NTB_REG_64, ("bogus"));
2248		umv = db_ioread(ntb, reg);
2249		outsz = sizeof(uint64_t);
2250	} else {
2251		switch (sz) {
2252		case NTB_REG_64:
2253			if (pci)
2254				umv = pci_read_config(ntb->device, reg, 8);
2255			else
2256				umv = ntb_reg_read(8, reg);
2257			outsz = sizeof(uint64_t);
2258			break;
2259		case NTB_REG_32:
2260			if (pci)
2261				umv = pci_read_config(ntb->device, reg, 4);
2262			else
2263				umv = ntb_reg_read(4, reg);
2264			outsz = sizeof(uint32_t);
2265			break;
2266		case NTB_REG_16:
2267			if (pci)
2268				umv = pci_read_config(ntb->device, reg, 2);
2269			else
2270				umv = ntb_reg_read(2, reg);
2271			outsz = sizeof(uint16_t);
2272			break;
2273		case NTB_REG_8:
2274			if (pci)
2275				umv = pci_read_config(ntb->device, reg, 1);
2276			else
2277				umv = ntb_reg_read(1, reg);
2278			outsz = sizeof(uint8_t);
2279			break;
2280		default:
2281			panic("bogus");
2282			break;
2283		}
2284	}
2285
2286	/* Encode bigendian so that sysctl -x is legible. */
2287	be64enc(be, umv);
2288	outp = ((char *)be) + sizeof(umv) - outsz;
2289
2290	error = SYSCTL_OUT(req, outp, outsz);
2291	if (error || !req->newptr)
2292		return (error);
2293	return (EINVAL);
2294}
2295
2296/*
2297 * Public API to the rest of the OS
2298 */
2299
2300/**
2301 * ntb_get_max_spads() - get the total scratch regs usable
2302 * @ntb: pointer to ntb_softc instance
2303 *
2304 * This function returns the max 32bit scratchpad registers usable by the
2305 * upper layer.
2306 *
2307 * RETURNS: total number of scratch pad registers available
2308 */
2309uint8_t
2310ntb_get_max_spads(struct ntb_softc *ntb)
2311{
2312
2313	return (ntb->spad_count);
2314}
2315
2316uint8_t
2317ntb_mw_count(struct ntb_softc *ntb)
2318{
2319
2320	return (ntb->mw_count);
2321}
2322
2323/**
2324 * ntb_spad_write() - write to the secondary scratchpad register
2325 * @ntb: pointer to ntb_softc instance
2326 * @idx: index to the scratchpad register, 0 based
2327 * @val: the data value to put into the register
2328 *
2329 * This function allows writing of a 32bit value to the indexed scratchpad
2330 * register. The register resides on the secondary (external) side.
2331 *
2332 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2333 */
2334int
2335ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2336{
2337
2338	if (idx >= ntb->spad_count)
2339		return (EINVAL);
2340
2341	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2342
2343	return (0);
2344}
2345
2346/**
2347 * ntb_spad_read() - read from the primary scratchpad register
2348 * @ntb: pointer to ntb_softc instance
2349 * @idx: index to scratchpad register, 0 based
2350 * @val: pointer to 32bit integer for storing the register value
2351 *
2352 * This function allows reading of the 32bit scratchpad register on
2353 * the primary (internal) side.
2354 *
2355 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2356 */
2357int
2358ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2359{
2360
2361	if (idx >= ntb->spad_count)
2362		return (EINVAL);
2363
2364	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2365
2366	return (0);
2367}
2368
2369/**
2370 * ntb_peer_spad_write() - write to the secondary scratchpad register
2371 * @ntb: pointer to ntb_softc instance
2372 * @idx: index to the scratchpad register, 0 based
2373 * @val: the data value to put into the register
2374 *
2375 * This function allows writing of a 32bit value to the indexed scratchpad
2376 * register. The register resides on the secondary (external) side.
2377 *
2378 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2379 */
2380int
2381ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2382{
2383
2384	if (idx >= ntb->spad_count)
2385		return (EINVAL);
2386
2387	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2388		ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2389	else
2390		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2391
2392	return (0);
2393}
2394
2395/**
2396 * ntb_peer_spad_read() - read from the primary scratchpad register
2397 * @ntb: pointer to ntb_softc instance
2398 * @idx: index to scratchpad register, 0 based
2399 * @val: pointer to 32bit integer for storing the register value
2400 *
2401 * This function allows reading of the 32bit scratchpad register on
2402 * the primary (internal) side.
2403 *
2404 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2405 */
2406int
2407ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2408{
2409
2410	if (idx >= ntb->spad_count)
2411		return (EINVAL);
2412
2413	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2414		*val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2415	else
2416		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2417
2418	return (0);
2419}
2420
2421/*
2422 * ntb_mw_get_range() - get the range of a memory window
2423 * @ntb:        NTB device context
2424 * @idx:        Memory window number
2425 * @base:       OUT - the base address for mapping the memory window
2426 * @size:       OUT - the size for mapping the memory window
2427 * @align:      OUT - the base alignment for translating the memory window
2428 * @align_size: OUT - the size alignment for translating the memory window
2429 *
2430 * Get the range of a memory window.  NULL may be given for any output
2431 * parameter if the value is not needed.  The base and size may be used for
2432 * mapping the memory window, to access the peer memory.  The alignment and
2433 * size may be used for translating the memory window, for the peer to access
2434 * memory on the local system.
2435 *
2436 * Return: Zero on success, otherwise an error number.
2437 */
2438int
2439ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2440    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size)
2441{
2442	struct ntb_pci_bar_info *bar;
2443	size_t bar_b2b_off;
2444
2445	if (mw_idx >= ntb_mw_count(ntb))
2446		return (EINVAL);
2447
2448	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, mw_idx)];
2449	bar_b2b_off = 0;
2450	if (mw_idx == ntb->b2b_mw_idx) {
2451		KASSERT(ntb->b2b_off != 0,
2452		    ("user shouldn't get non-shared b2b mw"));
2453		bar_b2b_off = ntb->b2b_off;
2454	}
2455
2456	if (base != NULL)
2457		*base = bar->pbase + bar_b2b_off;
2458	if (vbase != NULL)
2459		*vbase = bar->vbase + bar_b2b_off;
2460	if (size != NULL)
2461		*size = bar->size - bar_b2b_off;
2462	if (align != NULL)
2463		*align = bar->size;
2464	if (align_size != NULL)
2465		*align_size = 1;
2466	return (0);
2467}
2468
2469/*
2470 * ntb_mw_set_trans() - set the translation of a memory window
2471 * @ntb:        NTB device context
2472 * @idx:        Memory window number
2473 * @addr:       The dma address local memory to expose to the peer
2474 * @size:       The size of the local memory to expose to the peer
2475 *
2476 * Set the translation of a memory window.  The peer may access local memory
2477 * through the window starting at the address, up to the size.  The address
2478 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
2479 * must be aligned to the size alignment specified by ntb_mw_get_range().
2480 *
2481 * Return: Zero on success, otherwise an error number.
2482 */
2483int
2484ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
2485    size_t size)
2486{
2487	struct ntb_pci_bar_info *bar;
2488	uint64_t base, limit, reg_val;
2489	size_t bar_size, mw_size;
2490	uint32_t base_reg, xlat_reg, limit_reg;
2491	enum ntb_bar bar_num;
2492
2493	if (idx >= ntb_mw_count(ntb))
2494		return (EINVAL);
2495
2496	bar_num = ntb_mw_to_bar(ntb, idx);
2497	bar = &ntb->bar_info[bar_num];
2498
2499	bar_size = bar->size;
2500	if (idx == ntb->b2b_mw_idx)
2501		mw_size = bar_size - ntb->b2b_off;
2502	else
2503		mw_size = bar_size;
2504
2505	/* Hardware requires that addr is aligned to bar size */
2506	if ((addr & (bar_size - 1)) != 0)
2507		return (EINVAL);
2508
2509	if (size > mw_size)
2510		return (EINVAL);
2511
2512	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2513
2514	limit = 0;
2515	if (bar_is_64bit(ntb, bar_num)) {
2516		base = ntb_reg_read(8, base_reg);
2517
2518		if (limit_reg != 0 && size != mw_size)
2519			limit = base + size;
2520
2521		/* Set and verify translation address */
2522		ntb_reg_write(8, xlat_reg, addr);
2523		reg_val = ntb_reg_read(8, xlat_reg);
2524		if (reg_val != addr) {
2525			ntb_reg_write(8, xlat_reg, 0);
2526			return (EIO);
2527		}
2528
2529		/* Set and verify the limit */
2530		ntb_reg_write(8, limit_reg, limit);
2531		reg_val = ntb_reg_read(8, limit_reg);
2532		if (reg_val != limit) {
2533			ntb_reg_write(8, limit_reg, base);
2534			ntb_reg_write(8, xlat_reg, 0);
2535			return (EIO);
2536		}
2537	} else {
2538		/* Configure 32-bit (split) BAR MW */
2539
2540		if ((addr & ~UINT32_MAX) != 0)
2541			return (EINVAL);
2542		if (((addr + size) & ~UINT32_MAX) != 0)
2543			return (EINVAL);
2544
2545		base = ntb_reg_read(4, base_reg);
2546
2547		if (limit_reg != 0 && size != mw_size)
2548			limit = base + size;
2549
2550		/* Set and verify translation address */
2551		ntb_reg_write(4, xlat_reg, addr);
2552		reg_val = ntb_reg_read(4, xlat_reg);
2553		if (reg_val != addr) {
2554			ntb_reg_write(4, xlat_reg, 0);
2555			return (EIO);
2556		}
2557
2558		/* Set and verify the limit */
2559		ntb_reg_write(4, limit_reg, limit);
2560		reg_val = ntb_reg_read(4, limit_reg);
2561		if (reg_val != limit) {
2562			ntb_reg_write(4, limit_reg, base);
2563			ntb_reg_write(4, xlat_reg, 0);
2564			return (EIO);
2565		}
2566	}
2567	return (0);
2568}
2569
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	Memory window number
 *
 * Clear the translation of a memory window by programming it with a zero
 * address and zero size via ntb_mw_set_trans().  The peer may no longer
 * access local memory through the window.
 *
 * Return: Zero on success, otherwise the error from ntb_mw_set_trans().
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int rc;

	rc = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (rc);
}
2586
2587/**
2588 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2589 * @ntb: pointer to ntb_softc instance
2590 * @bit: doorbell bits to ring
2591 *
2592 * This function allows triggering of a doorbell on the secondary/external
2593 * side that will initiate an interrupt on the remote host
2594 */
2595void
2596ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2597{
2598
2599	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2600		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
2601		return;
2602	}
2603
2604	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2605}
2606
2607/*
2608 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2609 * as well as the size of the register (via *sz_out).
2610 *
2611 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2612 * ring to its memory window write.
2613 *
2614 * Note that writing the peer doorbell via a memory window will *not* generate
2615 * an interrupt on the remote host; that must be done seperately.
2616 */
2617bus_addr_t
2618ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2619{
2620	struct ntb_pci_bar_info *bar;
2621	uint64_t regoff;
2622
2623	KASSERT(sz_out != NULL, ("must be non-NULL"));
2624
2625	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2626		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2627		regoff = ntb->peer_reg->db_bell;
2628	} else {
2629		KASSERT((HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 2) ||
2630		    (!HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 1),
2631		    ("mw_count invalid after setup"));
2632		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2633		    ("invalid b2b idx"));
2634
2635		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2636		regoff = XEON_PDOORBELL_OFFSET;
2637	}
2638	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2639
2640	*sz_out = ntb->reg->db_size;
2641	/* HACK: Specific to current x86 bus implementation. */
2642	return ((uint64_t)bar->pci_bus_handle + regoff);
2643}
2644
2645/*
2646 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2647 * @ntb:	NTB device context
2648 *
2649 * Hardware may support different number or arrangement of doorbell bits.
2650 *
2651 * Return: A mask of doorbell bits supported by the ntb.
2652 */
2653uint64_t
2654ntb_db_valid_mask(struct ntb_softc *ntb)
2655{
2656
2657	return (ntb->db_valid_mask);
2658}
2659
2660/*
2661 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
2662 * @ntb:	NTB device context
2663 * @vector:	Doorbell vector number
2664 *
2665 * Each interrupt vector may have a different number or arrangement of bits.
2666 *
2667 * Return: A mask of doorbell bits serviced by a vector.
2668 */
2669uint64_t
2670ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
2671{
2672
2673	if (vector > ntb->db_vec_count)
2674		return (0);
2675	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
2676}
2677
2678/**
2679 * ntb_link_is_up() - get the current ntb link state
2680 * @ntb:        NTB device context
2681 * @speed:      OUT - The link speed expressed as PCIe generation number
2682 * @width:      OUT - The link width expressed as the number of PCIe lanes
2683 *
2684 * RETURNS: true or false based on the hardware link state
2685 */
2686bool
2687ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2688    enum ntb_width *width)
2689{
2690
2691	if (speed != NULL)
2692		*speed = ntb_link_sta_speed(ntb);
2693	if (width != NULL)
2694		*width = ntb_link_sta_width(ntb);
2695	return (link_is_up(ntb));
2696}
2697
2698static void
2699save_bar_parameters(struct ntb_pci_bar_info *bar)
2700{
2701
2702	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2703	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2704	bar->pbase = rman_get_start(bar->pci_resource);
2705	bar->size = rman_get_size(bar->pci_resource);
2706	bar->vbase = rman_get_virtual(bar->pci_resource);
2707}
2708
2709device_t
2710ntb_get_device(struct ntb_softc *ntb)
2711{
2712
2713	return (ntb->device);
2714}
2715
/*
 * Export HW-specific errata information: public wrapper around the
 * file-local HAS_FEATURE() test for the given feature bit(s) on @ntb.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
2723