ntb_hw_intel.c revision 301292
1230429Skib/*-
2230429Skib * Copyright (C) 2013 Intel Corporation
3230429Skib * Copyright (C) 2015 EMC Corporation
4230429Skib * All rights reserved.
5230429Skib *
6230429Skib * Redistribution and use in source and binary forms, with or without
7230429Skib * modification, are permitted provided that the following conditions
8230429Skib * are met:
9230429Skib * 1. Redistributions of source code must retain the above copyright
10230429Skib *    notice, this list of conditions and the following disclaimer.
11230429Skib * 2. Redistributions in binary form must reproduce the above copyright
12230429Skib *    notice, this list of conditions and the following disclaimer in the
13230429Skib *    documentation and/or other materials provided with the distribution.
14230429Skib *
15230429Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16230429Skib * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17230429Skib * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18230429Skib * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19230429Skib * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20230429Skib * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21230429Skib * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22230429Skib * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23230429Skib * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24230429Skib * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25230429Skib * SUCH DAMAGE.
26230429Skib */
27230429Skib
28230429Skib#include <sys/cdefs.h>
29230429Skib__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 301292 2016-06-04 00:08:37Z mav $");
30230429Skib
31230429Skib#include <sys/param.h>
32230429Skib#include <sys/kernel.h>
33230429Skib#include <sys/systm.h>
34230429Skib#include <sys/bus.h>
35230429Skib#include <sys/endian.h>
36230429Skib#include <sys/malloc.h>
37230429Skib#include <sys/module.h>
38230429Skib#include <sys/mutex.h>
39230429Skib#include <sys/pciio.h>
40230429Skib#include <sys/queue.h>
41230429Skib#include <sys/rman.h>
42230864Skib#include <sys/sbuf.h>
43230429Skib#include <sys/sysctl.h>
44230429Skib#include <vm/vm.h>
45230429Skib#include <vm/pmap.h>
46230429Skib#include <machine/bus.h>
47230429Skib#include <machine/intr_machdep.h>
48230429Skib#include <machine/resource.h>
49230429Skib#include <dev/pci/pcireg.h>
50230429Skib#include <dev/pci/pcivar.h>
51230429Skib
52230429Skib#include "ntb_regs.h"
53230429Skib#include "ntb_hw.h"
54230429Skib
55230429Skib/*
56230429Skib * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
57230429Skib * allows you to connect two systems using a PCI-e link.
58230429Skib *
59230429Skib * This module contains the hardware abstraction layer for the NTB. It allows
60251047Skib * you to send and receive interrupts, map the memory windows and send and
61230429Skib * receive messages in the scratch-pad registers.
62230429Skib *
63230429Skib * NOTE: Much of the code in this module is shared with Linux. Any patches may
64230429Skib * be picked up and redistributed in Linux with a dual GPL/BSD license.
65230429Skib */
66230429Skib
/*
 * Upper bound on the number of MSI-X vectors: the larger of the Xeon and
 * Atom doorbell counts.  Sizes ntb_softc.int_info[].
 */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
/* 64-bit mask with the low 12 bits cleared. */
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

/* Fetch this driver's softc from a device_t. */
#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))

/*
 * NOTE(review): presumably marker values written during the peer MSI-X
 * exchange (see the NTB_MSIX_* scratchpad slots) -- confirm against
 * ntb_exchange_msix().
 */
#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0
#define	ONE_MB			(1024u * 1024)
78230429Skib
/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 *
 * Size of one MSI-X table entry and the byte offsets of its fields.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8
87251047Skib
/*
 * Hardware family of the NTB device.  ntb_attach() uses it to pick the
 * Atom or Xeon detect/init routines.
 */
enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};
92251047Skib
/*
 * Connection topology of the bridge.
 * ntb_conn_type are hardware numbers, cannot change.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};
99230429Skib
/*
 * Which side of a back-to-back link this device is.
 * NOTE(review): USD/DSD presumably stand for upstream/downstream side
 * device, matching the usd_/dsd_ sysctl names in this file -- confirm.
 */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};
104230429Skib
/*
 * Index into ntb_softc.bar_info[].  ntb_map_pci_bars() binds these to PCI
 * BAR 0 (config/MMR), BAR 2, BAR 4 and -- only with NTB_SPLIT_BAR -- BAR 5.
 * NTB_MAX_BARS is the number of entries, not a valid index.
 */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};
112
/*
 * NOTE(review): presumably scratchpad register indices used while
 * exchanging MSI-X data/offset values with the peer -- confirm against
 * ntb_exchange_msix().  NTB_MAX_MSIX_SPAD is the number of slots.
 */
enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};
124
/*
 * Device features and workarounds.
 *
 * Evaluates to true when any bit of 'feature' is set in ntb->features.
 * Expects a variable named 'ntb' (struct ntb_softc *) in the caller's scope.
 */
#define HAS_FEATURE(feature)	\
	((ntb->features & (feature)) != 0)
128
/* One entry of the supported-device table (pci_ids[]). */
struct ntb_hw_info {
	uint32_t		device_id;	/* compared to pci_get_devid() -- presumably; lookup not shown here */
	const char		*desc;		/* passed to device_set_desc() in probe */
	enum ntb_device_type	type;		/* copied into the softc on attach */
	uint32_t		features;	/* feature/errata bits, copied into the softc on attach */
};
135
/* Per-BAR bookkeeping; one entry per enum ntb_bar in ntb_softc.bar_info[]. */
struct ntb_pci_bar_info {
	/* Tag/handle used by the ntb_bar_read/ntb_bar_write accessors. */
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;	/* rid, PCIR_BAR(n) */
	struct resource		*pci_resource;		/* NULL until allocated */
	/*
	 * NOTE(review): pbase/vbase/size are presumably filled in by
	 * save_bar_parameters() -- confirm.
	 */
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	vm_memattr_t		map_mode;	/* caching attribute currently in effect */

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};
151
/* One allocated interrupt. */
struct ntb_int_info {
	struct resource	*res;	/* SYS_RES_IRQ resource */
	int		rid;	/* resource id used for the allocation */
	void		*tag;	/* cookie returned by bus_setup_intr(), or NULL */
};
157
/*
 * Per-vector context.  ntb_setup_msix() passes &msix_vec[i] as the handler
 * argument for vector i.
 */
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
	unsigned		masked;
};
163
/* Per-family register layout (see atom_reg and xeon_reg). */
struct ntb_reg {
	uint32_t	ntb_ctl;	/* NTB control register offset */
	uint32_t	lnk_sta;	/* link status register offset */
	uint8_t		db_size;	/* doorbell register width in bytes */
	unsigned	mw_bar[NTB_MAX_BARS];	/* memory window index -> enum ntb_bar; 0 means unused */
};
170
/* Register offsets for one side's doorbell and scratchpad registers. */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
176
/*
 * Register offsets of the base, translation and limit registers for each
 * BAR (see atom_sec_xlat and xeon_sec_xlat).
 */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
191
/*
 * Addresses used for each BAR in a back-to-back (B2B) setup.  The addr64
 * members apply to 64-bit BARs, the addr32 members to split-BAR mode.
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
199
/*
 * An offset/data pair for one MSI-X message.
 * NOTE(review): presumably nmd_ofs is the address offset and nmd_data the
 * message data of an MSI-X table entry -- confirm against the users.
 */
struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};
204
/*
 * Per-device driver state (the newbus softc; see ntb_pci_driver).
 * Several helper macros in this file expect a pointer to this structure to
 * be in scope under the name 'ntb'.
 */
struct ntb_softc {
	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;	/* tested with HAS_FEATURE() */

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];	/* indexed by enum ntb_bar */
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;	/* count of int_info[] entries with a resource */

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	void			*ntb_ctx;
	const struct ntb_ctx_ops *ctx_ops;
	struct ntb_vec		*msix_vec;	/* handler arguments, one per MSI-X vector */
#define CTX_LOCK(sc)		mtx_lock(&(sc)->ctx_lock)
#define CTX_UNLOCK(sc)		mtx_unlock(&(sc)->ctx_lock)
#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
	struct mtx		ctx_lock;	/* MTX_DEF, initialized in ntb_attach() */

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint8_t				msix_mw_idx;	/* also set to B2B_MW_DISABLED on attach */

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;	/* MTX_SPIN, initialized in ntb_attach() */

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;

	int				last_ts;	/* ticks @ last irq */

	/* Register layout tables for this hardware family. */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
269
270#ifdef __i386__
271static __inline uint64_t
272bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
273    bus_size_t offset)
274{
275
276	return (bus_space_read_4(tag, handle, offset) |
277	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
278}
279
280static __inline void
281bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
282    bus_size_t offset, uint64_t val)
283{
284
285	bus_space_write_4(tag, handle, offset, val);
286	bus_space_write_4(tag, handle, offset + 4, val >> 32);
287}
288#endif
289
/*
 * Register accessors.  All of them expect a 'struct ntb_softc *ntb' in the
 * caller's scope.  SIZE is the access width in bytes and is pasted onto
 * bus_space_read_/bus_space_write_ (e.g. SIZE 4 -> bus_space_read_4).
 *
 * ntb_bar_*: access 'offset' within the given enum ntb_bar.
 * ntb_reg_*: access the config/MMR BAR (NTB_CONFIG_BAR).
 * ntb_mw_*:  access the BAR backing memory window ntb->b2b_mw_idx.
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
304
/* Forward declarations for the file-local functions defined below. */

/* newbus entry points */
static int ntb_probe(device_t device);
static int ntb_attach(device_t device);
static int ntb_detach(device_t device);
/* Memory window / BAR helpers */
static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int ntb_map_pci_bars(struct ntb_softc *ntb);
static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
/* Interrupt setup and handling */
static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int ntb_init_isr(struct ntb_softc *ntb);
static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void ntb_free_msix_vec(struct ntb_softc *ntb);
static void ntb_get_msix_info(struct ntb_softc *ntb);
static void ntb_exchange_msix(void *);
/* Device detection and initialization */
static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
static void ntb_detect_max_mw(struct ntb_softc *ntb);
static int ntb_detect_xeon(struct ntb_softc *ntb);
static int ntb_detect_atom(struct ntb_softc *ntb);
static int ntb_xeon_init_dev(struct ntb_softc *ntb);
static int ntb_atom_init_dev(struct ntb_softc *ntb);
static void ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static int xeon_setup_msix_bar(struct ntb_softc *);
/* Link state */
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
static void recover_atom_link(void *arg);
static bool ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
/* sysctl support */
static void ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
371
/* Log verbosity threshold, exposed read/write as hw.ntb.debug_level. */
static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
/*
 * device_printf() on ntb->device, emitted only when 'lvl' does not exceed
 * the current debug level.  Expects 'ntb' in the caller's scope.
 */
#define ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)
380
/*
 * Values accepted by the hw.ntb.default_mw_pat tunable.  ntb_pat_flags()
 * translates them to VM_MEMATTR_* constants; anything else is treated as
 * _NTB_PAT_UC.
 */
#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
/* Selected default caching mode for memory window BARs (default: UC). */
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));
396
397static inline vm_memattr_t
398ntb_pat_flags(void)
399{
400
401	switch (g_ntb_mw_pat) {
402	case _NTB_PAT_WC:
403		return (VM_MEMATTR_WRITE_COMBINING);
404	case _NTB_PAT_WT:
405		return (VM_MEMATTR_WRITE_THROUGH);
406	case _NTB_PAT_WP:
407		return (VM_MEMATTR_WRITE_PROTECTED);
408	case _NTB_PAT_WB:
409		return (VM_MEMATTR_WRITE_BACK);
410	case _NTB_PAT_UCM:
411		return (VM_MEMATTR_WEAK_UNCACHEABLE);
412	case _NTB_PAT_UC:
413		/* FALLTHROUGH */
414	default:
415		return (VM_MEMATTR_UNCACHEABLE);
416	}
417}
418
419/*
420 * Well, this obviously doesn't belong here, but it doesn't seem to exist
421 * anywhere better yet.
422 */
423static inline const char *
424ntb_vm_memattr_to_str(vm_memattr_t pat)
425{
426
427	switch (pat) {
428	case VM_MEMATTR_WRITE_COMBINING:
429		return ("WRITE_COMBINING");
430	case VM_MEMATTR_WRITE_THROUGH:
431		return ("WRITE_THROUGH");
432	case VM_MEMATTR_WRITE_PROTECTED:
433		return ("WRITE_PROTECTED");
434	case VM_MEMATTR_WRITE_BACK:
435		return ("WRITE_BACK");
436	case VM_MEMATTR_WEAK_UNCACHEABLE:
437		return ("UNCACHED");
438	case VM_MEMATTR_UNCACHEABLE:
439		return ("UNCACHEABLE");
440	default:
441		return ("UNKNOWN");
442	}
443}
444
/*
 * Boot-time tunable hw.ntb.msix_mw_idx: memory window used to reach the
 * peer's MSI-X registers.  Default 0; negative values count from the last
 * window.
 */
static int g_ntb_msix_idx = 0;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");

/*
 * Boot-time tunable hw.ntb.b2b_mw_idx: memory window used to reach the
 * peer's NTB registers.  Default -1, i.e. the last window.
 */
static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
459
/*
 * Supported devices: device id, description, hardware family and
 * feature/errata flags.  The list ends with an all-zero id and a NULL
 * description.
 */
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	/* End-of-table marker. */
	{ 0x00000000, NULL, NTB_ATOM, 0 }
};
481
/* Atom register layout: 64-bit doorbells, two memory window BARs. */
static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

/* Atom primary-side doorbell, doorbell mask and scratchpad offsets. */
static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

/* Atom B2B doorbell and scratchpad offsets (no db_mask is set). */
static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

/*
 * Atom secondary-side translation registers.  Only the xlat offsets are
 * populated; base and limit offsets are compiled out and therefore zero.
 */
static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};
514
/* Xeon register layout: 16-bit doorbells, up to three memory window BARs. */
static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

/* Xeon primary-side doorbell, doorbell mask and scratchpad offsets. */
static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

/* Xeon B2B doorbell and scratchpad offsets (no db_mask is set). */
static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

/* Xeon secondary-side base, limit and translation register offsets. */
static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};
547
/*
 * Default B2B addresses for the upstream side.  Not const: the
 * hw.ntb.xeon_b2b.usd_* tunables below can override individual members.
 */
static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

/*
 * Default B2B addresses for the downstream side; same defaults as the
 * upstream table, overridable through the hw.ntb.xeon_b2b.dsd_* tunables.
 */
static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};
563
/*
 * hw.ntb.xeon_b2b.*: boot-time tunables (CTLFLAG_RDTUN) that override the
 * members of xeon_b2b_usd_addr and xeon_b2b_dsd_addr.
 */
SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

/* Upstream-side overrides. */
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/* Downstream-side overrides. */
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");
594
595/*
596 * OS <-> Driver interface structures
597 */
/* malloc(9) type for this driver's allocations. */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/* newbus method table: probe, attach and detach only. */
static device_method_t ntb_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_probe),
	DEVMETHOD(device_attach,    ntb_attach),
	DEVMETHOD(device_detach,    ntb_detach),
	DEVMETHOD_END
};

/* Driver description; the softc is a struct ntb_softc. */
static driver_t ntb_pci_driver = {
	"ntb_hw",
	ntb_pci_methods,
	sizeof(struct ntb_softc),
};

/* Register the driver on the pci bus as module ntb_hw, version 1. */
static devclass_t ntb_devclass;
DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
MODULE_VERSION(ntb_hw, 1);

/* Root of the hw.ntb sysctl tree. */
SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
619
620/*
621 * OS <-> Driver linkage functions
622 */
623static int
624ntb_probe(device_t device)
625{
626	struct ntb_hw_info *p;
627
628	p = ntb_get_device_info(pci_get_devid(device));
629	if (p == NULL)
630		return (ENXIO);
631
632	device_set_desc(device, p->desc);
633	return (0);
634}
635
636static int
637ntb_attach(device_t device)
638{
639	struct ntb_softc *ntb;
640	struct ntb_hw_info *p;
641	int error;
642
643	ntb = DEVICE2SOFTC(device);
644	p = ntb_get_device_info(pci_get_devid(device));
645
646	ntb->device = device;
647	ntb->type = p->type;
648	ntb->features = p->features;
649	ntb->b2b_mw_idx = B2B_MW_DISABLED;
650	ntb->msix_mw_idx = B2B_MW_DISABLED;
651
652	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
653	callout_init(&ntb->heartbeat_timer, 1);
654	callout_init(&ntb->lr_timer, 1);
655	callout_init(&ntb->peer_msix_work, 1);
656	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
657	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
658
659	if (ntb->type == NTB_ATOM)
660		error = ntb_detect_atom(ntb);
661	else
662		error = ntb_detect_xeon(ntb);
663	if (error != 0)
664		goto out;
665
666	ntb_detect_max_mw(ntb);
667
668	pci_enable_busmaster(ntb->device);
669
670	error = ntb_map_pci_bars(ntb);
671	if (error != 0)
672		goto out;
673	if (ntb->type == NTB_ATOM)
674		error = ntb_atom_init_dev(ntb);
675	else
676		error = ntb_xeon_init_dev(ntb);
677	if (error != 0)
678		goto out;
679
680	ntb_spad_clear(ntb);
681
682	ntb_poll_link(ntb);
683
684	ntb_sysctl_init(ntb);
685
686out:
687	if (error != 0)
688		ntb_detach(device);
689	return (error);
690}
691
692static int
693ntb_detach(device_t device)
694{
695	struct ntb_softc *ntb;
696
697	ntb = DEVICE2SOFTC(device);
698
699	if (ntb->self_reg != NULL) {
700		DB_MASK_LOCK(ntb);
701		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
702		DB_MASK_UNLOCK(ntb);
703	}
704	callout_drain(&ntb->heartbeat_timer);
705	callout_drain(&ntb->lr_timer);
706	callout_drain(&ntb->peer_msix_work);
707	pci_disable_busmaster(ntb->device);
708	if (ntb->type == NTB_XEON)
709		ntb_teardown_xeon(ntb);
710	ntb_teardown_interrupts(ntb);
711
712	mtx_destroy(&ntb->db_mask_lock);
713	mtx_destroy(&ntb->ctx_lock);
714
715	ntb_unmap_pci_bar(ntb);
716
717	return (0);
718}
719
720/*
721 * Driver internal routines
722 */
723static inline enum ntb_bar
724ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
725{
726
727	KASSERT(mw < ntb->mw_count,
728	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
729	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
730
731	return (ntb->reg->mw_bar[mw]);
732}
733
734static inline bool
735bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
736{
737	/* XXX This assertion could be stronger. */
738	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
739	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
740}
741
742static inline void
743bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
744    uint32_t *xlat, uint32_t *lmt)
745{
746	uint32_t basev, lmtv, xlatv;
747
748	switch (bar) {
749	case NTB_B2B_BAR_1:
750		basev = ntb->xlat_reg->bar2_base;
751		lmtv = ntb->xlat_reg->bar2_limit;
752		xlatv = ntb->xlat_reg->bar2_xlat;
753		break;
754	case NTB_B2B_BAR_2:
755		basev = ntb->xlat_reg->bar4_base;
756		lmtv = ntb->xlat_reg->bar4_limit;
757		xlatv = ntb->xlat_reg->bar4_xlat;
758		break;
759	case NTB_B2B_BAR_3:
760		basev = ntb->xlat_reg->bar5_base;
761		lmtv = ntb->xlat_reg->bar5_limit;
762		xlatv = ntb->xlat_reg->bar5_xlat;
763		break;
764	default:
765		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
766		    ("bad bar"));
767		basev = lmtv = xlatv = 0;
768		break;
769	}
770
771	if (base != NULL)
772		*base = basev;
773	if (xlat != NULL)
774		*xlat = xlatv;
775	if (lmt != NULL)
776		*lmt = lmtv;
777}
778
779static int
780ntb_map_pci_bars(struct ntb_softc *ntb)
781{
782	int rc;
783
784	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
785	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
786	if (rc != 0)
787		goto out;
788
789	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
790	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
791	if (rc != 0)
792		goto out;
793	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
794	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
795	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
796
797	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
798	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
799	if (rc != 0)
800		goto out;
801	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
802	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
803	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
804
805	if (!HAS_FEATURE(NTB_SPLIT_BAR))
806		goto out;
807
808	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
809	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
810	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
811	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
812	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
813
814out:
815	if (rc != 0)
816		device_printf(ntb->device,
817		    "unable to allocate pci resource\n");
818	return (rc);
819}
820
821static void
822print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
823    const char *kind)
824{
825
826	device_printf(ntb->device,
827	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
828	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
829	    (char *)bar->vbase + bar->size - 1,
830	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
831	    (uintmax_t)bar->size, kind);
832}
833
834static int
835map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
836{
837
838	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
839	    &bar->pci_resource_id, RF_ACTIVE);
840	if (bar->pci_resource == NULL)
841		return (ENXIO);
842
843	save_bar_parameters(bar);
844	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
845	print_map_success(ntb, bar, "mmr");
846	return (0);
847}
848
849static int
850map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
851{
852	int rc;
853	vm_memattr_t mapmode;
854	uint8_t bar_size_bits = 0;
855
856	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
857	    &bar->pci_resource_id, RF_ACTIVE);
858
859	if (bar->pci_resource == NULL)
860		return (ENXIO);
861
862	save_bar_parameters(bar);
863	/*
864	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
865	 * hardware issue. To work around this, query the size it should be
866	 * configured to by the device and modify the resource to correspond to
867	 * this new size. The BIOS on systems with this problem is required to
868	 * provide enough address space to allow the driver to make this change
869	 * safely.
870	 *
871	 * Ideally I could have just specified the size when I allocated the
872	 * resource like:
873	 *  bus_alloc_resource(ntb->device,
874	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
875	 *	1ul << bar_size_bits, RF_ACTIVE);
876	 * but the PCI driver does not honor the size in this call, so we have
877	 * to modify it after the fact.
878	 */
879	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
880		if (bar->pci_resource_id == PCIR_BAR(2))
881			bar_size_bits = pci_read_config(ntb->device,
882			    XEON_PBAR23SZ_OFFSET, 1);
883		else
884			bar_size_bits = pci_read_config(ntb->device,
885			    XEON_PBAR45SZ_OFFSET, 1);
886
887		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
888		    bar->pci_resource, bar->pbase,
889		    bar->pbase + (1ul << bar_size_bits) - 1);
890		if (rc != 0) {
891			device_printf(ntb->device,
892			    "unable to resize bar\n");
893			return (rc);
894		}
895
896		save_bar_parameters(bar);
897	}
898
899	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
900	print_map_success(ntb, bar, "mw");
901
902	/*
903	 * Optionally, mark MW BARs as anything other than UC to improve
904	 * performance.
905	 */
906	mapmode = ntb_pat_flags();
907	if (mapmode == bar->map_mode)
908		return (0);
909
910	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
911	if (rc == 0) {
912		bar->map_mode = mapmode;
913		device_printf(ntb->device,
914		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
915		    "%s.\n",
916		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
917		    (char *)bar->vbase + bar->size - 1,
918		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
919		    ntb_vm_memattr_to_str(mapmode));
920	} else
921		device_printf(ntb->device,
922		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
923		    "%s: %d\n",
924		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
925		    (char *)bar->vbase + bar->size - 1,
926		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
927		    ntb_vm_memattr_to_str(mapmode), rc);
928		/* Proceed anyway */
929	return (0);
930}
931
932static void
933ntb_unmap_pci_bar(struct ntb_softc *ntb)
934{
935	struct ntb_pci_bar_info *current_bar;
936	int i;
937
938	for (i = 0; i < NTB_MAX_BARS; i++) {
939		current_bar = &ntb->bar_info[i];
940		if (current_bar->pci_resource != NULL)
941			bus_release_resource(ntb->device, SYS_RES_MEMORY,
942			    current_bar->pci_resource_id,
943			    current_bar->pci_resource);
944	}
945}
946
947static int
948ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
949{
950	uint32_t i;
951	int rc;
952
953	for (i = 0; i < num_vectors; i++) {
954		ntb->int_info[i].rid = i + 1;
955		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
956		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
957		if (ntb->int_info[i].res == NULL) {
958			device_printf(ntb->device,
959			    "bus_alloc_resource failed\n");
960			return (ENOMEM);
961		}
962		ntb->int_info[i].tag = NULL;
963		ntb->allocated_interrupts++;
964		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
965		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
966		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
967		if (rc != 0) {
968			device_printf(ntb->device, "bus_setup_intr failed\n");
969			return (ENXIO);
970		}
971	}
972	return (0);
973}
974
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 *
 * When non-zero and pci_alloc_msix() granted every desired vector,
 * ntb_init_isr() decrements its vector count by one so that the
 * ntb_remap_msix() path is exercised anyway.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");
986
/*
 * In case it is NOT ok, give consumers an abort button.
 *
 * When non-zero, ntb_remap_msix() returns ENXIO without attempting a remap.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
995
996/*
997 * Remap the desired number of MSI-X messages to available ithreads in a simple
998 * round-robin fashion.
999 */
1000static int
1001ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1002{
1003	u_int *vectors;
1004	uint32_t i;
1005	int rc;
1006
1007	if (ntb_prefer_intx != 0)
1008		return (ENXIO);
1009
1010	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1011
1012	for (i = 0; i < desired; i++)
1013		vectors[i] = (i % avail) + 1;
1014
1015	rc = pci_remap_msix(dev, desired, vectors);
1016	free(vectors, M_NTB);
1017	return (rc);
1018}
1019
1020static int
1021ntb_init_isr(struct ntb_softc *ntb)
1022{
1023	uint32_t desired_vectors, num_vectors;
1024	int rc;
1025
1026	ntb->allocated_interrupts = 0;
1027	ntb->last_ts = ticks;
1028
1029	/*
1030	 * Mask all doorbell interrupts.  (Except link events!)
1031	 */
1032	DB_MASK_LOCK(ntb);
1033	ntb->db_mask = ntb->db_valid_mask;
1034	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1035	DB_MASK_UNLOCK(ntb);
1036
1037	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1038	    ntb->db_count);
1039	if (desired_vectors >= 1) {
1040		rc = pci_alloc_msix(ntb->device, &num_vectors);
1041
1042		if (ntb_force_remap_mode != 0 && rc == 0 &&
1043		    num_vectors == desired_vectors)
1044			num_vectors--;
1045
1046		if (rc == 0 && num_vectors < desired_vectors) {
1047			rc = ntb_remap_msix(ntb->device, desired_vectors,
1048			    num_vectors);
1049			if (rc == 0)
1050				num_vectors = desired_vectors;
1051			else
1052				pci_release_msi(ntb->device);
1053		}
1054		if (rc != 0)
1055			num_vectors = 1;
1056	} else
1057		num_vectors = 1;
1058
1059	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
1060		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1061			device_printf(ntb->device,
1062			    "Errata workaround does not support MSI or INTX\n");
1063			return (EINVAL);
1064		}
1065
1066		ntb->db_vec_count = 1;
1067		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1068		rc = ntb_setup_legacy_interrupt(ntb);
1069	} else {
1070		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
1071		    HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1072			device_printf(ntb->device,
1073			    "Errata workaround expects %d doorbell bits\n",
1074			    XEON_NONLINK_DB_MSIX_BITS);
1075			return (EINVAL);
1076		}
1077
1078		ntb_create_msix_vec(ntb, num_vectors);
1079		rc = ntb_setup_msix(ntb, num_vectors);
1080		if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1081			ntb_get_msix_info(ntb);
1082	}
1083	if (rc != 0) {
1084		device_printf(ntb->device,
1085		    "Error allocating interrupts: %d\n", rc);
1086		ntb_free_msix_vec(ntb);
1087	}
1088
1089	return (rc);
1090}
1091
1092static int
1093ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1094{
1095	int rc;
1096
1097	ntb->int_info[0].rid = 0;
1098	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1099	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1100	if (ntb->int_info[0].res == NULL) {
1101		device_printf(ntb->device, "bus_alloc_resource failed\n");
1102		return (ENOMEM);
1103	}
1104
1105	ntb->int_info[0].tag = NULL;
1106	ntb->allocated_interrupts = 1;
1107
1108	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1109	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1110	    ntb, &ntb->int_info[0].tag);
1111	if (rc != 0) {
1112		device_printf(ntb->device, "bus_setup_intr failed\n");
1113		return (ENXIO);
1114	}
1115
1116	return (0);
1117}
1118
1119static void
1120ntb_teardown_interrupts(struct ntb_softc *ntb)
1121{
1122	struct ntb_int_info *current_int;
1123	int i;
1124
1125	for (i = 0; i < ntb->allocated_interrupts; i++) {
1126		current_int = &ntb->int_info[i];
1127		if (current_int->tag != NULL)
1128			bus_teardown_intr(ntb->device, current_int->res,
1129			    current_int->tag);
1130
1131		if (current_int->res != NULL)
1132			bus_release_resource(ntb->device, SYS_RES_IRQ,
1133			    rman_get_rid(current_int->res), current_int->res);
1134	}
1135
1136	ntb_free_msix_vec(ntb);
1137	pci_release_msi(ntb->device);
1138}
1139
1140/*
1141 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1142 * out to make code clearer.
1143 */
1144static inline uint64_t
1145db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1146{
1147
1148	if (ntb->type == NTB_ATOM)
1149		return (ntb_reg_read(8, regoff));
1150
1151	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1152
1153	return (ntb_reg_read(2, regoff));
1154}
1155
1156static inline void
1157db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1158{
1159
1160	KASSERT((val & ~ntb->db_valid_mask) == 0,
1161	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1162	     (uintmax_t)(val & ~ntb->db_valid_mask),
1163	     (uintmax_t)ntb->db_valid_mask));
1164
1165	if (regoff == ntb->self_reg->db_mask)
1166		DB_MASK_ASSERT(ntb, MA_OWNED);
1167	db_iowrite_raw(ntb, regoff, val);
1168}
1169
1170static inline void
1171db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1172{
1173
1174	if (ntb->type == NTB_ATOM) {
1175		ntb_reg_write(8, regoff, val);
1176		return;
1177	}
1178
1179	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1180	ntb_reg_write(2, regoff, (uint16_t)val);
1181}
1182
1183void
1184ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1185{
1186
1187	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1188		return;
1189
1190	DB_MASK_LOCK(ntb);
1191	ntb->db_mask |= bits;
1192	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1193	DB_MASK_UNLOCK(ntb);
1194}
1195
1196void
1197ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1198{
1199
1200	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1201	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1202	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1203	     (uintmax_t)ntb->db_valid_mask));
1204
1205	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1206		return;
1207
1208	DB_MASK_LOCK(ntb);
1209	ntb->db_mask &= ~bits;
1210	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1211	DB_MASK_UNLOCK(ntb);
1212}
1213
1214uint64_t
1215ntb_db_read(struct ntb_softc *ntb)
1216{
1217
1218	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1219		uint64_t res;
1220		unsigned i;
1221
1222		res = 0;
1223		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1224			if (ntb->msix_vec[i].masked != 0)
1225				res |= ntb_db_vector_mask(ntb, i);
1226		}
1227		return (res);
1228	}
1229
1230	return (db_ioread(ntb, ntb->self_reg->db_bell));
1231}
1232
1233void
1234ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1235{
1236
1237	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1238	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1239	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1240	     (uintmax_t)ntb->db_valid_mask));
1241
1242	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1243		unsigned i;
1244
1245		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1246			if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
1247				DB_MASK_LOCK(ntb);
1248				if (ntb->msix_vec[i].masked != 0) {
1249					/* XXX These need a public API. */
1250#if 0
1251					pci_unmask_msix(ntb->device, i);
1252#endif
1253					ntb->msix_vec[i].masked = 0;
1254				}
1255				DB_MASK_UNLOCK(ntb);
1256			}
1257		}
1258		return;
1259	}
1260
1261	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1262}
1263
1264static inline uint64_t
1265ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1266{
1267	uint64_t shift, mask;
1268
1269	shift = ntb->db_vec_shift;
1270	mask = (1ull << shift) - 1;
1271	return (mask << (shift * db_vector));
1272}
1273
1274static void
1275ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1276{
1277	uint64_t vec_mask;
1278
1279	ntb->last_ts = ticks;
1280	vec_mask = ntb_vec_mask(ntb, vec);
1281
1282	if ((vec_mask & ntb->db_link_mask) != 0) {
1283		if (ntb_poll_link(ntb))
1284			ntb_link_event(ntb);
1285	}
1286
1287	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
1288	    (vec_mask & ntb->db_link_mask) == 0) {
1289		DB_MASK_LOCK(ntb);
1290		if (ntb->msix_vec[vec].masked == 0) {
1291			/* XXX These need a public API. */
1292#if 0
1293			pci_mask_msix(ntb->device, vec);
1294#endif
1295			ntb->msix_vec[vec].masked = 1;
1296		}
1297		DB_MASK_UNLOCK(ntb);
1298	}
1299
1300	if ((vec_mask & ntb->db_valid_mask) != 0)
1301		ntb_db_event(ntb, vec);
1302}
1303
1304static void
1305ndev_vec_isr(void *arg)
1306{
1307	struct ntb_vec *nvec = arg;
1308
1309	ntb_interrupt(nvec->ntb, nvec->num);
1310}
1311
/*
 * Single-interrupt handler.  arg is the softc.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	ntb_interrupt(ntb, 0);
}
1318
1319static int
1320ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1321{
1322	uint32_t i;
1323
1324	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1325	    M_ZERO | M_WAITOK);
1326	for (i = 0; i < num_vectors; i++) {
1327		ntb->msix_vec[i].num = i;
1328		ntb->msix_vec[i].ntb = ntb;
1329	}
1330
1331	return (0);
1332}
1333
1334static void
1335ntb_free_msix_vec(struct ntb_softc *ntb)
1336{
1337
1338	if (ntb->msix_vec == NULL)
1339		return;
1340
1341	free(ntb->msix_vec, M_NTB);
1342	ntb->msix_vec = NULL;
1343}
1344
1345static void
1346ntb_get_msix_info(struct ntb_softc *ntb)
1347{
1348	struct pci_devinfo *dinfo;
1349	struct pcicfg_msix *msix;
1350	uint32_t laddr, data, i, offset;
1351
1352	dinfo = device_get_ivars(ntb->device);
1353	msix = &dinfo->cfg.msix;
1354
1355	laddr = data = 0;
1356
1357	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1358
1359	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1360		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1361
1362		laddr = bus_read_4(msix->msix_table_res, offset +
1363		    PCI_MSIX_ENTRY_LOWER_ADDR);
1364		ntb_printf(2, "local lower MSIX addr(%u): 0x%x\n", i, laddr);
1365
1366		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1367		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1368		     MSI_INTEL_ADDR_BASE));
1369		ntb->msix_data[i].nmd_ofs = laddr & ~MSI_INTEL_ADDR_BASE;
1370
1371		data = bus_read_4(msix->msix_table_res, offset +
1372		    PCI_MSIX_ENTRY_DATA);
1373		ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1374
1375		ntb->msix_data[i].nmd_data = data;
1376	}
1377}
1378
1379static struct ntb_hw_info *
1380ntb_get_device_info(uint32_t device_id)
1381{
1382	struct ntb_hw_info *ep = pci_ids;
1383
1384	while (ep->device_id) {
1385		if (ep->device_id == device_id)
1386			return (ep);
1387		++ep;
1388	}
1389	return (NULL);
1390}
1391
1392static void
1393ntb_teardown_xeon(struct ntb_softc *ntb)
1394{
1395
1396	if (ntb->reg != NULL)
1397		ntb_link_disable(ntb);
1398}
1399
1400static void
1401ntb_detect_max_mw(struct ntb_softc *ntb)
1402{
1403
1404	if (ntb->type == NTB_ATOM) {
1405		ntb->mw_count = ATOM_MW_COUNT;
1406		return;
1407	}
1408
1409	if (HAS_FEATURE(NTB_SPLIT_BAR))
1410		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1411	else
1412		ntb->mw_count = XEON_SNB_MW_COUNT;
1413}
1414
1415static int
1416ntb_detect_xeon(struct ntb_softc *ntb)
1417{
1418	uint8_t ppd, conn_type;
1419
1420	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1421	ntb->ppd = ppd;
1422
1423	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1424		ntb->dev_type = NTB_DEV_DSD;
1425	else
1426		ntb->dev_type = NTB_DEV_USD;
1427
1428	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1429		ntb->features |= NTB_SPLIT_BAR;
1430
1431	/*
1432	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
1433	 * errata workaround; only do one at a time.
1434	 */
1435	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1436		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
1437
1438	conn_type = ppd & XEON_PPD_CONN_TYPE;
1439	switch (conn_type) {
1440	case NTB_CONN_B2B:
1441		ntb->conn_type = conn_type;
1442		break;
1443	case NTB_CONN_RP:
1444	case NTB_CONN_TRANSPARENT:
1445	default:
1446		device_printf(ntb->device, "Unsupported connection type: %u\n",
1447		    (unsigned)conn_type);
1448		return (ENXIO);
1449	}
1450	return (0);
1451}
1452
1453static int
1454ntb_detect_atom(struct ntb_softc *ntb)
1455{
1456	uint32_t ppd, conn_type;
1457
1458	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1459	ntb->ppd = ppd;
1460
1461	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1462		ntb->dev_type = NTB_DEV_DSD;
1463	else
1464		ntb->dev_type = NTB_DEV_USD;
1465
1466	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1467	switch (conn_type) {
1468	case NTB_CONN_B2B:
1469		ntb->conn_type = conn_type;
1470		break;
1471	default:
1472		device_printf(ntb->device, "Unsupported NTB configuration\n");
1473		return (ENXIO);
1474	}
1475	return (0);
1476}
1477
1478static int
1479ntb_xeon_init_dev(struct ntb_softc *ntb)
1480{
1481	int rc;
1482
1483	ntb->spad_count		= XEON_SPAD_COUNT;
1484	ntb->db_count		= XEON_DB_COUNT;
1485	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1486	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1487	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1488
1489	if (ntb->conn_type != NTB_CONN_B2B) {
1490		device_printf(ntb->device, "Connection type %d not supported\n",
1491		    ntb->conn_type);
1492		return (ENXIO);
1493	}
1494
1495	ntb->reg = &xeon_reg;
1496	ntb->self_reg = &xeon_pri_reg;
1497	ntb->peer_reg = &xeon_b2b_reg;
1498	ntb->xlat_reg = &xeon_sec_xlat;
1499
1500	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1501		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
1502		    ntb->mw_count;
1503		ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
1504		    g_ntb_msix_idx, ntb->msix_mw_idx);
1505		rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
1506		    VM_MEMATTR_UNCACHEABLE);
1507		KASSERT(rc == 0, ("shouldn't fail"));
1508	} else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
1509		/*
1510		 * There is a Xeon hardware errata related to writes to SDOORBELL or
1511		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1512		 * which may hang the system.  To workaround this, use a memory
1513		 * window to access the interrupt and scratch pad registers on the
1514		 * remote system.
1515		 */
1516		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1517		    ntb->mw_count;
1518		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1519		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1520		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
1521		    VM_MEMATTR_UNCACHEABLE);
1522		KASSERT(rc == 0, ("shouldn't fail"));
1523	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1524		/*
1525		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1526		 * mirrored to the remote system.  Shrink the number of bits by one,
1527		 * since bit 14 is the last bit.
1528		 *
1529		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1530		 * anyway.  Nor for non-B2B connection types.
1531		 */
1532		ntb->db_count = XEON_DB_COUNT - 1;
1533
1534	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1535
1536	if (ntb->dev_type == NTB_DEV_USD)
1537		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1538		    &xeon_b2b_usd_addr);
1539	else
1540		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1541		    &xeon_b2b_dsd_addr);
1542	if (rc != 0)
1543		return (rc);
1544
1545	/* Enable Bus Master and Memory Space on the secondary side */
1546	ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1547	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1548
1549	/*
1550	 * Mask all doorbell interrupts.
1551	 */
1552	DB_MASK_LOCK(ntb);
1553	ntb->db_mask = ntb->db_valid_mask;
1554	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1555	DB_MASK_UNLOCK(ntb);
1556
1557	rc = xeon_setup_msix_bar(ntb);
1558	if (rc != 0)
1559		return (rc);
1560
1561	rc = ntb_init_isr(ntb);
1562	return (rc);
1563}
1564
1565static int
1566ntb_atom_init_dev(struct ntb_softc *ntb)
1567{
1568	int error;
1569
1570	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1571	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1572
1573	ntb->spad_count		 = ATOM_SPAD_COUNT;
1574	ntb->db_count		 = ATOM_DB_COUNT;
1575	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1576	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1577	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1578
1579	ntb->reg = &atom_reg;
1580	ntb->self_reg = &atom_pri_reg;
1581	ntb->peer_reg = &atom_b2b_reg;
1582	ntb->xlat_reg = &atom_sec_xlat;
1583
1584	/*
1585	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1586	 * resolved.  Mask transaction layer internal parity errors.
1587	 */
1588	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1589
1590	configure_atom_secondary_side_bars(ntb);
1591
1592	/* Enable Bus Master and Memory Space on the secondary side */
1593	ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1594	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1595
1596	error = ntb_init_isr(ntb);
1597	if (error != 0)
1598		return (error);
1599
1600	/* Initiate PCI-E link training */
1601	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1602
1603	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1604
1605	return (0);
1606}
1607
1608/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1609static void
1610configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1611{
1612
1613	if (ntb->dev_type == NTB_DEV_USD) {
1614		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1615		    XEON_B2B_BAR2_ADDR64);
1616		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1617		    XEON_B2B_BAR4_ADDR64);
1618		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1619		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1620	} else {
1621		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1622		    XEON_B2B_BAR2_ADDR64);
1623		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1624		    XEON_B2B_BAR4_ADDR64);
1625		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1626		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1627	}
1628}
1629
1630
/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 *
 * Consulted by xeon_setup_b2b_mw(): when non-zero and half of the B2B BAR is
 * at least XEON_B2B_MIN_SIZE, the B2B registers are placed at an offset of
 * half the BAR size.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
1644
1645static void
1646xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1647    enum ntb_bar regbar)
1648{
1649	struct ntb_pci_bar_info *bar;
1650	uint8_t bar_sz;
1651
1652	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1653		return;
1654
1655	bar = &ntb->bar_info[idx];
1656	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1657	if (idx == regbar) {
1658		if (ntb->b2b_off != 0)
1659			bar_sz--;
1660		else
1661			bar_sz = 0;
1662	} else if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
1663	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx) {
1664		/* Restrict LAPIC BAR to 1MB */
1665		pci_write_config(ntb->device, bar->psz_off, 20, 1);
1666		pci_write_config(ntb->device, bar->ssz_off, 20, 1);
1667		bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1668		bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1669		(void)bar_sz;
1670		return;
1671	}
1672	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1673	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1674	(void)bar_sz;
1675}
1676
1677static void
1678xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1679    enum ntb_bar idx, enum ntb_bar regbar)
1680{
1681	uint64_t reg_val, lmt_addr;
1682	uint32_t base_reg, lmt_reg;
1683
1684	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1685	if (idx == regbar)
1686		bar_addr += ntb->b2b_off;
1687	lmt_addr = bar_addr;
1688
1689	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
1690	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx)
1691		lmt_addr += ONE_MB;
1692
1693	/*
1694	 * Set limit registers first to avoid an errata where setting the base
1695	 * registers locks the limit registers.
1696	 */
1697	if (!bar_is_64bit(ntb, idx)) {
1698		ntb_reg_write(4, lmt_reg, lmt_addr);
1699		reg_val = ntb_reg_read(4, lmt_reg);
1700		(void)reg_val;
1701
1702		ntb_reg_write(4, base_reg, bar_addr);
1703		reg_val = ntb_reg_read(4, base_reg);
1704		(void)reg_val;
1705	} else {
1706		ntb_reg_write(8, lmt_reg, lmt_addr);
1707		reg_val = ntb_reg_read(8, lmt_reg);
1708		(void)reg_val;
1709
1710		ntb_reg_write(8, base_reg, bar_addr);
1711		reg_val = ntb_reg_read(8, base_reg);
1712		(void)reg_val;
1713	}
1714}
1715
1716static void
1717xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1718{
1719	struct ntb_pci_bar_info *bar;
1720
1721	bar = &ntb->bar_info[idx];
1722	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1723		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1724		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1725	} else {
1726		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1727		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1728	}
1729	(void)base_addr;
1730}
1731
1732static int
1733xeon_setup_msix_bar(struct ntb_softc *ntb)
1734{
1735	struct ntb_pci_bar_info *lapic_bar;
1736	enum ntb_bar bar_num;
1737	int rc;
1738
1739	if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1740		return (0);
1741
1742	bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
1743	lapic_bar = &ntb->bar_info[bar_num];
1744
1745	/* Restrict LAPIC BAR to 1MB */
1746	if (lapic_bar->size > ONE_MB) {
1747		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
1748		    lapic_bar->pci_resource, lapic_bar->pbase,
1749		    lapic_bar->pbase + ONE_MB - 1);
1750		if (rc == 0)
1751			lapic_bar->size = ONE_MB;
1752		else {
1753			ntb_printf(0, "Failed to shrink LAPIC BAR resource to "
1754			    "1 MB: %d\n", rc);
1755			/* Ignore error */
1756		}
1757	}
1758
1759	ntb->peer_lapic_bar = lapic_bar;
1760	return (0);
1761}
1762
/*
 * Program the Xeon back-to-back (B2B) memory window configuration.
 *
 * addr:      address set written to the secondary BAR base/limit registers.
 * peer_addr: address set written to the primary BAR translation registers and
 *            to the B2B translation offset register.
 *
 * Returns 0 on success, EIO if the BAR chosen for B2B register access is
 * smaller than XEON_B2B_MIN_SIZE, or EINVAL if the MSI-X workaround memory
 * window maps to an unexpected BAR.
 *
 * The order of the register writes below is significant; do not reorder.
 */
static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	/*
	 * Pick the BAR used to reach the peer's registers and decide whether
	 * the registers live at the start of that BAR or halfway into it.
	 */
	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		/* Share the BAR only if half of it is still big enough. */
		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	/* Select the local address that corresponds to the B2B register BAR. */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
		size_t size, xlatoffset;

		/*
		 * Find the incoming translation register, and its access
		 * width in bytes, for the BAR backing the MSI-X window.
		 */
		switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
		case NTB_B2B_BAR_1:
			size = 8;
			xlatoffset = XEON_SBAR2XLAT_OFFSET;
			break;
		case NTB_B2B_BAR_2:
			xlatoffset = XEON_SBAR4XLAT_OFFSET;
			if (HAS_FEATURE(NTB_SPLIT_BAR))
				size = 4;
			else
				size = 8;
			break;
		case NTB_B2B_BAR_3:
			xlatoffset = XEON_SBAR5XLAT_OFFSET;
			size = 4;
			break;
		default:
			KASSERT(false, ("Bogus msix mw idx: %u",
			    ntb->msix_mw_idx));
			return (EINVAL);
		}

		/*
		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
		 * workaround
		 */
		if (size == 4)
			ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
		else
			ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
	}
	/* Read the translation registers back; the values are discarded. */
	(void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}
1913
1914static inline bool
1915_xeon_link_is_up(struct ntb_softc *ntb)
1916{
1917
1918	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1919		return (true);
1920	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1921}
1922
1923static inline bool
1924link_is_up(struct ntb_softc *ntb)
1925{
1926
1927	if (ntb->type == NTB_XEON)
1928		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
1929		    !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
1930
1931	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1932	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1933}
1934
1935static inline bool
1936atom_link_is_err(struct ntb_softc *ntb)
1937{
1938	uint32_t status;
1939
1940	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1941
1942	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1943	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1944		return (true);
1945
1946	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1947	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1948}
1949
1950/* Atom does not have link status interrupt, poll on that platform */
1951static void
1952atom_link_hb(void *arg)
1953{
1954	struct ntb_softc *ntb = arg;
1955	sbintime_t timo, poll_ts;
1956
1957	timo = NTB_HB_TIMEOUT * hz;
1958	poll_ts = ntb->last_ts + timo;
1959
1960	/*
1961	 * Delay polling the link status if an interrupt was received, unless
1962	 * the cached link status says the link is down.
1963	 */
1964	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1965		timo = poll_ts - ticks;
1966		goto out;
1967	}
1968
1969	if (ntb_poll_link(ntb))
1970		ntb_link_event(ntb);
1971
1972	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1973		/* Link is down with error, proceed with recovery */
1974		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1975		return;
1976	}
1977
1978out:
1979	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1980}
1981
1982static void
1983atom_perform_link_restart(struct ntb_softc *ntb)
1984{
1985	uint32_t status;
1986
1987	/* Driver resets the NTB ModPhy lanes - magic! */
1988	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1989	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1990	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1991	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1992
1993	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1994	pause("ModPhy", hz / 10);
1995
1996	/* Clear AER Errors, write to clear */
1997	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1998	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1999	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
2000
2001	/* Clear unexpected electrical idle event in LTSSM, write to clear */
2002	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
2003	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
2004	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
2005
2006	/* Clear DeSkew Buffer error, write to clear */
2007	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
2008	status |= ATOM_DESKEWSTS_DBERR;
2009	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
2010
2011	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
2012	status &= ATOM_IBIST_ERR_OFLOW;
2013	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
2014
2015	/* Releases the NTB state machine to allow the link to retrain */
2016	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
2017	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
2018	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
2019}
2020
2021/*
2022 * ntb_set_ctx() - associate a driver context with an ntb device
2023 * @ntb:        NTB device context
2024 * @ctx:        Driver context
2025 * @ctx_ops:    Driver context operations
2026 *
2027 * Associate a driver context and operations with a ntb device.  The context is
2028 * provided by the client driver, and the driver may associate a different
2029 * context with each ntb device.
2030 *
2031 * Return: Zero if the context is associated, otherwise an error number.
2032 */
2033int
2034ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
2035{
2036
2037	if (ctx == NULL || ops == NULL)
2038		return (EINVAL);
2039	if (ntb->ctx_ops != NULL)
2040		return (EINVAL);
2041
2042	CTX_LOCK(ntb);
2043	if (ntb->ctx_ops != NULL) {
2044		CTX_UNLOCK(ntb);
2045		return (EINVAL);
2046	}
2047	ntb->ntb_ctx = ctx;
2048	ntb->ctx_ops = ops;
2049	CTX_UNLOCK(ntb);
2050
2051	return (0);
2052}
2053
2054/*
2055 * It is expected that this will only be used from contexts where the ctx_lock
2056 * is not needed to protect ntb_ctx lifetime.
2057 */
2058void *
2059ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
2060{
2061
2062	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
2063	if (ops != NULL)
2064		*ops = ntb->ctx_ops;
2065	return (ntb->ntb_ctx);
2066}
2067
2068/*
2069 * ntb_clear_ctx() - disassociate any driver context from an ntb device
2070 * @ntb:        NTB device context
2071 *
2072 * Clear any association that may exist between a driver context and the ntb
2073 * device.
2074 */
2075void
2076ntb_clear_ctx(struct ntb_softc *ntb)
2077{
2078
2079	CTX_LOCK(ntb);
2080	ntb->ntb_ctx = NULL;
2081	ntb->ctx_ops = NULL;
2082	CTX_UNLOCK(ntb);
2083}
2084
2085/*
2086 * ntb_link_event() - notify driver context of a change in link status
2087 * @ntb:        NTB device context
2088 *
2089 * Notify the driver context that the link status may have changed.  The driver
2090 * should call ntb_link_is_up() to get the current status.
2091 */
2092void
2093ntb_link_event(struct ntb_softc *ntb)
2094{
2095
2096	CTX_LOCK(ntb);
2097	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
2098		ntb->ctx_ops->link_event(ntb->ntb_ctx);
2099	CTX_UNLOCK(ntb);
2100}
2101
2102/*
2103 * ntb_db_event() - notify driver context of a doorbell event
2104 * @ntb:        NTB device context
2105 * @vector:     Interrupt vector number
2106 *
2107 * Notify the driver context of a doorbell event.  If hardware supports
2108 * multiple interrupt vectors for doorbells, the vector number indicates which
2109 * vector received the interrupt.  The vector number is relative to the first
2110 * vector used for doorbells, starting at zero, and must be less than
2111 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
2112 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
2113 * those bits are associated with the vector number.
2114 */
2115static void
2116ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
2117{
2118
2119	CTX_LOCK(ntb);
2120	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
2121		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
2122	CTX_UNLOCK(ntb);
2123}
2124
2125/*
2126 * ntb_link_enable() - enable the link on the secondary side of the ntb
2127 * @ntb:        NTB device context
2128 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
2129 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
2130 *
2131 * Enable the link on the secondary side of the ntb.  This can only be done
2132 * from the primary side of the ntb in primary or b2b topology.  The ntb device
2133 * should train the link to its maximum speed and width, or the requested speed
2134 * and width, whichever is smaller, if supported.
2135 *
2136 * Return: Zero on success, otherwise an error number.
2137 *
2138 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
2139 *      and width input will be ignored.
2140 */
2141int
2142ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
2143    enum ntb_width w __unused)
2144{
2145	uint32_t cntl;
2146
2147	ntb_printf(2, "%s\n", __func__);
2148
2149	if (ntb->type == NTB_ATOM) {
2150		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2151		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2152		return (0);
2153	}
2154
2155	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2156		ntb_link_event(ntb);
2157		return (0);
2158	}
2159
2160	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2161	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2162	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2163	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2164	if (HAS_FEATURE(NTB_SPLIT_BAR))
2165		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2166	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2167	return (0);
2168}
2169
2170/*
2171 * ntb_link_disable() - disable the link on the secondary side of the ntb
2172 * @ntb:        NTB device context
2173 *
2174 * Disable the link on the secondary side of the ntb.  This can only be done
2175 * from the primary side of the ntb in primary or b2b topology.  The ntb device
2176 * should disable the link.  Returning from this call must indicate that a
2177 * barrier has passed, though with no more writes may pass in either direction
2178 * across the link, except if this call returns an error number.
2179 *
2180 * Return: Zero on success, otherwise an error number.
2181 */
2182int
2183ntb_link_disable(struct ntb_softc *ntb)
2184{
2185	uint32_t cntl;
2186
2187	ntb_printf(2, "%s\n", __func__);
2188
2189	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2190		ntb_link_event(ntb);
2191		return (0);
2192	}
2193
2194	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2195	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2196	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2197	if (HAS_FEATURE(NTB_SPLIT_BAR))
2198		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2199	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2200	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2201	return (0);
2202}
2203
2204bool
2205ntb_link_enabled(struct ntb_softc *ntb)
2206{
2207	uint32_t cntl;
2208
2209	if (ntb->type == NTB_ATOM) {
2210		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2211		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2212	}
2213
2214	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2215		return (true);
2216
2217	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2218	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2219}
2220
2221static void
2222recover_atom_link(void *arg)
2223{
2224	struct ntb_softc *ntb = arg;
2225	unsigned speed, width, oldspeed, oldwidth;
2226	uint32_t status32;
2227
2228	atom_perform_link_restart(ntb);
2229
2230	/*
2231	 * There is a potential race between the 2 NTB devices recovering at
2232	 * the same time.  If the times are the same, the link will not recover
2233	 * and the driver will be stuck in this loop forever.  Add a random
2234	 * interval to the recovery time to prevent this race.
2235	 */
2236	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
2237	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
2238
2239	if (atom_link_is_err(ntb))
2240		goto retry;
2241
2242	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
2243	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
2244		goto out;
2245
2246	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
2247	width = NTB_LNK_STA_WIDTH(status32);
2248	speed = status32 & NTB_LINK_SPEED_MASK;
2249
2250	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
2251	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
2252	if (oldwidth != width || oldspeed != speed)
2253		goto retry;
2254
2255out:
2256	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
2257	    ntb);
2258	return;
2259
2260retry:
2261	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
2262	    ntb);
2263}
2264
2265/*
2266 * Polls the HW link status register(s); returns true if something has changed.
2267 */
2268static bool
2269ntb_poll_link(struct ntb_softc *ntb)
2270{
2271	uint32_t ntb_cntl;
2272	uint16_t reg_val;
2273
2274	if (ntb->type == NTB_ATOM) {
2275		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2276		if (ntb_cntl == ntb->ntb_ctl)
2277			return (false);
2278
2279		ntb->ntb_ctl = ntb_cntl;
2280		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
2281	} else {
2282		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
2283
2284		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2285		if (reg_val == ntb->lnk_sta)
2286			return (false);
2287
2288		ntb->lnk_sta = reg_val;
2289
2290		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
2291			if (_xeon_link_is_up(ntb)) {
2292				if (!ntb->peer_msix_good) {
2293					callout_reset(&ntb->peer_msix_work, 0,
2294					    ntb_exchange_msix, ntb);
2295					return (false);
2296				}
2297			} else {
2298				ntb->peer_msix_good = false;
2299				ntb->peer_msix_done = false;
2300			}
2301		}
2302	}
2303	return (true);
2304}
2305
2306static inline enum ntb_speed
2307ntb_link_sta_speed(struct ntb_softc *ntb)
2308{
2309
2310	if (!link_is_up(ntb))
2311		return (NTB_SPEED_NONE);
2312	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2313}
2314
2315static inline enum ntb_width
2316ntb_link_sta_width(struct ntb_softc *ntb)
2317{
2318
2319	if (!link_is_up(ntb))
2320		return (NTB_WIDTH_NONE);
2321	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2322}
2323
2324SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
2325    "Driver state, statistics, and HW registers");
2326
/*
 * Flag bits packed into the upper bits of the arg2 value handed to
 * sysctl_handle_register(); the remaining low bits carry the register offset.
 */

/* Access width selector, bits 31:30. */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_8	(0ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)

/* Read through db_ioread() rather than a plain register read. */
#define NTB_DB_READ	(1ul << 29)
/* Offset is in PCI config space rather than the register BAR. */
#define NTB_PCI_REG	(1ul << 28)

#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2336
2337static void
2338ntb_sysctl_init(struct ntb_softc *ntb)
2339{
2340	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
2341	struct sysctl_ctx_list *ctx;
2342	struct sysctl_oid *tree, *tmptree;
2343
2344	ctx = device_get_sysctl_ctx(ntb->device);
2345	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));
2346
2347	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
2348	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
2349	    sysctl_handle_link_status_human, "A",
2350	    "Link status (human readable)");
2351	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
2352	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
2353	    "IU", "Link status (1=active, 0=inactive)");
2354	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
2355	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
2356	    "IU", "Set/get interface status (1=UP, 0=DOWN)");
2357
2358	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
2359	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
2360	tree_par = SYSCTL_CHILDREN(tree);
2361
2362	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
2363	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
2364	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
2365	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
2366	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
2367	    &ntb->ppd, 0, "Raw PPD register (cached)");
2368
2369	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
2370		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
2371		    &ntb->b2b_mw_idx, 0,
2372		    "Index of the MW used for B2B remote register access");
2373		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
2374		    CTLFLAG_RD, &ntb->b2b_off,
2375		    "If non-zero, offset of B2B register region in shared MW");
2376	}
2377
2378	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
2379	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
2380	    "Features/errata of this NTB device");
2381
2382	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
2383	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
2384	    "NTB CTL register (cached)");
2385	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
2386	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
2387	    "LNK STA register (cached)");
2388
2389	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
2390	    &ntb->mw_count, 0, "MW count");
2391	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
2392	    &ntb->spad_count, 0, "Scratchpad count");
2393	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
2394	    &ntb->db_count, 0, "Doorbell count");
2395	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
2396	    &ntb->db_vec_count, 0, "Doorbell vector count");
2397	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
2398	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
2399
2400	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2401	    &ntb->db_valid_mask, "Doorbell valid mask");
2402	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2403	    &ntb->db_link_mask, "Doorbell link mask");
2404	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2405	    &ntb->db_mask, "Doorbell mask (cached)");
2406
2407	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2408	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
2409	regpar = SYSCTL_CHILDREN(tmptree);
2410
2411	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2412	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2413	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2414	    "NTB Control register");
2415	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2416	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2417	    0x19c, sysctl_handle_register, "IU",
2418	    "NTB Link Capabilities");
2419	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2420	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2421	    0x1a0, sysctl_handle_register, "IU",
2422	    "NTB Link Control register");
2423
2424	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2425	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2426	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2427	    sysctl_handle_register, "QU", "Doorbell mask register");
2428	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2429	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2430	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2431	    sysctl_handle_register, "QU", "Doorbell register");
2432
2433	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2434	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2435	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2436	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2437	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2438		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2439		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2440		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2441		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2442		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2443		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2444		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2445		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2446	} else {
2447		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2448		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2449		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2450		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2451	}
2452
2453	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2454	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2455	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2456	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2457	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2458		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2459		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2460		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2461		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2462		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2463		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2464		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2465		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2466	} else {
2467		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2468		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2469		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2470		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2471	}
2472
2473	if (ntb->type == NTB_ATOM)
2474		return;
2475
2476	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2477	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2478	statpar = SYSCTL_CHILDREN(tmptree);
2479	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2480	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2481	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2482	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2483
2484	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2485	    CTLFLAG_RD, NULL, "Xeon HW errors");
2486	errpar = SYSCTL_CHILDREN(tmptree);
2487
2488	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
2489	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2490	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
2491	    sysctl_handle_register, "CU", "PPD");
2492
2493	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
2494	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2495	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
2496	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
2497	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
2498	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2499	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
2500	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
2501	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
2502	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2503	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
2504	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
2505
2506	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
2507	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2508	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
2509	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
2510	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
2511	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2512	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
2513	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
2514	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
2515	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2516	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
2517	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
2518
2519	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
2520	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2521	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2522	    sysctl_handle_register, "SU", "DEVSTS");
2523	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
2524	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2525	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2526	    sysctl_handle_register, "SU", "LNKSTS");
2527	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
2528	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2529	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
2530	    sysctl_handle_register, "SU", "SLNKSTS");
2531
2532	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2533	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2534	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2535	    sysctl_handle_register, "IU", "UNCERRSTS");
2536	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2537	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2538	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2539	    sysctl_handle_register, "IU", "CORERRSTS");
2540
2541	if (ntb->conn_type != NTB_CONN_B2B)
2542		return;
2543
2544	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2545	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2546	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2547	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2548	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2549		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2550		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2551		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2552		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2553		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2554		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2555		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2556		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2557	} else {
2558		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2559		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2560		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2561		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2562	}
2563
2564	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2565	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2566	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2567	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2568	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2569		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2570		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2571		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2572		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2573		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2574		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2575		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2576		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2577	} else {
2578		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2579		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2580		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2581		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2582	}
2583
2584	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2585	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2586	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2587	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2588	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2589	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2590	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2591	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2592	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2593		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2594		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2595		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2596		    sysctl_handle_register, "IU",
2597		    "Secondary BAR4 base register");
2598		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2599		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2600		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2601		    sysctl_handle_register, "IU",
2602		    "Secondary BAR5 base register");
2603	} else {
2604		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2605		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2606		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2607		    sysctl_handle_register, "QU",
2608		    "Secondary BAR45 base register");
2609	}
2610}
2611
2612static int
2613sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2614{
2615	struct ntb_softc *ntb;
2616	struct sbuf sb;
2617	int error;
2618
2619	error = 0;
2620	ntb = arg1;
2621
2622	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2623
2624	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2625	error = sbuf_finish(&sb);
2626	sbuf_delete(&sb);
2627
2628	if (error || !req->newptr)
2629		return (error);
2630	return (EINVAL);
2631}
2632
2633static int
2634sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
2635{
2636	struct ntb_softc *ntb;
2637	unsigned old, new;
2638	int error;
2639
2640	error = 0;
2641	ntb = arg1;
2642
2643	old = ntb_link_enabled(ntb);
2644
2645	error = SYSCTL_OUT(req, &old, sizeof(old));
2646	if (error != 0 || req->newptr == NULL)
2647		return (error);
2648
2649	error = SYSCTL_IN(req, &new, sizeof(new));
2650	if (error != 0)
2651		return (error);
2652
2653	ntb_printf(0, "Admin set interface state to '%sabled'\n",
2654	    (new != 0)? "en" : "dis");
2655
2656	if (new != 0)
2657		error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2658	else
2659		error = ntb_link_disable(ntb);
2660	return (error);
2661}
2662
2663static int
2664sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
2665{
2666	struct ntb_softc *ntb;
2667	struct sbuf sb;
2668	enum ntb_speed speed;
2669	enum ntb_width width;
2670	int error;
2671
2672	error = 0;
2673	ntb = arg1;
2674
2675	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2676
2677	if (ntb_link_is_up(ntb, &speed, &width))
2678		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2679		    (unsigned)speed, (unsigned)width);
2680	else
2681		sbuf_printf(&sb, "down");
2682
2683	error = sbuf_finish(&sb);
2684	sbuf_delete(&sb);
2685
2686	if (error || !req->newptr)
2687		return (error);
2688	return (EINVAL);
2689}
2690
2691static int
2692sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2693{
2694	struct ntb_softc *ntb;
2695	unsigned res;
2696	int error;
2697
2698	error = 0;
2699	ntb = arg1;
2700
2701	res = ntb_link_is_up(ntb, NULL, NULL);
2702
2703	error = SYSCTL_OUT(req, &res, sizeof(res));
2704	if (error || !req->newptr)
2705		return (error);
2706	return (EINVAL);
2707}
2708
2709static int
2710sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2711{
2712	struct ntb_softc *ntb;
2713	const void *outp;
2714	uintptr_t sz;
2715	uint64_t umv;
2716	char be[sizeof(umv)];
2717	size_t outsz;
2718	uint32_t reg;
2719	bool db, pci;
2720	int error;
2721
2722	ntb = arg1;
2723	reg = arg2 & ~NTB_REGFLAGS_MASK;
2724	sz = arg2 & NTB_REGSZ_MASK;
2725	db = (arg2 & NTB_DB_READ) != 0;
2726	pci = (arg2 & NTB_PCI_REG) != 0;
2727
2728	KASSERT(!(db && pci), ("bogus"));
2729
2730	if (db) {
2731		KASSERT(sz == NTB_REG_64, ("bogus"));
2732		umv = db_ioread(ntb, reg);
2733		outsz = sizeof(uint64_t);
2734	} else {
2735		switch (sz) {
2736		case NTB_REG_64:
2737			if (pci)
2738				umv = pci_read_config(ntb->device, reg, 8);
2739			else
2740				umv = ntb_reg_read(8, reg);
2741			outsz = sizeof(uint64_t);
2742			break;
2743		case NTB_REG_32:
2744			if (pci)
2745				umv = pci_read_config(ntb->device, reg, 4);
2746			else
2747				umv = ntb_reg_read(4, reg);
2748			outsz = sizeof(uint32_t);
2749			break;
2750		case NTB_REG_16:
2751			if (pci)
2752				umv = pci_read_config(ntb->device, reg, 2);
2753			else
2754				umv = ntb_reg_read(2, reg);
2755			outsz = sizeof(uint16_t);
2756			break;
2757		case NTB_REG_8:
2758			if (pci)
2759				umv = pci_read_config(ntb->device, reg, 1);
2760			else
2761				umv = ntb_reg_read(1, reg);
2762			outsz = sizeof(uint8_t);
2763			break;
2764		default:
2765			panic("bogus");
2766			break;
2767		}
2768	}
2769
2770	/* Encode bigendian so that sysctl -x is legible. */
2771	be64enc(be, umv);
2772	outp = ((char *)be) + sizeof(umv) - outsz;
2773
2774	error = SYSCTL_OUT(req, outp, outsz);
2775	if (error || !req->newptr)
2776		return (error);
2777	return (EINVAL);
2778}
2779
2780static unsigned
2781ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2782{
2783
2784	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2785	    uidx >= ntb->b2b_mw_idx) ||
2786	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2787		uidx++;
2788	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2789	    uidx >= ntb->b2b_mw_idx) &&
2790	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2791		uidx++;
2792	return (uidx);
2793}
2794
2795static void
2796ntb_exchange_msix(void *ctx)
2797{
2798	struct ntb_softc *ntb;
2799	uint32_t val;
2800	unsigned i;
2801
2802	ntb = ctx;
2803
2804	if (ntb->peer_msix_good)
2805		goto msix_good;
2806	if (ntb->peer_msix_done)
2807		goto msix_done;
2808
2809	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2810		ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
2811		    ntb->msix_data[i].nmd_data);
2812		ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
2813		    ntb->msix_data[i].nmd_ofs);
2814	}
2815	ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
2816
2817	ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
2818	if (val != NTB_MSIX_VER_GUARD)
2819		goto reschedule;
2820
2821	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2822		ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
2823		ntb->peer_msix_data[i].nmd_data = val;
2824		ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
2825		ntb->peer_msix_data[i].nmd_ofs = val;
2826	}
2827
2828	ntb->peer_msix_done = true;
2829
2830msix_done:
2831	ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
2832	ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
2833	if (val != NTB_MSIX_RECEIVED)
2834		goto reschedule;
2835
2836	ntb->peer_msix_good = true;
2837	/* Give peer time to see our NTB_MSIX_RECEIVED. */
2838	goto reschedule;
2839
2840msix_good:
2841	ntb_poll_link(ntb);
2842	ntb_link_event(ntb);
2843	return;
2844
2845reschedule:
2846	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2847	if (_xeon_link_is_up(ntb)) {
2848		callout_reset(&ntb->peer_msix_work,
2849		    hz * (ntb->peer_msix_good ? 2 : 1) / 100,
2850		    ntb_exchange_msix, ntb);
2851	} else
2852		ntb_spad_clear(ntb);
2853}
2854
2855/*
2856 * Public API to the rest of the OS
2857 */
2858
2859/**
2860 * ntb_get_max_spads() - get the total scratch regs usable
2861 * @ntb: pointer to ntb_softc instance
2862 *
2863 * This function returns the max 32bit scratchpad registers usable by the
2864 * upper layer.
2865 *
2866 * RETURNS: total number of scratch pad registers available
2867 */
2868uint8_t
2869ntb_get_max_spads(struct ntb_softc *ntb)
2870{
2871
2872	return (ntb->spad_count);
2873}
2874
2875/*
2876 * ntb_mw_count() - Get the number of memory windows available for KPI
2877 * consumers.
2878 *
2879 * (Excludes any MW wholly reserved for register access.)
2880 */
2881uint8_t
2882ntb_mw_count(struct ntb_softc *ntb)
2883{
2884	uint8_t res;
2885
2886	res = ntb->mw_count;
2887	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2888		res--;
2889	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
2890		res--;
2891	return (res);
2892}
2893
2894/**
2895 * ntb_spad_write() - write to the secondary scratchpad register
2896 * @ntb: pointer to ntb_softc instance
2897 * @idx: index to the scratchpad register, 0 based
2898 * @val: the data value to put into the register
2899 *
2900 * This function allows writing of a 32bit value to the indexed scratchpad
2901 * register. The register resides on the secondary (external) side.
2902 *
2903 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2904 */
2905int
2906ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2907{
2908
2909	if (idx >= ntb->spad_count)
2910		return (EINVAL);
2911
2912	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2913
2914	return (0);
2915}
2916
2917/*
2918 * Zeros the local scratchpad.
2919 */
2920void
2921ntb_spad_clear(struct ntb_softc *ntb)
2922{
2923	unsigned i;
2924
2925	for (i = 0; i < ntb->spad_count; i++)
2926		ntb_spad_write(ntb, i, 0);
2927}
2928
2929/**
2930 * ntb_spad_read() - read from the primary scratchpad register
2931 * @ntb: pointer to ntb_softc instance
2932 * @idx: index to scratchpad register, 0 based
2933 * @val: pointer to 32bit integer for storing the register value
2934 *
2935 * This function allows reading of the 32bit scratchpad register on
2936 * the primary (internal) side.
2937 *
2938 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2939 */
2940int
2941ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2942{
2943
2944	if (idx >= ntb->spad_count)
2945		return (EINVAL);
2946
2947	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2948
2949	return (0);
2950}
2951
2952/**
2953 * ntb_peer_spad_write() - write to the secondary scratchpad register
2954 * @ntb: pointer to ntb_softc instance
2955 * @idx: index to the scratchpad register, 0 based
2956 * @val: the data value to put into the register
2957 *
2958 * This function allows writing of a 32bit value to the indexed scratchpad
2959 * register. The register resides on the secondary (external) side.
2960 *
2961 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2962 */
2963int
2964ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2965{
2966
2967	if (idx >= ntb->spad_count)
2968		return (EINVAL);
2969
2970	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2971		ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2972	else
2973		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2974
2975	return (0);
2976}
2977
2978/**
2979 * ntb_peer_spad_read() - read from the primary scratchpad register
2980 * @ntb: pointer to ntb_softc instance
2981 * @idx: index to scratchpad register, 0 based
2982 * @val: pointer to 32bit integer for storing the register value
2983 *
2984 * This function allows reading of the 32bit scratchpad register on
2985 * the primary (internal) side.
2986 *
2987 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2988 */
2989int
2990ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2991{
2992
2993	if (idx >= ntb->spad_count)
2994		return (EINVAL);
2995
2996	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2997		*val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2998	else
2999		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
3000
3001	return (0);
3002}
3003
3004/*
3005 * ntb_mw_get_range() - get the range of a memory window
3006 * @ntb:        NTB device context
3007 * @idx:        Memory window number
3008 * @base:       OUT - the base address for mapping the memory window
3009 * @size:       OUT - the size for mapping the memory window
3010 * @align:      OUT - the base alignment for translating the memory window
3011 * @align_size: OUT - the size alignment for translating the memory window
3012 *
3013 * Get the range of a memory window.  NULL may be given for any output
3014 * parameter if the value is not needed.  The base and size may be used for
3015 * mapping the memory window, to access the peer memory.  The alignment and
3016 * size may be used for translating the memory window, for the peer to access
3017 * memory on the local system.
3018 *
3019 * Return: Zero on success, otherwise an error number.
3020 */
3021int
3022ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
3023    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
3024    bus_addr_t *plimit)
3025{
3026	struct ntb_pci_bar_info *bar;
3027	bus_addr_t limit;
3028	size_t bar_b2b_off;
3029	enum ntb_bar bar_num;
3030
3031	if (mw_idx >= ntb_mw_count(ntb))
3032		return (EINVAL);
3033	mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
3034
3035	bar_num = ntb_mw_to_bar(ntb, mw_idx);
3036	bar = &ntb->bar_info[bar_num];
3037	bar_b2b_off = 0;
3038	if (mw_idx == ntb->b2b_mw_idx) {
3039		KASSERT(ntb->b2b_off != 0,
3040		    ("user shouldn't get non-shared b2b mw"));
3041		bar_b2b_off = ntb->b2b_off;
3042	}
3043
3044	if (bar_is_64bit(ntb, bar_num))
3045		limit = BUS_SPACE_MAXADDR;
3046	else
3047		limit = BUS_SPACE_MAXADDR_32BIT;
3048
3049	if (base != NULL)
3050		*base = bar->pbase + bar_b2b_off;
3051	if (vbase != NULL)
3052		*vbase = bar->vbase + bar_b2b_off;
3053	if (size != NULL)
3054		*size = bar->size - bar_b2b_off;
3055	if (align != NULL)
3056		*align = bar->size;
3057	if (align_size != NULL)
3058		*align_size = 1;
3059	if (plimit != NULL)
3060		*plimit = limit;
3061	return (0);
3062}
3063
3064/*
3065 * ntb_mw_set_trans() - set the translation of a memory window
3066 * @ntb:        NTB device context
3067 * @idx:        Memory window number
3068 * @addr:       The dma address local memory to expose to the peer
3069 * @size:       The size of the local memory to expose to the peer
3070 *
3071 * Set the translation of a memory window.  The peer may access local memory
3072 * through the window starting at the address, up to the size.  The address
3073 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
3074 * must be aligned to the size alignment specified by ntb_mw_get_range().  The
3075 * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
3076 * 32-bit BARs).
3077 *
3078 * Return: Zero on success, otherwise an error number.
3079 */
3080int
3081ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
3082    size_t size)
3083{
3084	struct ntb_pci_bar_info *bar;
3085	uint64_t base, limit, reg_val;
3086	size_t bar_size, mw_size;
3087	uint32_t base_reg, xlat_reg, limit_reg;
3088	enum ntb_bar bar_num;
3089
3090	if (idx >= ntb_mw_count(ntb))
3091		return (EINVAL);
3092	idx = ntb_user_mw_to_idx(ntb, idx);
3093
3094	bar_num = ntb_mw_to_bar(ntb, idx);
3095	bar = &ntb->bar_info[bar_num];
3096
3097	bar_size = bar->size;
3098	if (idx == ntb->b2b_mw_idx)
3099		mw_size = bar_size - ntb->b2b_off;
3100	else
3101		mw_size = bar_size;
3102
3103	/* Hardware requires that addr is aligned to bar size */
3104	if ((addr & (bar_size - 1)) != 0)
3105		return (EINVAL);
3106
3107	if (size > mw_size)
3108		return (EINVAL);
3109
3110	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
3111
3112	limit = 0;
3113	if (bar_is_64bit(ntb, bar_num)) {
3114		base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
3115
3116		if (limit_reg != 0 && size != mw_size)
3117			limit = base + size;
3118
3119		/* Set and verify translation address */
3120		ntb_reg_write(8, xlat_reg, addr);
3121		reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
3122		if (reg_val != addr) {
3123			ntb_reg_write(8, xlat_reg, 0);
3124			return (EIO);
3125		}
3126
3127		/* Set and verify the limit */
3128		ntb_reg_write(8, limit_reg, limit);
3129		reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
3130		if (reg_val != limit) {
3131			ntb_reg_write(8, limit_reg, base);
3132			ntb_reg_write(8, xlat_reg, 0);
3133			return (EIO);
3134		}
3135	} else {
3136		/* Configure 32-bit (split) BAR MW */
3137
3138		if ((addr & UINT32_MAX) != addr)
3139			return (ERANGE);
3140		if (((addr + size) & UINT32_MAX) != (addr + size))
3141			return (ERANGE);
3142
3143		base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
3144
3145		if (limit_reg != 0 && size != mw_size)
3146			limit = base + size;
3147
3148		/* Set and verify translation address */
3149		ntb_reg_write(4, xlat_reg, addr);
3150		reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
3151		if (reg_val != addr) {
3152			ntb_reg_write(4, xlat_reg, 0);
3153			return (EIO);
3154		}
3155
3156		/* Set and verify the limit */
3157		ntb_reg_write(4, limit_reg, limit);
3158		reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
3159		if (reg_val != limit) {
3160			ntb_reg_write(4, limit_reg, base);
3161			ntb_reg_write(4, xlat_reg, 0);
3162			return (EIO);
3163		}
3164	}
3165	return (0);
3166}
3167
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	Memory window number, in the caller's (user) numbering
 *
 * Implemented as ntb_mw_set_trans() with a zero address and a zero size, so
 * the peer may no longer access local memory through the window.
 *
 * Return: Whatever ntb_mw_set_trans() returns: zero on success, otherwise an
 * error number.
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int error;

	error = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (error);
}
3184
3185/*
3186 * ntb_mw_get_wc - Get the write-combine status of a memory window
3187 *
3188 * Returns:  Zero on success, setting *wc; otherwise an error number (e.g. if
3189 * idx is an invalid memory window).
3190 *
3191 * Mode is a VM_MEMATTR_* type.
3192 */
3193int
3194ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
3195{
3196	struct ntb_pci_bar_info *bar;
3197
3198	if (idx >= ntb_mw_count(ntb))
3199		return (EINVAL);
3200	idx = ntb_user_mw_to_idx(ntb, idx);
3201
3202	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
3203	*mode = bar->map_mode;
3204	return (0);
3205}
3206
3207/*
3208 * ntb_mw_set_wc - Set the write-combine status of a memory window
3209 *
3210 * If 'mode' matches the current status, this does nothing and succeeds.  Mode
3211 * is a VM_MEMATTR_* type.
3212 *
3213 * Returns:  Zero on success, setting the caching attribute on the virtual
3214 * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
3215 * memory window, or if changing the caching attribute fails).
3216 */
3217int
3218ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3219{
3220
3221	if (idx >= ntb_mw_count(ntb))
3222		return (EINVAL);
3223
3224	idx = ntb_user_mw_to_idx(ntb, idx);
3225	return (ntb_mw_set_wc_internal(ntb, idx, mode));
3226}
3227
3228static int
3229ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3230{
3231	struct ntb_pci_bar_info *bar;
3232	int rc;
3233
3234	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
3235	if (bar->map_mode == mode)
3236		return (0);
3237
3238	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
3239	if (rc == 0)
3240		bar->map_mode = mode;
3241
3242	return (rc);
3243}
3244
3245/**
3246 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
3247 * @ntb: pointer to ntb_softc instance
3248 * @bit: doorbell bits to ring
3249 *
3250 * This function allows triggering of a doorbell on the secondary/external
3251 * side that will initiate an interrupt on the remote host
3252 */
3253void
3254ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
3255{
3256
3257	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
3258		struct ntb_pci_bar_info *lapic;
3259		unsigned i;
3260
3261		lapic = ntb->peer_lapic_bar;
3262
3263		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3264			if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
3265				bus_space_write_4(lapic->pci_bus_tag,
3266				    lapic->pci_bus_handle,
3267				    ntb->peer_msix_data[i].nmd_ofs,
3268				    ntb->peer_msix_data[i].nmd_data);
3269		}
3270		return;
3271	}
3272
3273	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
3274		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
3275		return;
3276	}
3277
3278	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
3279}
3280
3281/*
3282 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
3283 * as well as the size of the register (via *sz_out).
3284 *
3285 * This function allows a caller using I/OAT DMA to chain the remote doorbell
3286 * ring to its memory window write.
3287 *
3288 * Note that writing the peer doorbell via a memory window will *not* generate
3289 * an interrupt on the remote host; that must be done separately.
3290 */
3291bus_addr_t
3292ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
3293{
3294	struct ntb_pci_bar_info *bar;
3295	uint64_t regoff;
3296
3297	KASSERT(sz_out != NULL, ("must be non-NULL"));
3298
3299	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
3300		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3301		regoff = ntb->peer_reg->db_bell;
3302	} else {
3303		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3304		    ("invalid b2b idx"));
3305
3306		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3307		regoff = XEON_PDOORBELL_OFFSET;
3308	}
3309	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3310
3311	*sz_out = ntb->reg->db_size;
3312	/* HACK: Specific to current x86 bus implementation. */
3313	return ((uint64_t)bar->pci_bus_handle + regoff);
3314}
3315
3316/*
3317 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
3318 * @ntb:	NTB device context
3319 *
3320 * Hardware may support different number or arrangement of doorbell bits.
3321 *
3322 * Return: A mask of doorbell bits supported by the ntb.
3323 */
3324uint64_t
3325ntb_db_valid_mask(struct ntb_softc *ntb)
3326{
3327
3328	return (ntb->db_valid_mask);
3329}
3330
3331/*
3332 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
3333 * @ntb:	NTB device context
3334 * @vector:	Doorbell vector number
3335 *
3336 * Each interrupt vector may have a different number or arrangement of bits.
3337 *
3338 * Return: A mask of doorbell bits serviced by a vector.
3339 */
3340uint64_t
3341ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
3342{
3343
3344	if (vector > ntb->db_vec_count)
3345		return (0);
3346	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
3347}
3348
3349/**
3350 * ntb_link_is_up() - get the current ntb link state
3351 * @ntb:        NTB device context
3352 * @speed:      OUT - The link speed expressed as PCIe generation number
3353 * @width:      OUT - The link width expressed as the number of PCIe lanes
3354 *
3355 * RETURNS: true or false based on the hardware link state
3356 */
3357bool
3358ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
3359    enum ntb_width *width)
3360{
3361
3362	if (speed != NULL)
3363		*speed = ntb_link_sta_speed(ntb);
3364	if (width != NULL)
3365		*width = ntb_link_sta_width(ntb);
3366	return (link_is_up(ntb));
3367}
3368
3369static void
3370save_bar_parameters(struct ntb_pci_bar_info *bar)
3371{
3372
3373	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3374	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3375	bar->pbase = rman_get_start(bar->pci_resource);
3376	bar->size = rman_get_size(bar->pci_resource);
3377	bar->vbase = rman_get_virtual(bar->pci_resource);
3378}
3379
3380device_t
3381ntb_get_device(struct ntb_softc *ntb)
3382{
3383
3384	return (ntb->device);
3385}
3386
/*
 * Export HW-specific errata information: reports the result of the
 * HAS_FEATURE() test for the given feature value on this device.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
3394