ntb_hw.c revision 301903
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c 301903 2016-06-15 01:41:43Z mav $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/mutex.h>
39#include <sys/pciio.h>
40#include <sys/queue.h>
41#include <sys/rman.h>
42#include <sys/sbuf.h>
43#include <sys/sysctl.h>
44#include <vm/vm.h>
45#include <vm/pmap.h>
46#include <machine/bus.h>
47#include <machine/intr_machdep.h>
48#include <machine/pmap.h>
49#include <machine/resource.h>
50#include <dev/pci/pcireg.h>
51#include <dev/pci/pcivar.h>
52
53#include "ntb_regs.h"
54#include "ntb_hw.h"
55
/*
 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
 * allows you to connect two systems using a PCI-e link.
 *
 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
 * receive messages in the scratch-pad registers.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
67
/* Upper bound on interrupt slots: the larger of the two doorbell counts. */
#define	MAX_MSIX_INTERRUPTS	MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

/* Timing constants. */
#define	NTB_HB_TIMEOUT		1	/* second */
#define	ATOM_LINK_RECOVERY_TIME	500	/* ms */

/* Mask that clears the low 12 bits of a 64-bit value. */
#define	BAR_HIGH_MASK		(~((1ull << 12) - 1))

/* Fetch the driver softc attached to a device_t. */
#define	DEVICE2SOFTC(dev)	((struct ntb_softc *) device_get_softc(dev))

/* Magic values used by the MSI-X exchange (see users for exact protocol). */
#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0

#define	ONE_MB			(1024u * 1024)
79
/*
 * Layout of one MSI-X table entry: total size in bytes and the byte offsets
 * of its lower address, upper address and data words.  These PCI constants
 * could live somewhere more generic, but pci.c neither defines nor uses them.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8
88
/* Hardware family of the bridge; selects the Xeon or Atom code paths. */
enum ntb_device_type {
	NTB_XEON = 0,
	NTB_ATOM = 1
};
93
/*
 * Connection topology.  These values are the numbers the hardware itself
 * uses, so they must never be renumbered.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT	= 0,
	NTB_CONN_B2B		= 1,
	NTB_CONN_RP		= 2,
};
100
/* Which side of a back-to-back link this device is (USD or DSD). */
enum ntb_b2b_direction {
	NTB_DEV_USD	= 0,
	NTB_DEV_DSD	= 1,
};
105
/*
 * Index into the softc's bar_info[] array.  Slot 0 is the configuration
 * (register) BAR; the remaining slots are the B2B memory-window BARs.
 * NTB_MAX_BARS is the array size, not a valid index.
 */
enum ntb_bar {
	NTB_CONFIG_BAR	= 0,
	NTB_B2B_BAR_1	= 1,
	NTB_B2B_BAR_2	= 2,
	NTB_B2B_BAR_3	= 3,
	NTB_MAX_BARS	= 4
};
113
/*
 * Slot numbers used for the MSI-X information exchange: a guard slot, three
 * data slots, three offset slots and a completion slot.  NTB_MAX_MSIX_SPAD
 * is the number of slots.
 */
enum {
	NTB_MSIX_GUARD		= 0,
	NTB_MSIX_DATA0		= 1,
	NTB_MSIX_DATA1		= 2,
	NTB_MSIX_DATA2		= 3,
	NTB_MSIX_OFS0		= 4,
	NTB_MSIX_OFS1		= 5,
	NTB_MSIX_OFS2		= 6,
	NTB_MSIX_DONE		= 7,
	NTB_MAX_MSIX_SPAD	= 8
};
125
/*
 * Device features and workarounds.
 *
 * True when any bit of `feature' is set in ntb->features.  Relies on a
 * variable named `ntb' being in scope at the point of use.
 */
#define	HAS_FEATURE(feature)	((ntb->features & (feature)) != 0)
129
130struct ntb_hw_info {
131	uint32_t		device_id;
132	const char		*desc;
133	enum ntb_device_type	type;
134	uint32_t		features;
135};
136
137struct ntb_pci_bar_info {
138	bus_space_tag_t		pci_bus_tag;
139	bus_space_handle_t	pci_bus_handle;
140	int			pci_resource_id;
141	struct resource		*pci_resource;
142	vm_paddr_t		pbase;
143	caddr_t			vbase;
144	vm_size_t		size;
145	vm_memattr_t		map_mode;
146
147	/* Configuration register offsets */
148	uint32_t		psz_off;
149	uint32_t		ssz_off;
150	uint32_t		pbarxlat_off;
151};
152
/* State for one allocated interrupt. */
struct ntb_int_info {
	struct resource	*res;	/* IRQ resource */
	int		rid;	/* resource id used to allocate `res' */
	void		*tag;	/* cookie filled in by bus_setup_intr() */
};
158
/* Argument handed to the per-vector MSI-X interrupt handler. */
struct ntb_vec {
	struct ntb_softc	*ntb;		/* owning device */
	uint32_t		num;		/* vector number */
	unsigned		masked;
};
164
165struct ntb_reg {
166	uint32_t	ntb_ctl;
167	uint32_t	lnk_sta;
168	uint8_t		db_size;
169	unsigned	mw_bar[NTB_MAX_BARS];
170};
171
/* Register offsets for one doorbell / scratchpad bank. */
struct ntb_alt_reg {
	uint32_t	db_bell;	/* doorbell register */
	uint32_t	db_mask;	/* doorbell mask register */
	uint32_t	spad;		/* scratchpad registers */
};
177
/* Register offsets of the BAR base / translation / limit registers. */
struct ntb_xlat_reg {
	/* Base address registers. */
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	/* Address translation registers. */
	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	/* Limit registers. */
	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
192
/* Addresses used for each BAR in the back-to-back (B2B) configuration. */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;	/* BAR4 as a 64-bit BAR */
	uint64_t	bar4_addr32;	/* BAR4 in split-BAR mode */
	uint64_t	bar5_addr32;	/* BAR5 in split-BAR mode */
};
200
/* An (offset, data) pair describing one MSI-X message. */
struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};
205
206struct ntb_softc {
207	device_t		device;
208	enum ntb_device_type	type;
209	uint32_t		features;
210
211	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
212	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
213	uint32_t		allocated_interrupts;
214
215	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
216	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
217	bool			peer_msix_good;
218	bool			peer_msix_done;
219	struct ntb_pci_bar_info	*peer_lapic_bar;
220	struct callout		peer_msix_work;
221
222	struct callout		heartbeat_timer;
223	struct callout		lr_timer;
224
225	void			*ntb_ctx;
226	const struct ntb_ctx_ops *ctx_ops;
227	struct ntb_vec		*msix_vec;
228#define CTX_LOCK(sc)		mtx_lock(&(sc)->ctx_lock)
229#define CTX_UNLOCK(sc)		mtx_unlock(&(sc)->ctx_lock)
230#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
231	struct mtx		ctx_lock;
232
233	uint32_t		ppd;
234	enum ntb_conn_type	conn_type;
235	enum ntb_b2b_direction	dev_type;
236
237	/* Offset of peer bar0 in B2B BAR */
238	uint64_t			b2b_off;
239	/* Memory window used to access peer bar0 */
240#define B2B_MW_DISABLED			UINT8_MAX
241	uint8_t				b2b_mw_idx;
242	uint8_t				msix_mw_idx;
243
244	uint8_t				mw_count;
245	uint8_t				spad_count;
246	uint8_t				db_count;
247	uint8_t				db_vec_count;
248	uint8_t				db_vec_shift;
249
250	/* Protects local db_mask. */
251#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
252#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
253#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
254	struct mtx			db_mask_lock;
255
256	volatile uint32_t		ntb_ctl;
257	volatile uint32_t		lnk_sta;
258
259	uint64_t			db_valid_mask;
260	uint64_t			db_link_mask;
261	uint64_t			db_mask;
262
263	int				last_ts;	/* ticks @ last irq */
264
265	const struct ntb_reg		*reg;
266	const struct ntb_alt_reg	*self_reg;
267	const struct ntb_alt_reg	*peer_reg;
268	const struct ntb_xlat_reg	*xlat_reg;
269};
270
#ifdef __i386__
/*
 * 64-bit bus-space accessors for i386, built from two 32-bit accesses:
 * the low word lives at `offset' and the high word at `offset + 4'.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{
	uint32_t lo, hi;

	lo = (uint32_t)val;
	hi = (uint32_t)(val >> 32);
	bus_space_write_4(tag, handle, offset, lo);
	bus_space_write_4(tag, handle, offset + 4, hi);
}
#endif
290
/*
 * Register access helpers.  SIZE is the access width in bytes and is pasted
 * onto bus_space_read_ / bus_space_write_.  All of these expect a variable
 * named `ntb' (struct ntb_softc *) to be in scope.
 */

/* Access an arbitrary BAR by its enum ntb_bar index. */
#define	ntb_bar_read(SIZE, bar, offset)					\
	bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define	ntb_bar_write(SIZE, bar, offset, val)				\
	bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))

/* Access the local configuration BAR. */
#define	ntb_reg_read(SIZE, offset)					\
	ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define	ntb_reg_write(SIZE, offset, val)				\
	ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)

/* Access the BAR backing the B2B memory window (ntb->b2b_mw_idx). */
#define	ntb_mw_read(SIZE, offset)					\
	ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define	ntb_mw_write(SIZE, offset, val)					\
	ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx),	\
	    offset, val)
305
306static int ntb_probe(device_t device);
307static int ntb_attach(device_t device);
308static int ntb_detach(device_t device);
309static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
310static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
311static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
312static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
313    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
314static int ntb_map_pci_bars(struct ntb_softc *ntb);
315static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
316    vm_memattr_t);
317static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
318    const char *);
319static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
320static int map_memory_window_bar(struct ntb_softc *ntb,
321    struct ntb_pci_bar_info *bar);
322static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
323static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
324static int ntb_init_isr(struct ntb_softc *ntb);
325static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
326static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
327static void ntb_teardown_interrupts(struct ntb_softc *ntb);
328static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
329static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
330static void ndev_vec_isr(void *arg);
331static void ndev_irq_isr(void *arg);
332static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
333static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
334static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
335static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
336static void ntb_free_msix_vec(struct ntb_softc *ntb);
337static void ntb_get_msix_info(struct ntb_softc *ntb);
338static void ntb_exchange_msix(void *);
339static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
340static void ntb_detect_max_mw(struct ntb_softc *ntb);
341static int ntb_detect_xeon(struct ntb_softc *ntb);
342static int ntb_detect_atom(struct ntb_softc *ntb);
343static int ntb_xeon_init_dev(struct ntb_softc *ntb);
344static int ntb_atom_init_dev(struct ntb_softc *ntb);
345static void ntb_teardown_xeon(struct ntb_softc *ntb);
346static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
347static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
348    enum ntb_bar regbar);
349static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
350    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
351static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
352    enum ntb_bar idx);
353static int xeon_setup_b2b_mw(struct ntb_softc *,
354    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
355static int xeon_setup_msix_bar(struct ntb_softc *);
356static inline bool link_is_up(struct ntb_softc *ntb);
357static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
358static inline bool atom_link_is_err(struct ntb_softc *ntb);
359static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
360static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
361static void atom_link_hb(void *arg);
362static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
363static void recover_atom_link(void *arg);
364static bool ntb_poll_link(struct ntb_softc *ntb);
365static void save_bar_parameters(struct ntb_pci_bar_info *bar);
366static void ntb_sysctl_init(struct ntb_softc *);
367static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
368static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
369static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
370static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
371static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
372
373static unsigned g_ntb_hw_debug_level;
374TUNABLE_INT("hw.ntb.debug_level", &g_ntb_hw_debug_level);
375SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
376    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
377#define ntb_printf(lvl, ...) do {				\
378	if ((lvl) <= g_ntb_hw_debug_level) {			\
379		device_printf(ntb->device, __VA_ARGS__);	\
380	}							\
381} while (0)
382
383#define	_NTB_PAT_UC	0
384#define	_NTB_PAT_WC	1
385#define	_NTB_PAT_WT	4
386#define	_NTB_PAT_WP	5
387#define	_NTB_PAT_WB	6
388#define	_NTB_PAT_UCM	7
389static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
390TUNABLE_INT("hw.ntb.default_mw_pat", &g_ntb_mw_pat);
391SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
392    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
393    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
394    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
395    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
396    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
397    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
398    "UC-: " __XSTRING(_NTB_PAT_UCM));
399
400static inline vm_memattr_t
401ntb_pat_flags(void)
402{
403
404	switch (g_ntb_mw_pat) {
405	case _NTB_PAT_WC:
406		return (VM_MEMATTR_WRITE_COMBINING);
407	case _NTB_PAT_WT:
408		return (VM_MEMATTR_WRITE_THROUGH);
409	case _NTB_PAT_WP:
410		return (VM_MEMATTR_WRITE_PROTECTED);
411	case _NTB_PAT_WB:
412		return (VM_MEMATTR_WRITE_BACK);
413	case _NTB_PAT_UCM:
414		return (VM_MEMATTR_WEAK_UNCACHEABLE);
415	case _NTB_PAT_UC:
416		/* FALLTHROUGH */
417	default:
418		return (VM_MEMATTR_UNCACHEABLE);
419	}
420}
421
422/*
423 * Well, this obviously doesn't belong here, but it doesn't seem to exist
424 * anywhere better yet.
425 */
426static inline const char *
427ntb_vm_memattr_to_str(vm_memattr_t pat)
428{
429
430	switch (pat) {
431	case VM_MEMATTR_WRITE_COMBINING:
432		return ("WRITE_COMBINING");
433	case VM_MEMATTR_WRITE_THROUGH:
434		return ("WRITE_THROUGH");
435	case VM_MEMATTR_WRITE_PROTECTED:
436		return ("WRITE_PROTECTED");
437	case VM_MEMATTR_WRITE_BACK:
438		return ("WRITE_BACK");
439	case VM_MEMATTR_WEAK_UNCACHEABLE:
440		return ("UNCACHED");
441	case VM_MEMATTR_UNCACHEABLE:
442		return ("UNCACHEABLE");
443	default:
444		return ("UNKNOWN");
445	}
446}
447
448static int g_ntb_msix_idx = 0;
449SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
450    0, "Use this memory window to access the peer MSIX message complex on "
451    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
452    "Like b2b_mw_idx, negative values index from the last available memory "
453    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
454
455static int g_ntb_mw_idx = -1;
456TUNABLE_INT("hw.ntb.b2b_mw_idx", &g_ntb_mw_idx);
457SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
458    0, "Use this memory window to access the peer NTB registers.  A "
459    "non-negative value starts from the first MW index; a negative value "
460    "starts from the last MW index.  The default is -1, i.e., the last "
461    "available memory window.  Both sides of the NTB MUST set the same "
462    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
463
464static struct ntb_hw_info pci_ids[] = {
465	/* XXX: PS/SS IDs left out until they are supported. */
466	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
467		NTB_ATOM, 0 },
468
469	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
470		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
471	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
472		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
473	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
474		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
475		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
476	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
477		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
478		    NTB_SB01BASE_LOCKUP },
479	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
480		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
481		    NTB_SB01BASE_LOCKUP },
482
483	{ 0x00000000, NULL, NTB_ATOM, 0 }
484};
485
486static const struct ntb_reg atom_reg = {
487	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
488	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
489	.db_size = sizeof(uint64_t),
490	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
491};
492
493static const struct ntb_alt_reg atom_pri_reg = {
494	.db_bell = ATOM_PDOORBELL_OFFSET,
495	.db_mask = ATOM_PDBMSK_OFFSET,
496	.spad = ATOM_SPAD_OFFSET,
497};
498
499static const struct ntb_alt_reg atom_b2b_reg = {
500	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
501	.spad = ATOM_B2B_SPAD_OFFSET,
502};
503
504static const struct ntb_xlat_reg atom_sec_xlat = {
505#if 0
506	/* "FIXME" says the Linux driver. */
507	.bar0_base = ATOM_SBAR0BASE_OFFSET,
508	.bar2_base = ATOM_SBAR2BASE_OFFSET,
509	.bar4_base = ATOM_SBAR4BASE_OFFSET,
510
511	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
512	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
513#endif
514
515	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
516	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
517};
518
519static const struct ntb_reg xeon_reg = {
520	.ntb_ctl = XEON_NTBCNTL_OFFSET,
521	.lnk_sta = XEON_LINK_STATUS_OFFSET,
522	.db_size = sizeof(uint16_t),
523	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
524};
525
526static const struct ntb_alt_reg xeon_pri_reg = {
527	.db_bell = XEON_PDOORBELL_OFFSET,
528	.db_mask = XEON_PDBMSK_OFFSET,
529	.spad = XEON_SPAD_OFFSET,
530};
531
532static const struct ntb_alt_reg xeon_b2b_reg = {
533	.db_bell = XEON_B2B_DOORBELL_OFFSET,
534	.spad = XEON_B2B_SPAD_OFFSET,
535};
536
537static const struct ntb_xlat_reg xeon_sec_xlat = {
538	.bar0_base = XEON_SBAR0BASE_OFFSET,
539	.bar2_base = XEON_SBAR2BASE_OFFSET,
540	.bar4_base = XEON_SBAR4BASE_OFFSET,
541	.bar5_base = XEON_SBAR5BASE_OFFSET,
542
543	.bar2_limit = XEON_SBAR2LMT_OFFSET,
544	.bar4_limit = XEON_SBAR4LMT_OFFSET,
545	.bar5_limit = XEON_SBAR5LMT_OFFSET,
546
547	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
548	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
549	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
550};
551
552static struct ntb_b2b_addr xeon_b2b_usd_addr = {
553	.bar0_addr = XEON_B2B_BAR0_ADDR,
554	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
555	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
556	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
557	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
558};
559
560static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
561	.bar0_addr = XEON_B2B_BAR0_ADDR,
562	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
563	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
564	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
565	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
566};
567
568SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
569    "B2B MW segment overrides -- MUST be the same on both sides");
570
571TUNABLE_QUAD("hw.ntb.usd_bar2_addr64", &xeon_b2b_usd_addr.bar2_addr64);
572SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
573    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
574    "hardware, use this 64-bit address on the bus between the NTB devices for "
575    "the window at BAR2, on the upstream side of the link.  MUST be the same "
576    "address on both sides.");
577TUNABLE_QUAD("hw.ntb.usd_bar4_addr64", &xeon_b2b_usd_addr.bar4_addr64);
578SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
579    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
580TUNABLE_QUAD("hw.ntb.usd_bar4_addr32", &xeon_b2b_usd_addr.bar4_addr32);
581SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
582    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
583    "(split-BAR mode).");
584TUNABLE_QUAD("hw.ntb.usd_bar5_addr32", &xeon_b2b_usd_addr.bar5_addr32);
585SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
586    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
587    "(split-BAR mode).");
588
589TUNABLE_QUAD("hw.ntb.dsd_bar2_addr64", &xeon_b2b_dsd_addr.bar2_addr64);
590SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
591    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
592    "hardware, use this 64-bit address on the bus between the NTB devices for "
593    "the window at BAR2, on the downstream side of the link.  MUST be the same"
594    " address on both sides.");
595TUNABLE_QUAD("hw.ntb.dsd_bar4_addr64", &xeon_b2b_dsd_addr.bar4_addr64);
596SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
597    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
598TUNABLE_QUAD("hw.ntb.dsd_bar4_addr32", &xeon_b2b_dsd_addr.bar4_addr32);
599SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
600    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
601    "(split-BAR mode).");
602TUNABLE_QUAD("hw.ntb.dsd_bar5_addr32", &xeon_b2b_dsd_addr.bar5_addr32);
603SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
604    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
605    "(split-BAR mode).");
606
607/*
608 * OS <-> Driver interface structures
609 */
610MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
611
612static device_method_t ntb_pci_methods[] = {
613	/* Device interface */
614	DEVMETHOD(device_probe,     ntb_probe),
615	DEVMETHOD(device_attach,    ntb_attach),
616	DEVMETHOD(device_detach,    ntb_detach),
617	DEVMETHOD_END
618};
619
620static driver_t ntb_pci_driver = {
621	"ntb_hw",
622	ntb_pci_methods,
623	sizeof(struct ntb_softc),
624};
625
626static devclass_t ntb_devclass;
627DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
628MODULE_VERSION(ntb_hw, 1);
629
630SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
631
632/*
633 * OS <-> Driver linkage functions
634 */
635static int
636ntb_probe(device_t device)
637{
638	struct ntb_hw_info *p;
639
640	p = ntb_get_device_info(pci_get_devid(device));
641	if (p == NULL)
642		return (ENXIO);
643
644	device_set_desc(device, p->desc);
645	return (0);
646}
647
648static int
649ntb_attach(device_t device)
650{
651	struct ntb_softc *ntb;
652	struct ntb_hw_info *p;
653	int error;
654
655	ntb = DEVICE2SOFTC(device);
656	p = ntb_get_device_info(pci_get_devid(device));
657
658	ntb->device = device;
659	ntb->type = p->type;
660	ntb->features = p->features;
661	ntb->b2b_mw_idx = B2B_MW_DISABLED;
662	ntb->msix_mw_idx = B2B_MW_DISABLED;
663
664	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
665	callout_init(&ntb->heartbeat_timer, CALLOUT_MPSAFE);
666	callout_init(&ntb->lr_timer, CALLOUT_MPSAFE);
667	callout_init(&ntb->peer_msix_work, 1);
668	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
669	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
670
671	if (ntb->type == NTB_ATOM)
672		error = ntb_detect_atom(ntb);
673	else
674		error = ntb_detect_xeon(ntb);
675	if (error != 0)
676		goto out;
677
678	ntb_detect_max_mw(ntb);
679
680	pci_enable_busmaster(ntb->device);
681
682	error = ntb_map_pci_bars(ntb);
683	if (error != 0)
684		goto out;
685	if (ntb->type == NTB_ATOM)
686		error = ntb_atom_init_dev(ntb);
687	else
688		error = ntb_xeon_init_dev(ntb);
689	if (error != 0)
690		goto out;
691
692	ntb_spad_clear(ntb);
693
694	ntb_poll_link(ntb);
695
696	ntb_sysctl_init(ntb);
697
698out:
699	if (error != 0)
700		ntb_detach(device);
701	return (error);
702}
703
704static int
705ntb_detach(device_t device)
706{
707	struct ntb_softc *ntb;
708
709	ntb = DEVICE2SOFTC(device);
710
711	if (ntb->self_reg != NULL) {
712		DB_MASK_LOCK(ntb);
713		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
714		DB_MASK_UNLOCK(ntb);
715	}
716	callout_drain(&ntb->heartbeat_timer);
717	callout_drain(&ntb->lr_timer);
718	callout_drain(&ntb->peer_msix_work);
719	pci_disable_busmaster(ntb->device);
720	if (ntb->type == NTB_XEON)
721		ntb_teardown_xeon(ntb);
722	ntb_teardown_interrupts(ntb);
723
724	mtx_destroy(&ntb->db_mask_lock);
725	mtx_destroy(&ntb->ctx_lock);
726
727	ntb_unmap_pci_bar(ntb);
728
729	return (0);
730}
731
732/*
733 * Driver internal routines
734 */
735static inline enum ntb_bar
736ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
737{
738
739	KASSERT(mw < ntb->mw_count,
740	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
741	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
742
743	return (ntb->reg->mw_bar[mw]);
744}
745
746static inline bool
747bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
748{
749	/* XXX This assertion could be stronger. */
750	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
751	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
752}
753
754static inline void
755bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
756    uint32_t *xlat, uint32_t *lmt)
757{
758	uint32_t basev, lmtv, xlatv;
759
760	switch (bar) {
761	case NTB_B2B_BAR_1:
762		basev = ntb->xlat_reg->bar2_base;
763		lmtv = ntb->xlat_reg->bar2_limit;
764		xlatv = ntb->xlat_reg->bar2_xlat;
765		break;
766	case NTB_B2B_BAR_2:
767		basev = ntb->xlat_reg->bar4_base;
768		lmtv = ntb->xlat_reg->bar4_limit;
769		xlatv = ntb->xlat_reg->bar4_xlat;
770		break;
771	case NTB_B2B_BAR_3:
772		basev = ntb->xlat_reg->bar5_base;
773		lmtv = ntb->xlat_reg->bar5_limit;
774		xlatv = ntb->xlat_reg->bar5_xlat;
775		break;
776	default:
777		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
778		    ("bad bar"));
779		basev = lmtv = xlatv = 0;
780		break;
781	}
782
783	if (base != NULL)
784		*base = basev;
785	if (xlat != NULL)
786		*xlat = xlatv;
787	if (lmt != NULL)
788		*lmt = lmtv;
789}
790
791static int
792ntb_map_pci_bars(struct ntb_softc *ntb)
793{
794	int rc;
795
796	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
797	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
798	if (rc != 0)
799		goto out;
800
801	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
802	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
803	if (rc != 0)
804		goto out;
805	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
806	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
807	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
808
809	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
810	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
811	if (rc != 0)
812		goto out;
813	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
814	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
815	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
816
817	if (!HAS_FEATURE(NTB_SPLIT_BAR))
818		goto out;
819
820	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
821	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
822	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
823	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
824	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
825
826out:
827	if (rc != 0)
828		device_printf(ntb->device,
829		    "unable to allocate pci resource\n");
830	return (rc);
831}
832
833static void
834print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
835    const char *kind)
836{
837
838	device_printf(ntb->device,
839	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
840	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
841	    (char *)bar->vbase + bar->size - 1,
842	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
843	    (uintmax_t)bar->size, kind);
844}
845
846static int
847map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
848{
849
850	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
851	    &bar->pci_resource_id, RF_ACTIVE);
852	if (bar->pci_resource == NULL)
853		return (ENXIO);
854
855	save_bar_parameters(bar);
856	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
857	print_map_success(ntb, bar, "mmr");
858	return (0);
859}
860
861static int
862map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
863{
864	int rc;
865	vm_memattr_t mapmode;
866	uint8_t bar_size_bits = 0;
867
868	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
869	    &bar->pci_resource_id, RF_ACTIVE);
870
871	if (bar->pci_resource == NULL)
872		return (ENXIO);
873
874	save_bar_parameters(bar);
875	/*
876	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
877	 * hardware issue. To work around this, query the size it should be
878	 * configured to by the device and modify the resource to correspond to
879	 * this new size. The BIOS on systems with this problem is required to
880	 * provide enough address space to allow the driver to make this change
881	 * safely.
882	 *
883	 * Ideally I could have just specified the size when I allocated the
884	 * resource like:
885	 *  bus_alloc_resource(ntb->device,
886	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
887	 *	1ul << bar_size_bits, RF_ACTIVE);
888	 * but the PCI driver does not honor the size in this call, so we have
889	 * to modify it after the fact.
890	 */
891	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
892		if (bar->pci_resource_id == PCIR_BAR(2))
893			bar_size_bits = pci_read_config(ntb->device,
894			    XEON_PBAR23SZ_OFFSET, 1);
895		else
896			bar_size_bits = pci_read_config(ntb->device,
897			    XEON_PBAR45SZ_OFFSET, 1);
898
899		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
900		    bar->pci_resource, bar->pbase,
901		    bar->pbase + (1ul << bar_size_bits) - 1);
902		if (rc != 0) {
903			device_printf(ntb->device,
904			    "unable to resize bar\n");
905			return (rc);
906		}
907
908		save_bar_parameters(bar);
909	}
910
911	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
912	print_map_success(ntb, bar, "mw");
913
914	/*
915	 * Optionally, mark MW BARs as anything other than UC to improve
916	 * performance.
917	 */
918	mapmode = ntb_pat_flags();
919	if (mapmode == bar->map_mode)
920		return (0);
921
922	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
923	if (rc == 0) {
924		bar->map_mode = mapmode;
925		device_printf(ntb->device,
926		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
927		    "%s.\n",
928		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
929		    (char *)bar->vbase + bar->size - 1,
930		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
931		    ntb_vm_memattr_to_str(mapmode));
932	} else
933		device_printf(ntb->device,
934		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
935		    "%s: %d\n",
936		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
937		    (char *)bar->vbase + bar->size - 1,
938		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
939		    ntb_vm_memattr_to_str(mapmode), rc);
940		/* Proceed anyway */
941	return (0);
942}
943
944static void
945ntb_unmap_pci_bar(struct ntb_softc *ntb)
946{
947	struct ntb_pci_bar_info *current_bar;
948	int i;
949
950	for (i = 0; i < NTB_MAX_BARS; i++) {
951		current_bar = &ntb->bar_info[i];
952		if (current_bar->pci_resource != NULL)
953			bus_release_resource(ntb->device, SYS_RES_MEMORY,
954			    current_bar->pci_resource_id,
955			    current_bar->pci_resource);
956	}
957}
958
959static int
960ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
961{
962	uint32_t i;
963	int rc;
964
965	for (i = 0; i < num_vectors; i++) {
966		ntb->int_info[i].rid = i + 1;
967		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
968		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
969		if (ntb->int_info[i].res == NULL) {
970			device_printf(ntb->device,
971			    "bus_alloc_resource failed\n");
972			return (ENOMEM);
973		}
974		ntb->int_info[i].tag = NULL;
975		ntb->allocated_interrupts++;
976		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
977		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
978		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
979		if (rc != 0) {
980			device_printf(ntb->device, "bus_setup_intr failed\n");
981			return (ENXIO);
982		}
983	}
984	return (0);
985}
986
987/*
988 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
989 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
990 * should be okay.  This tunable should enable us to test that hypothesis
991 * when someone gets their hands on some Xeon hardware.
992 */
993static int ntb_force_remap_mode;
994TUNABLE_INT("hw.ntb.force_remap_mode", &ntb_force_remap_mode);
995SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
996    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
997    " to a smaller number of ithreads, even if the desired number are "
998    "available");
999
1000/*
1001 * In case it is NOT ok, give consumers an abort button.
1002 */
1003static int ntb_prefer_intx;
1004TUNABLE_INT("hw.ntb.prefer_intx_to_remap", &ntb_prefer_intx);
1005SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
1006    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
1007    "than remapping MSI-X messages over available slots (match Linux driver "
1008    "behavior)");
1009
1010/*
1011 * Remap the desired number of MSI-X messages to available ithreads in a simple
1012 * round-robin fashion.
1013 */
1014static int
1015ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
1016{
1017	u_int *vectors;
1018	uint32_t i;
1019	int rc;
1020
1021	if (ntb_prefer_intx != 0)
1022		return (ENXIO);
1023
1024	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
1025
1026	for (i = 0; i < desired; i++)
1027		vectors[i] = (i % avail) + 1;
1028
1029	rc = pci_remap_msix(dev, desired, vectors);
1030	free(vectors, M_NTB);
1031	return (rc);
1032}
1033
1034static int
1035ntb_init_isr(struct ntb_softc *ntb)
1036{
1037	uint32_t desired_vectors, num_vectors;
1038	int rc;
1039
1040	ntb->allocated_interrupts = 0;
1041	ntb->last_ts = ticks;
1042
1043	/*
1044	 * Mask all doorbell interrupts.  (Except link events!)
1045	 */
1046	DB_MASK_LOCK(ntb);
1047	ntb->db_mask = ntb->db_valid_mask;
1048	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1049	DB_MASK_UNLOCK(ntb);
1050
1051	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
1052	    ntb->db_count);
1053	if (desired_vectors >= 1) {
1054		rc = pci_alloc_msix(ntb->device, &num_vectors);
1055
1056		if (ntb_force_remap_mode != 0 && rc == 0 &&
1057		    num_vectors == desired_vectors)
1058			num_vectors--;
1059
1060		if (rc == 0 && num_vectors < desired_vectors) {
1061			rc = ntb_remap_msix(ntb->device, desired_vectors,
1062			    num_vectors);
1063			if (rc == 0)
1064				num_vectors = desired_vectors;
1065			else
1066				pci_release_msi(ntb->device);
1067		}
1068		if (rc != 0)
1069			num_vectors = 1;
1070	} else
1071		num_vectors = 1;
1072
1073	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
1074		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1075			device_printf(ntb->device,
1076			    "Errata workaround does not support MSI or INTX\n");
1077			return (EINVAL);
1078		}
1079
1080		ntb->db_vec_count = 1;
1081		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1082		rc = ntb_setup_legacy_interrupt(ntb);
1083	} else {
1084		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
1085		    HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1086			device_printf(ntb->device,
1087			    "Errata workaround expects %d doorbell bits\n",
1088			    XEON_NONLINK_DB_MSIX_BITS);
1089			return (EINVAL);
1090		}
1091
1092		ntb_create_msix_vec(ntb, num_vectors);
1093		rc = ntb_setup_msix(ntb, num_vectors);
1094		if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1095			ntb_get_msix_info(ntb);
1096	}
1097	if (rc != 0) {
1098		device_printf(ntb->device,
1099		    "Error allocating interrupts: %d\n", rc);
1100		ntb_free_msix_vec(ntb);
1101	}
1102
1103	return (rc);
1104}
1105
1106static int
1107ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1108{
1109	int rc;
1110
1111	ntb->int_info[0].rid = 0;
1112	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1113	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1114	if (ntb->int_info[0].res == NULL) {
1115		device_printf(ntb->device, "bus_alloc_resource failed\n");
1116		return (ENOMEM);
1117	}
1118
1119	ntb->int_info[0].tag = NULL;
1120	ntb->allocated_interrupts = 1;
1121
1122	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1123	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1124	    ntb, &ntb->int_info[0].tag);
1125	if (rc != 0) {
1126		device_printf(ntb->device, "bus_setup_intr failed\n");
1127		return (ENXIO);
1128	}
1129
1130	return (0);
1131}
1132
1133static void
1134ntb_teardown_interrupts(struct ntb_softc *ntb)
1135{
1136	struct ntb_int_info *current_int;
1137	int i;
1138
1139	for (i = 0; i < ntb->allocated_interrupts; i++) {
1140		current_int = &ntb->int_info[i];
1141		if (current_int->tag != NULL)
1142			bus_teardown_intr(ntb->device, current_int->res,
1143			    current_int->tag);
1144
1145		if (current_int->res != NULL)
1146			bus_release_resource(ntb->device, SYS_RES_IRQ,
1147			    rman_get_rid(current_int->res), current_int->res);
1148	}
1149
1150	ntb_free_msix_vec(ntb);
1151	pci_release_msi(ntb->device);
1152}
1153
1154/*
1155 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1156 * out to make code clearer.
1157 */
1158static inline uint64_t
1159db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1160{
1161
1162	if (ntb->type == NTB_ATOM)
1163		return (ntb_reg_read(8, regoff));
1164
1165	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1166
1167	return (ntb_reg_read(2, regoff));
1168}
1169
1170static inline void
1171db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1172{
1173
1174	KASSERT((val & ~ntb->db_valid_mask) == 0,
1175	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1176	     (uintmax_t)(val & ~ntb->db_valid_mask),
1177	     (uintmax_t)ntb->db_valid_mask));
1178
1179	if (regoff == ntb->self_reg->db_mask)
1180		DB_MASK_ASSERT(ntb, MA_OWNED);
1181	db_iowrite_raw(ntb, regoff, val);
1182}
1183
1184static inline void
1185db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1186{
1187
1188	if (ntb->type == NTB_ATOM) {
1189		ntb_reg_write(8, regoff, val);
1190		return;
1191	}
1192
1193	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1194	ntb_reg_write(2, regoff, (uint16_t)val);
1195}
1196
1197void
1198ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1199{
1200
1201	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1202		return;
1203
1204	DB_MASK_LOCK(ntb);
1205	ntb->db_mask |= bits;
1206	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1207	DB_MASK_UNLOCK(ntb);
1208}
1209
1210void
1211ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1212{
1213
1214	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1215	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1216	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1217	     (uintmax_t)ntb->db_valid_mask));
1218
1219	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1220		return;
1221
1222	DB_MASK_LOCK(ntb);
1223	ntb->db_mask &= ~bits;
1224	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1225	DB_MASK_UNLOCK(ntb);
1226}
1227
1228uint64_t
1229ntb_db_read(struct ntb_softc *ntb)
1230{
1231
1232	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1233		uint64_t res;
1234		unsigned i;
1235
1236		res = 0;
1237		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1238			if (ntb->msix_vec[i].masked != 0)
1239				res |= ntb_db_vector_mask(ntb, i);
1240		}
1241		return (res);
1242	}
1243
1244	return (db_ioread(ntb, ntb->self_reg->db_bell));
1245}
1246
1247void
1248ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1249{
1250
1251	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1252	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1253	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1254	     (uintmax_t)ntb->db_valid_mask));
1255
1256	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1257		unsigned i;
1258
1259		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1260			if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
1261				DB_MASK_LOCK(ntb);
1262				if (ntb->msix_vec[i].masked != 0) {
1263					/* XXX These need a public API. */
1264#if 0
1265					pci_unmask_msix(ntb->device, i);
1266#endif
1267					ntb->msix_vec[i].masked = 0;
1268				}
1269				DB_MASK_UNLOCK(ntb);
1270			}
1271		}
1272		return;
1273	}
1274
1275	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1276}
1277
1278static inline uint64_t
1279ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1280{
1281	uint64_t shift, mask;
1282
1283	shift = ntb->db_vec_shift;
1284	mask = (1ull << shift) - 1;
1285	return (mask << (shift * db_vector));
1286}
1287
1288static void
1289ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1290{
1291	uint64_t vec_mask;
1292
1293	ntb->last_ts = ticks;
1294	vec_mask = ntb_vec_mask(ntb, vec);
1295
1296	if ((vec_mask & ntb->db_link_mask) != 0) {
1297		if (ntb_poll_link(ntb))
1298			ntb_link_event(ntb);
1299	}
1300
1301	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
1302	    (vec_mask & ntb->db_link_mask) == 0) {
1303		DB_MASK_LOCK(ntb);
1304		if (ntb->msix_vec[vec].masked == 0) {
1305			/* XXX These need a public API. */
1306#if 0
1307			pci_mask_msix(ntb->device, vec);
1308#endif
1309			ntb->msix_vec[vec].masked = 1;
1310		}
1311		DB_MASK_UNLOCK(ntb);
1312	}
1313
1314	if ((vec_mask & ntb->db_valid_mask) != 0)
1315		ntb_db_event(ntb, vec);
1316}
1317
1318static void
1319ndev_vec_isr(void *arg)
1320{
1321	struct ntb_vec *nvec = arg;
1322
1323	ntb_interrupt(nvec->ntb, nvec->num);
1324}
1325
/*
 * Single-interrupt handler.  'arg' is the softc.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	ntb_interrupt(ntb, 0);
}
1332
1333static int
1334ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1335{
1336	uint32_t i;
1337
1338	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1339	    M_ZERO | M_WAITOK);
1340	for (i = 0; i < num_vectors; i++) {
1341		ntb->msix_vec[i].num = i;
1342		ntb->msix_vec[i].ntb = ntb;
1343	}
1344
1345	return (0);
1346}
1347
1348static void
1349ntb_free_msix_vec(struct ntb_softc *ntb)
1350{
1351
1352	if (ntb->msix_vec == NULL)
1353		return;
1354
1355	free(ntb->msix_vec, M_NTB);
1356	ntb->msix_vec = NULL;
1357}
1358
1359static void
1360ntb_get_msix_info(struct ntb_softc *ntb)
1361{
1362	struct pci_devinfo *dinfo;
1363	struct pcicfg_msix *msix;
1364	uint32_t laddr, data, i, offset;
1365
1366	dinfo = device_get_ivars(ntb->device);
1367	msix = &dinfo->cfg.msix;
1368
1369	laddr = data = 0;
1370
1371	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1372
1373	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1374		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1375
1376		laddr = bus_read_4(msix->msix_table_res, offset +
1377		    PCI_MSIX_ENTRY_LOWER_ADDR);
1378		ntb_printf(2, "local lower MSIX addr(%u): 0x%x\n", i, laddr);
1379
1380		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1381		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1382		     MSI_INTEL_ADDR_BASE));
1383		ntb->msix_data[i].nmd_ofs = laddr & ~MSI_INTEL_ADDR_BASE;
1384
1385		data = bus_read_4(msix->msix_table_res, offset +
1386		    PCI_MSIX_ENTRY_DATA);
1387		ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1388
1389		ntb->msix_data[i].nmd_data = data;
1390	}
1391}
1392
1393static struct ntb_hw_info *
1394ntb_get_device_info(uint32_t device_id)
1395{
1396	struct ntb_hw_info *ep = pci_ids;
1397
1398	while (ep->device_id) {
1399		if (ep->device_id == device_id)
1400			return (ep);
1401		++ep;
1402	}
1403	return (NULL);
1404}
1405
1406static void
1407ntb_teardown_xeon(struct ntb_softc *ntb)
1408{
1409
1410	if (ntb->reg != NULL)
1411		ntb_link_disable(ntb);
1412}
1413
1414static void
1415ntb_detect_max_mw(struct ntb_softc *ntb)
1416{
1417
1418	if (ntb->type == NTB_ATOM) {
1419		ntb->mw_count = ATOM_MW_COUNT;
1420		return;
1421	}
1422
1423	if (HAS_FEATURE(NTB_SPLIT_BAR))
1424		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1425	else
1426		ntb->mw_count = XEON_SNB_MW_COUNT;
1427}
1428
1429static int
1430ntb_detect_xeon(struct ntb_softc *ntb)
1431{
1432	uint8_t ppd, conn_type;
1433
1434	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1435	ntb->ppd = ppd;
1436
1437	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1438		ntb->dev_type = NTB_DEV_DSD;
1439	else
1440		ntb->dev_type = NTB_DEV_USD;
1441
1442	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1443		ntb->features |= NTB_SPLIT_BAR;
1444
1445	/*
1446	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
1447	 * errata workaround; only do one at a time.
1448	 */
1449	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1450		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
1451
1452	conn_type = ppd & XEON_PPD_CONN_TYPE;
1453	switch (conn_type) {
1454	case NTB_CONN_B2B:
1455		ntb->conn_type = conn_type;
1456		break;
1457	case NTB_CONN_RP:
1458	case NTB_CONN_TRANSPARENT:
1459	default:
1460		device_printf(ntb->device, "Unsupported connection type: %u\n",
1461		    (unsigned)conn_type);
1462		return (ENXIO);
1463	}
1464	return (0);
1465}
1466
1467static int
1468ntb_detect_atom(struct ntb_softc *ntb)
1469{
1470	uint32_t ppd, conn_type;
1471
1472	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1473	ntb->ppd = ppd;
1474
1475	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1476		ntb->dev_type = NTB_DEV_DSD;
1477	else
1478		ntb->dev_type = NTB_DEV_USD;
1479
1480	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1481	switch (conn_type) {
1482	case NTB_CONN_B2B:
1483		ntb->conn_type = conn_type;
1484		break;
1485	default:
1486		device_printf(ntb->device, "Unsupported NTB configuration\n");
1487		return (ENXIO);
1488	}
1489	return (0);
1490}
1491
1492static int
1493ntb_xeon_init_dev(struct ntb_softc *ntb)
1494{
1495	int rc;
1496
1497	ntb->spad_count		= XEON_SPAD_COUNT;
1498	ntb->db_count		= XEON_DB_COUNT;
1499	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1500	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1501	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1502
1503	if (ntb->conn_type != NTB_CONN_B2B) {
1504		device_printf(ntb->device, "Connection type %d not supported\n",
1505		    ntb->conn_type);
1506		return (ENXIO);
1507	}
1508
1509	ntb->reg = &xeon_reg;
1510	ntb->self_reg = &xeon_pri_reg;
1511	ntb->peer_reg = &xeon_b2b_reg;
1512	ntb->xlat_reg = &xeon_sec_xlat;
1513
1514	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1515		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
1516		    ntb->mw_count;
1517		ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
1518		    g_ntb_msix_idx, ntb->msix_mw_idx);
1519		rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
1520		    VM_MEMATTR_UNCACHEABLE);
1521		KASSERT(rc == 0, ("shouldn't fail"));
1522	} else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
1523		/*
1524		 * There is a Xeon hardware errata related to writes to SDOORBELL or
1525		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1526		 * which may hang the system.  To workaround this, use a memory
1527		 * window to access the interrupt and scratch pad registers on the
1528		 * remote system.
1529		 */
1530		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1531		    ntb->mw_count;
1532		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1533		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1534		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
1535		    VM_MEMATTR_UNCACHEABLE);
1536		KASSERT(rc == 0, ("shouldn't fail"));
1537	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1538		/*
1539		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1540		 * mirrored to the remote system.  Shrink the number of bits by one,
1541		 * since bit 14 is the last bit.
1542		 *
1543		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1544		 * anyway.  Nor for non-B2B connection types.
1545		 */
1546		ntb->db_count = XEON_DB_COUNT - 1;
1547
1548	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1549
1550	if (ntb->dev_type == NTB_DEV_USD)
1551		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1552		    &xeon_b2b_usd_addr);
1553	else
1554		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1555		    &xeon_b2b_dsd_addr);
1556	if (rc != 0)
1557		return (rc);
1558
1559	/* Enable Bus Master and Memory Space on the secondary side */
1560	ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1561	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1562
1563	/*
1564	 * Mask all doorbell interrupts.
1565	 */
1566	DB_MASK_LOCK(ntb);
1567	ntb->db_mask = ntb->db_valid_mask;
1568	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1569	DB_MASK_UNLOCK(ntb);
1570
1571	rc = xeon_setup_msix_bar(ntb);
1572	if (rc != 0)
1573		return (rc);
1574
1575	rc = ntb_init_isr(ntb);
1576	return (rc);
1577}
1578
1579static int
1580ntb_atom_init_dev(struct ntb_softc *ntb)
1581{
1582	int error;
1583
1584	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1585	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1586
1587	ntb->spad_count		 = ATOM_SPAD_COUNT;
1588	ntb->db_count		 = ATOM_DB_COUNT;
1589	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1590	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1591	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1592
1593	ntb->reg = &atom_reg;
1594	ntb->self_reg = &atom_pri_reg;
1595	ntb->peer_reg = &atom_b2b_reg;
1596	ntb->xlat_reg = &atom_sec_xlat;
1597
1598	/*
1599	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1600	 * resolved.  Mask transaction layer internal parity errors.
1601	 */
1602	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1603
1604	configure_atom_secondary_side_bars(ntb);
1605
1606	/* Enable Bus Master and Memory Space on the secondary side */
1607	ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1608	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1609
1610	error = ntb_init_isr(ntb);
1611	if (error != 0)
1612		return (error);
1613
1614	/* Initiate PCI-E link training */
1615	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1616
1617	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1618
1619	return (0);
1620}
1621
1622/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1623static void
1624configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1625{
1626
1627	if (ntb->dev_type == NTB_DEV_USD) {
1628		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1629		    XEON_B2B_BAR2_ADDR64);
1630		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1631		    XEON_B2B_BAR4_ADDR64);
1632		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1633		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1634	} else {
1635		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1636		    XEON_B2B_BAR2_ADDR64);
1637		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1638		    XEON_B2B_BAR4_ADDR64);
1639		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1640		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1641	}
1642}
1643
1644
1645/*
1646 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1647 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1648 * remains for use by a higher layer.
1649 *
1650 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1651 * MW size is sufficiently large.
1652 */
1653static unsigned int ntb_b2b_mw_share;
1654TUNABLE_INT("hw.ntb.b2b_mw_share", &ntb_b2b_mw_share);
1655SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1656    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1657    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1658    "value here.");
1659
1660static void
1661xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1662    enum ntb_bar regbar)
1663{
1664	struct ntb_pci_bar_info *bar;
1665	uint8_t bar_sz;
1666
1667	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1668		return;
1669
1670	bar = &ntb->bar_info[idx];
1671	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1672	if (idx == regbar) {
1673		if (ntb->b2b_off != 0)
1674			bar_sz--;
1675		else
1676			bar_sz = 0;
1677	} else if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
1678	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx) {
1679		/* Restrict LAPIC BAR to 1MB */
1680		pci_write_config(ntb->device, bar->psz_off, 20, 1);
1681		pci_write_config(ntb->device, bar->ssz_off, 20, 1);
1682		bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1683		bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1684		(void)bar_sz;
1685		return;
1686	}
1687	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1688	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1689	(void)bar_sz;
1690}
1691
1692static void
1693xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1694    enum ntb_bar idx, enum ntb_bar regbar)
1695{
1696	uint64_t reg_val, lmt_addr;
1697	uint32_t base_reg, lmt_reg;
1698
1699	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1700	if (idx == regbar)
1701		bar_addr += ntb->b2b_off;
1702	lmt_addr = bar_addr;
1703
1704	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
1705	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx)
1706		lmt_addr += ONE_MB;
1707
1708	/*
1709	 * Set limit registers first to avoid an errata where setting the base
1710	 * registers locks the limit registers.
1711	 */
1712	if (!bar_is_64bit(ntb, idx)) {
1713		ntb_reg_write(4, lmt_reg, lmt_addr);
1714		reg_val = ntb_reg_read(4, lmt_reg);
1715		(void)reg_val;
1716
1717		ntb_reg_write(4, base_reg, bar_addr);
1718		reg_val = ntb_reg_read(4, base_reg);
1719		(void)reg_val;
1720	} else {
1721		ntb_reg_write(8, lmt_reg, lmt_addr);
1722		reg_val = ntb_reg_read(8, lmt_reg);
1723		(void)reg_val;
1724
1725		ntb_reg_write(8, base_reg, bar_addr);
1726		reg_val = ntb_reg_read(8, base_reg);
1727		(void)reg_val;
1728	}
1729}
1730
1731static void
1732xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1733{
1734	struct ntb_pci_bar_info *bar;
1735
1736	bar = &ntb->bar_info[idx];
1737	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1738		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1739		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1740	} else {
1741		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1742		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1743	}
1744	(void)base_addr;
1745}
1746
1747static int
1748xeon_setup_msix_bar(struct ntb_softc *ntb)
1749{
1750	struct ntb_pci_bar_info *lapic_bar;
1751	enum ntb_bar bar_num;
1752	int rc;
1753
1754	if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1755		return (0);
1756
1757	bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
1758	lapic_bar = &ntb->bar_info[bar_num];
1759
1760	/* Restrict LAPIC BAR to 1MB */
1761	if (lapic_bar->size > ONE_MB) {
1762		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
1763		    lapic_bar->pci_resource, lapic_bar->pbase,
1764		    lapic_bar->pbase + ONE_MB - 1);
1765		if (rc == 0)
1766			lapic_bar->size = ONE_MB;
1767		else {
1768			ntb_printf(0, "Failed to shrink LAPIC BAR resource to "
1769			    "1 MB: %d\n", rc);
1770			/* Ignore error */
1771		}
1772	}
1773
1774	ntb->peer_lapic_bar = lapic_bar;
1775	return (0);
1776}
1777
1778static int
1779xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1780    const struct ntb_b2b_addr *peer_addr)
1781{
1782	struct ntb_pci_bar_info *b2b_bar;
1783	vm_size_t bar_size;
1784	uint64_t bar_addr;
1785	enum ntb_bar b2b_bar_num, i;
1786
1787	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1788		b2b_bar = NULL;
1789		b2b_bar_num = NTB_CONFIG_BAR;
1790		ntb->b2b_off = 0;
1791	} else {
1792		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1793		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1794		    ("invalid b2b mw bar"));
1795
1796		b2b_bar = &ntb->bar_info[b2b_bar_num];
1797		bar_size = b2b_bar->size;
1798
1799		if (ntb_b2b_mw_share != 0 &&
1800		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1801			ntb->b2b_off = bar_size >> 1;
1802		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1803			ntb->b2b_off = 0;
1804		} else {
1805			device_printf(ntb->device,
1806			    "B2B bar size is too small!\n");
1807			return (EIO);
1808		}
1809	}
1810
1811	/*
1812	 * Reset the secondary bar sizes to match the primary bar sizes.
1813	 * (Except, disable or halve the size of the B2B secondary bar.)
1814	 */
1815	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1816		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1817
1818	bar_addr = 0;
1819	if (b2b_bar_num == NTB_CONFIG_BAR)
1820		bar_addr = addr->bar0_addr;
1821	else if (b2b_bar_num == NTB_B2B_BAR_1)
1822		bar_addr = addr->bar2_addr64;
1823	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1824		bar_addr = addr->bar4_addr64;
1825	else if (b2b_bar_num == NTB_B2B_BAR_2)
1826		bar_addr = addr->bar4_addr32;
1827	else if (b2b_bar_num == NTB_B2B_BAR_3)
1828		bar_addr = addr->bar5_addr32;
1829	else
1830		KASSERT(false, ("invalid bar"));
1831
1832	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1833
1834	/*
1835	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1836	 * register BAR.  The B2B BAR is either disabled above or configured
1837	 * half-size.  It starts at PBAR xlat + offset.
1838	 *
1839	 * Also set up incoming BAR limits == base (zero length window).
1840	 */
1841	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1842	    b2b_bar_num);
1843	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1844		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1845		    NTB_B2B_BAR_2, b2b_bar_num);
1846		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1847		    NTB_B2B_BAR_3, b2b_bar_num);
1848	} else
1849		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1850		    NTB_B2B_BAR_2, b2b_bar_num);
1851
1852	/* Zero incoming translation addrs */
1853	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1854	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1855
1856	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
1857		size_t size, xlatoffset;
1858
1859		switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
1860		case NTB_B2B_BAR_1:
1861			size = 8;
1862			xlatoffset = XEON_SBAR2XLAT_OFFSET;
1863			break;
1864		case NTB_B2B_BAR_2:
1865			xlatoffset = XEON_SBAR4XLAT_OFFSET;
1866			if (HAS_FEATURE(NTB_SPLIT_BAR))
1867				size = 4;
1868			else
1869				size = 8;
1870			break;
1871		case NTB_B2B_BAR_3:
1872			xlatoffset = XEON_SBAR5XLAT_OFFSET;
1873			size = 4;
1874			break;
1875		default:
1876			KASSERT(false, ("Bogus msix mw idx: %u",
1877			    ntb->msix_mw_idx));
1878			return (EINVAL);
1879		}
1880
1881		/*
1882		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
1883		 * workaround
1884		 */
1885		if (size == 4)
1886			ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
1887		else
1888			ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
1889	}
1890	(void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
1891	(void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
1892
1893	/* Zero outgoing translation limits (whole bar size windows) */
1894	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1895	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1896
1897	/* Set outgoing translation offsets */
1898	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1899	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1900		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1901		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1902	} else
1903		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1904
1905	/* Set the translation offset for B2B registers */
1906	bar_addr = 0;
1907	if (b2b_bar_num == NTB_CONFIG_BAR)
1908		bar_addr = peer_addr->bar0_addr;
1909	else if (b2b_bar_num == NTB_B2B_BAR_1)
1910		bar_addr = peer_addr->bar2_addr64;
1911	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1912		bar_addr = peer_addr->bar4_addr64;
1913	else if (b2b_bar_num == NTB_B2B_BAR_2)
1914		bar_addr = peer_addr->bar4_addr32;
1915	else if (b2b_bar_num == NTB_B2B_BAR_3)
1916		bar_addr = peer_addr->bar5_addr32;
1917	else
1918		KASSERT(false, ("invalid bar"));
1919
1920	/*
1921	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1922	 * at a time.
1923	 */
1924	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1925	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1926	return (0);
1927}
1928
1929static inline bool
1930_xeon_link_is_up(struct ntb_softc *ntb)
1931{
1932
1933	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1934		return (true);
1935	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1936}
1937
1938static inline bool
1939link_is_up(struct ntb_softc *ntb)
1940{
1941
1942	if (ntb->type == NTB_XEON)
1943		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
1944		    !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
1945
1946	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1947	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1948}
1949
1950static inline bool
1951atom_link_is_err(struct ntb_softc *ntb)
1952{
1953	uint32_t status;
1954
1955	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1956
1957	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1958	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1959		return (true);
1960
1961	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1962	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1963}
1964
1965/* Atom does not have link status interrupt, poll on that platform */
1966static void
1967atom_link_hb(void *arg)
1968{
1969	struct ntb_softc *ntb = arg;
1970	sbintime_t timo, poll_ts;
1971
1972	timo = NTB_HB_TIMEOUT * hz;
1973	poll_ts = ntb->last_ts + timo;
1974
1975	/*
1976	 * Delay polling the link status if an interrupt was received, unless
1977	 * the cached link status says the link is down.
1978	 */
1979	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1980		timo = poll_ts - ticks;
1981		goto out;
1982	}
1983
1984	if (ntb_poll_link(ntb))
1985		ntb_link_event(ntb);
1986
1987	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1988		/* Link is down with error, proceed with recovery */
1989		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1990		return;
1991	}
1992
1993out:
1994	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1995}
1996
1997static void
1998atom_perform_link_restart(struct ntb_softc *ntb)
1999{
2000	uint32_t status;
2001
2002	/* Driver resets the NTB ModPhy lanes - magic! */
2003	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
2004	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
2005	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
2006	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
2007
2008	/* Driver waits 100ms to allow the NTB ModPhy to settle */
2009	pause("ModPhy", hz / 10);
2010
2011	/* Clear AER Errors, write to clear */
2012	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
2013	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
2014	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
2015
2016	/* Clear unexpected electrical idle event in LTSSM, write to clear */
2017	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
2018	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
2019	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
2020
2021	/* Clear DeSkew Buffer error, write to clear */
2022	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
2023	status |= ATOM_DESKEWSTS_DBERR;
2024	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
2025
2026	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
2027	status &= ATOM_IBIST_ERR_OFLOW;
2028	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
2029
2030	/* Releases the NTB state machine to allow the link to retrain */
2031	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
2032	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
2033	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
2034}
2035
2036/*
2037 * ntb_set_ctx() - associate a driver context with an ntb device
2038 * @ntb:        NTB device context
2039 * @ctx:        Driver context
2040 * @ctx_ops:    Driver context operations
2041 *
2042 * Associate a driver context and operations with a ntb device.  The context is
2043 * provided by the client driver, and the driver may associate a different
2044 * context with each ntb device.
2045 *
2046 * Return: Zero if the context is associated, otherwise an error number.
2047 */
2048int
2049ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
2050{
2051
2052	if (ctx == NULL || ops == NULL)
2053		return (EINVAL);
2054	if (ntb->ctx_ops != NULL)
2055		return (EINVAL);
2056
2057	CTX_LOCK(ntb);
2058	if (ntb->ctx_ops != NULL) {
2059		CTX_UNLOCK(ntb);
2060		return (EINVAL);
2061	}
2062	ntb->ntb_ctx = ctx;
2063	ntb->ctx_ops = ops;
2064	CTX_UNLOCK(ntb);
2065
2066	return (0);
2067}
2068
2069/*
2070 * It is expected that this will only be used from contexts where the ctx_lock
2071 * is not needed to protect ntb_ctx lifetime.
2072 */
2073void *
2074ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
2075{
2076
2077	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
2078	if (ops != NULL)
2079		*ops = ntb->ctx_ops;
2080	return (ntb->ntb_ctx);
2081}
2082
2083/*
2084 * ntb_clear_ctx() - disassociate any driver context from an ntb device
2085 * @ntb:        NTB device context
2086 *
2087 * Clear any association that may exist between a driver context and the ntb
2088 * device.
2089 */
2090void
2091ntb_clear_ctx(struct ntb_softc *ntb)
2092{
2093
2094	CTX_LOCK(ntb);
2095	ntb->ntb_ctx = NULL;
2096	ntb->ctx_ops = NULL;
2097	CTX_UNLOCK(ntb);
2098}
2099
2100/*
2101 * ntb_link_event() - notify driver context of a change in link status
2102 * @ntb:        NTB device context
2103 *
2104 * Notify the driver context that the link status may have changed.  The driver
2105 * should call ntb_link_is_up() to get the current status.
2106 */
2107void
2108ntb_link_event(struct ntb_softc *ntb)
2109{
2110
2111	CTX_LOCK(ntb);
2112	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
2113		ntb->ctx_ops->link_event(ntb->ntb_ctx);
2114	CTX_UNLOCK(ntb);
2115}
2116
2117/*
2118 * ntb_db_event() - notify driver context of a doorbell event
2119 * @ntb:        NTB device context
2120 * @vector:     Interrupt vector number
2121 *
2122 * Notify the driver context of a doorbell event.  If hardware supports
2123 * multiple interrupt vectors for doorbells, the vector number indicates which
2124 * vector received the interrupt.  The vector number is relative to the first
2125 * vector used for doorbells, starting at zero, and must be less than
2126 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
2127 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
2128 * those bits are associated with the vector number.
2129 */
2130static void
2131ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
2132{
2133
2134	CTX_LOCK(ntb);
2135	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
2136		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
2137	CTX_UNLOCK(ntb);
2138}
2139
2140/*
2141 * ntb_link_enable() - enable the link on the secondary side of the ntb
2142 * @ntb:        NTB device context
2143 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
2144 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
2145 *
2146 * Enable the link on the secondary side of the ntb.  This can only be done
2147 * from the primary side of the ntb in primary or b2b topology.  The ntb device
2148 * should train the link to its maximum speed and width, or the requested speed
2149 * and width, whichever is smaller, if supported.
2150 *
2151 * Return: Zero on success, otherwise an error number.
2152 *
2153 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
2154 *      and width input will be ignored.
2155 */
2156int
2157ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
2158    enum ntb_width w __unused)
2159{
2160	uint32_t cntl;
2161
2162	ntb_printf(2, "%s\n", __func__);
2163
2164	if (ntb->type == NTB_ATOM) {
2165		pci_write_config(ntb->device, NTB_PPD_OFFSET,
2166		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
2167		return (0);
2168	}
2169
2170	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2171		ntb_link_event(ntb);
2172		return (0);
2173	}
2174
2175	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2176	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
2177	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
2178	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
2179	if (HAS_FEATURE(NTB_SPLIT_BAR))
2180		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
2181	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2182	return (0);
2183}
2184
2185/*
2186 * ntb_link_disable() - disable the link on the secondary side of the ntb
2187 * @ntb:        NTB device context
2188 *
2189 * Disable the link on the secondary side of the ntb.  This can only be done
2190 * from the primary side of the ntb in primary or b2b topology.  The ntb device
2191 * should disable the link.  Returning from this call must indicate that a
2192 * barrier has passed, though with no more writes may pass in either direction
2193 * across the link, except if this call returns an error number.
2194 *
2195 * Return: Zero on success, otherwise an error number.
2196 */
2197int
2198ntb_link_disable(struct ntb_softc *ntb)
2199{
2200	uint32_t cntl;
2201
2202	ntb_printf(2, "%s\n", __func__);
2203
2204	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
2205		ntb_link_event(ntb);
2206		return (0);
2207	}
2208
2209	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2210	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
2211	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
2212	if (HAS_FEATURE(NTB_SPLIT_BAR))
2213		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
2214	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
2215	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
2216	return (0);
2217}
2218
2219bool
2220ntb_link_enabled(struct ntb_softc *ntb)
2221{
2222	uint32_t cntl;
2223
2224	if (ntb->type == NTB_ATOM) {
2225		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
2226		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
2227	}
2228
2229	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
2230		return (true);
2231
2232	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2233	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
2234}
2235
2236static void
2237recover_atom_link(void *arg)
2238{
2239	struct ntb_softc *ntb = arg;
2240	unsigned speed, width, oldspeed, oldwidth;
2241	uint32_t status32;
2242
2243	atom_perform_link_restart(ntb);
2244
2245	/*
2246	 * There is a potential race between the 2 NTB devices recovering at
2247	 * the same time.  If the times are the same, the link will not recover
2248	 * and the driver will be stuck in this loop forever.  Add a random
2249	 * interval to the recovery time to prevent this race.
2250	 */
2251	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
2252	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
2253
2254	if (atom_link_is_err(ntb))
2255		goto retry;
2256
2257	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
2258	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
2259		goto out;
2260
2261	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
2262	width = NTB_LNK_STA_WIDTH(status32);
2263	speed = status32 & NTB_LINK_SPEED_MASK;
2264
2265	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
2266	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
2267	if (oldwidth != width || oldspeed != speed)
2268		goto retry;
2269
2270out:
2271	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
2272	    ntb);
2273	return;
2274
2275retry:
2276	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
2277	    ntb);
2278}
2279
2280/*
2281 * Polls the HW link status register(s); returns true if something has changed.
2282 */
2283static bool
2284ntb_poll_link(struct ntb_softc *ntb)
2285{
2286	uint32_t ntb_cntl;
2287	uint16_t reg_val;
2288
2289	if (ntb->type == NTB_ATOM) {
2290		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
2291		if (ntb_cntl == ntb->ntb_ctl)
2292			return (false);
2293
2294		ntb->ntb_ctl = ntb_cntl;
2295		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
2296	} else {
2297		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
2298
2299		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2300		if (reg_val == ntb->lnk_sta)
2301			return (false);
2302
2303		ntb->lnk_sta = reg_val;
2304
2305		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
2306			if (_xeon_link_is_up(ntb)) {
2307				if (!ntb->peer_msix_good) {
2308					callout_reset(&ntb->peer_msix_work, 0,
2309					    ntb_exchange_msix, ntb);
2310					return (false);
2311				}
2312			} else {
2313				ntb->peer_msix_good = false;
2314				ntb->peer_msix_done = false;
2315			}
2316		}
2317	}
2318	return (true);
2319}
2320
2321static inline enum ntb_speed
2322ntb_link_sta_speed(struct ntb_softc *ntb)
2323{
2324
2325	if (!link_is_up(ntb))
2326		return (NTB_SPEED_NONE);
2327	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2328}
2329
2330static inline enum ntb_width
2331ntb_link_sta_width(struct ntb_softc *ntb)
2332{
2333
2334	if (!link_is_up(ntb))
2335		return (NTB_WIDTH_NONE);
2336	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2337}
2338
2339SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
2340    "Driver state, statistics, and HW registers");
2341
/*
 * Encoding of the sysctl arg2 value consumed by sysctl_handle_register():
 * the top bits carry flags and the remaining low bits hold the register
 * offset.
 *
 * Bits 31:30 select the access width of the register.
 */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_8	(0ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)

/* Bit 29: read through the doorbell accessor.  Bit 28: PCI config space. */
#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)

/* Every flag bit; whatever is left over is the register offset. */
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2351
2352static void
2353ntb_sysctl_init(struct ntb_softc *ntb)
2354{
2355	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
2356	struct sysctl_ctx_list *ctx;
2357	struct sysctl_oid *tree, *tmptree;
2358
2359	ctx = device_get_sysctl_ctx(ntb->device);
2360	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));
2361
2362	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
2363	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
2364	    sysctl_handle_link_status_human, "A",
2365	    "Link status (human readable)");
2366	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
2367	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
2368	    "IU", "Link status (1=active, 0=inactive)");
2369	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
2370	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
2371	    "IU", "Set/get interface status (1=UP, 0=DOWN)");
2372
2373	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
2374	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
2375	tree_par = SYSCTL_CHILDREN(tree);
2376
2377	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
2378	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
2379	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
2380	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
2381	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
2382	    &ntb->ppd, 0, "Raw PPD register (cached)");
2383
2384	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
2385#ifdef notyet
2386		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
2387		    &ntb->b2b_mw_idx, 0,
2388		    "Index of the MW used for B2B remote register access");
2389#endif
2390		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
2391		    CTLFLAG_RD, &ntb->b2b_off,
2392		    "If non-zero, offset of B2B register region in shared MW");
2393	}
2394
2395	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
2396	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
2397	    "Features/errata of this NTB device");
2398
2399	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
2400	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
2401	    "NTB CTL register (cached)");
2402	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
2403	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
2404	    "LNK STA register (cached)");
2405
2406#ifdef notyet
2407	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
2408	    &ntb->mw_count, 0, "MW count");
2409	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
2410	    &ntb->spad_count, 0, "Scratchpad count");
2411	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
2412	    &ntb->db_count, 0, "Doorbell count");
2413	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
2414	    &ntb->db_vec_count, 0, "Doorbell vector count");
2415	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
2416	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
2417#endif
2418
2419	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2420	    &ntb->db_valid_mask, "Doorbell valid mask");
2421	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2422	    &ntb->db_link_mask, "Doorbell link mask");
2423	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2424	    &ntb->db_mask, "Doorbell mask (cached)");
2425
2426	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2427	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
2428	regpar = SYSCTL_CHILDREN(tmptree);
2429
2430	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2431	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2432	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2433	    "NTB Control register");
2434	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2435	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2436	    0x19c, sysctl_handle_register, "IU",
2437	    "NTB Link Capabilities");
2438	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2439	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2440	    0x1a0, sysctl_handle_register, "IU",
2441	    "NTB Link Control register");
2442
2443	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2444	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2445	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2446	    sysctl_handle_register, "QU", "Doorbell mask register");
2447	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2448	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2449	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2450	    sysctl_handle_register, "QU", "Doorbell register");
2451
2452	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2453	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2454	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2455	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2456	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2457		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2458		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2459		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2460		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2461		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2462		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2463		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2464		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2465	} else {
2466		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2467		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2468		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2469		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2470	}
2471
2472	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2473	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2474	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2475	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2476	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2477		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2478		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2479		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2480		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2481		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2482		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2483		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2484		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2485	} else {
2486		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2487		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2488		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2489		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2490	}
2491
2492	if (ntb->type == NTB_ATOM)
2493		return;
2494
2495	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2496	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2497	statpar = SYSCTL_CHILDREN(tmptree);
2498	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2499	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2500	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2501	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2502
2503	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2504	    CTLFLAG_RD, NULL, "Xeon HW errors");
2505	errpar = SYSCTL_CHILDREN(tmptree);
2506
2507	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
2508	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2509	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
2510	    sysctl_handle_register, "CU", "PPD");
2511
2512	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
2513	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2514	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
2515	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
2516	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
2517	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2518	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
2519	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
2520	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
2521	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2522	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
2523	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
2524
2525	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
2526	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2527	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
2528	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
2529	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
2530	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2531	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
2532	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
2533	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
2534	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2535	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
2536	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
2537
2538	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
2539	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2540	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2541	    sysctl_handle_register, "SU", "DEVSTS");
2542	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
2543	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2544	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2545	    sysctl_handle_register, "SU", "LNKSTS");
2546	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
2547	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2548	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
2549	    sysctl_handle_register, "SU", "SLNKSTS");
2550
2551	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2552	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2553	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2554	    sysctl_handle_register, "IU", "UNCERRSTS");
2555	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2556	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2557	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2558	    sysctl_handle_register, "IU", "CORERRSTS");
2559
2560	if (ntb->conn_type != NTB_CONN_B2B)
2561		return;
2562
2563	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2564	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2565	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2566	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2567	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2568		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2569		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2570		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2571		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2572		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2573		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2574		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2575		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2576	} else {
2577		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2578		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2579		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2580		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2581	}
2582
2583	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2584	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2585	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2586	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2587	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2588		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2589		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2590		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2591		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2592		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2593		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2594		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2595		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2596	} else {
2597		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2598		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2599		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2600		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2601	}
2602
2603	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2604	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2605	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2606	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2607	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2608	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2609	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2610	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2611	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2612		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2613		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2614		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2615		    sysctl_handle_register, "IU",
2616		    "Secondary BAR4 base register");
2617		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2618		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2619		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2620		    sysctl_handle_register, "IU",
2621		    "Secondary BAR5 base register");
2622	} else {
2623		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2624		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2625		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2626		    sysctl_handle_register, "QU",
2627		    "Secondary BAR45 base register");
2628	}
2629}
2630
2631static int
2632sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2633{
2634	struct ntb_softc *ntb;
2635	struct sbuf sb;
2636	int error;
2637
2638	error = 0;
2639	ntb = arg1;
2640
2641	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2642
2643	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2644	error = sbuf_finish(&sb);
2645	sbuf_delete(&sb);
2646
2647	if (error || !req->newptr)
2648		return (error);
2649	return (EINVAL);
2650}
2651
2652static int
2653sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
2654{
2655	struct ntb_softc *ntb;
2656	unsigned old, new;
2657	int error;
2658
2659	error = 0;
2660	ntb = arg1;
2661
2662	old = ntb_link_enabled(ntb);
2663
2664	error = SYSCTL_OUT(req, &old, sizeof(old));
2665	if (error != 0 || req->newptr == NULL)
2666		return (error);
2667
2668	error = SYSCTL_IN(req, &new, sizeof(new));
2669	if (error != 0)
2670		return (error);
2671
2672	ntb_printf(0, "Admin set interface state to '%sabled'\n",
2673	    (new != 0)? "en" : "dis");
2674
2675	if (new != 0)
2676		error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
2677	else
2678		error = ntb_link_disable(ntb);
2679	return (error);
2680}
2681
2682static int
2683sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
2684{
2685	struct ntb_softc *ntb;
2686	struct sbuf sb;
2687	enum ntb_speed speed;
2688	enum ntb_width width;
2689	int error;
2690
2691	error = 0;
2692	ntb = arg1;
2693
2694	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2695
2696	if (ntb_link_is_up(ntb, &speed, &width))
2697		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2698		    (unsigned)speed, (unsigned)width);
2699	else
2700		sbuf_printf(&sb, "down");
2701
2702	error = sbuf_finish(&sb);
2703	sbuf_delete(&sb);
2704
2705	if (error || !req->newptr)
2706		return (error);
2707	return (EINVAL);
2708}
2709
2710static int
2711sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2712{
2713	struct ntb_softc *ntb;
2714	unsigned res;
2715	int error;
2716
2717	error = 0;
2718	ntb = arg1;
2719
2720	res = ntb_link_is_up(ntb, NULL, NULL);
2721
2722	error = SYSCTL_OUT(req, &res, sizeof(res));
2723	if (error || !req->newptr)
2724		return (error);
2725	return (EINVAL);
2726}
2727
2728static int
2729sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2730{
2731	struct ntb_softc *ntb;
2732	const void *outp;
2733	uintptr_t sz;
2734	uint64_t umv;
2735	char be[sizeof(umv)];
2736	size_t outsz;
2737	uint32_t reg;
2738	bool db, pci;
2739	int error;
2740
2741	ntb = arg1;
2742	reg = arg2 & ~NTB_REGFLAGS_MASK;
2743	sz = arg2 & NTB_REGSZ_MASK;
2744	db = (arg2 & NTB_DB_READ) != 0;
2745	pci = (arg2 & NTB_PCI_REG) != 0;
2746
2747	KASSERT(!(db && pci), ("bogus"));
2748
2749	if (db) {
2750		KASSERT(sz == NTB_REG_64, ("bogus"));
2751		umv = db_ioread(ntb, reg);
2752		outsz = sizeof(uint64_t);
2753	} else {
2754		switch (sz) {
2755		case NTB_REG_64:
2756			if (pci)
2757				umv = pci_read_config(ntb->device, reg, 8);
2758			else
2759				umv = ntb_reg_read(8, reg);
2760			outsz = sizeof(uint64_t);
2761			break;
2762		case NTB_REG_32:
2763			if (pci)
2764				umv = pci_read_config(ntb->device, reg, 4);
2765			else
2766				umv = ntb_reg_read(4, reg);
2767			outsz = sizeof(uint32_t);
2768			break;
2769		case NTB_REG_16:
2770			if (pci)
2771				umv = pci_read_config(ntb->device, reg, 2);
2772			else
2773				umv = ntb_reg_read(2, reg);
2774			outsz = sizeof(uint16_t);
2775			break;
2776		case NTB_REG_8:
2777			if (pci)
2778				umv = pci_read_config(ntb->device, reg, 1);
2779			else
2780				umv = ntb_reg_read(1, reg);
2781			outsz = sizeof(uint8_t);
2782			break;
2783		default:
2784			panic("bogus");
2785			break;
2786		}
2787	}
2788
2789	/* Encode bigendian so that sysctl -x is legible. */
2790	be64enc(be, umv);
2791	outp = ((char *)be) + sizeof(umv) - outsz;
2792
2793	error = SYSCTL_OUT(req, outp, outsz);
2794	if (error || !req->newptr)
2795		return (error);
2796	return (EINVAL);
2797}
2798
2799static unsigned
2800ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2801{
2802
2803	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2804	    uidx >= ntb->b2b_mw_idx) ||
2805	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2806		uidx++;
2807	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2808	    uidx >= ntb->b2b_mw_idx) &&
2809	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
2810		uidx++;
2811	return (uidx);
2812}
2813
2814static void
2815ntb_exchange_msix(void *ctx)
2816{
2817	struct ntb_softc *ntb;
2818	uint32_t val;
2819	unsigned i;
2820
2821	ntb = ctx;
2822
2823	if (ntb->peer_msix_good)
2824		goto msix_good;
2825	if (ntb->peer_msix_done)
2826		goto msix_done;
2827
2828	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2829		ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
2830		    ntb->msix_data[i].nmd_data);
2831		ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
2832		    ntb->msix_data[i].nmd_ofs);
2833	}
2834	ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
2835
2836	ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
2837	if (val != NTB_MSIX_VER_GUARD)
2838		goto reschedule;
2839
2840	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
2841		ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
2842		ntb->peer_msix_data[i].nmd_data = val;
2843		ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
2844		ntb->peer_msix_data[i].nmd_ofs = val;
2845	}
2846
2847	ntb->peer_msix_done = true;
2848
2849msix_done:
2850	ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
2851	ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
2852	if (val != NTB_MSIX_RECEIVED)
2853		goto reschedule;
2854
2855	ntb->peer_msix_good = true;
2856	/* Give peer time to see our NTB_MSIX_RECEIVED. */
2857	goto reschedule;
2858
2859msix_good:
2860	ntb_poll_link(ntb);
2861	ntb_link_event(ntb);
2862	return;
2863
2864reschedule:
2865	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2866	if (_xeon_link_is_up(ntb)) {
2867		callout_reset(&ntb->peer_msix_work,
2868		    hz * (ntb->peer_msix_good ? 2 : 1) / 100,
2869		    ntb_exchange_msix, ntb);
2870	} else
2871		ntb_spad_clear(ntb);
2872}
2873
2874/*
2875 * Public API to the rest of the OS
2876 */
2877
2878/**
2879 * ntb_get_max_spads() - get the total scratch regs usable
2880 * @ntb: pointer to ntb_softc instance
2881 *
2882 * This function returns the max 32bit scratchpad registers usable by the
2883 * upper layer.
2884 *
2885 * RETURNS: total number of scratch pad registers available
2886 */
2887uint8_t
2888ntb_get_max_spads(struct ntb_softc *ntb)
2889{
2890
2891	return (ntb->spad_count);
2892}
2893
2894/*
2895 * ntb_mw_count() - Get the number of memory windows available for KPI
2896 * consumers.
2897 *
2898 * (Excludes any MW wholly reserved for register access.)
2899 */
2900uint8_t
2901ntb_mw_count(struct ntb_softc *ntb)
2902{
2903	uint8_t res;
2904
2905	res = ntb->mw_count;
2906	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2907		res--;
2908	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
2909		res--;
2910	return (res);
2911}
2912
2913/**
2914 * ntb_spad_write() - write to the secondary scratchpad register
2915 * @ntb: pointer to ntb_softc instance
2916 * @idx: index to the scratchpad register, 0 based
2917 * @val: the data value to put into the register
2918 *
2919 * This function allows writing of a 32bit value to the indexed scratchpad
2920 * register. The register resides on the secondary (external) side.
2921 *
2922 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2923 */
2924int
2925ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2926{
2927
2928	if (idx >= ntb->spad_count)
2929		return (EINVAL);
2930
2931	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2932
2933	return (0);
2934}
2935
2936/*
2937 * Zeros the local scratchpad.
2938 */
2939void
2940ntb_spad_clear(struct ntb_softc *ntb)
2941{
2942	unsigned i;
2943
2944	for (i = 0; i < ntb->spad_count; i++)
2945		ntb_spad_write(ntb, i, 0);
2946}
2947
2948/**
2949 * ntb_spad_read() - read from the primary scratchpad register
2950 * @ntb: pointer to ntb_softc instance
2951 * @idx: index to scratchpad register, 0 based
2952 * @val: pointer to 32bit integer for storing the register value
2953 *
2954 * This function allows reading of the 32bit scratchpad register on
2955 * the primary (internal) side.
2956 *
2957 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2958 */
2959int
2960ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2961{
2962
2963	if (idx >= ntb->spad_count)
2964		return (EINVAL);
2965
2966	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2967
2968	return (0);
2969}
2970
2971/**
2972 * ntb_peer_spad_write() - write to the secondary scratchpad register
2973 * @ntb: pointer to ntb_softc instance
2974 * @idx: index to the scratchpad register, 0 based
2975 * @val: the data value to put into the register
2976 *
2977 * This function allows writing of a 32bit value to the indexed scratchpad
2978 * register. The register resides on the secondary (external) side.
2979 *
2980 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2981 */
2982int
2983ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2984{
2985
2986	if (idx >= ntb->spad_count)
2987		return (EINVAL);
2988
2989	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2990		ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2991	else
2992		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2993
2994	return (0);
2995}
2996
2997/**
2998 * ntb_peer_spad_read() - read from the primary scratchpad register
2999 * @ntb: pointer to ntb_softc instance
3000 * @idx: index to scratchpad register, 0 based
3001 * @val: pointer to 32bit integer for storing the register value
3002 *
3003 * This function allows reading of the 32bit scratchpad register on
3004 * the primary (internal) side.
3005 *
3006 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
3007 */
3008int
3009ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
3010{
3011
3012	if (idx >= ntb->spad_count)
3013		return (EINVAL);
3014
3015	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
3016		*val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
3017	else
3018		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
3019
3020	return (0);
3021}
3022
3023/*
3024 * ntb_mw_get_range() - get the range of a memory window
3025 * @ntb:        NTB device context
3026 * @idx:        Memory window number
3027 * @base:       OUT - the base address for mapping the memory window
3028 * @size:       OUT - the size for mapping the memory window
3029 * @align:      OUT - the base alignment for translating the memory window
3030 * @align_size: OUT - the size alignment for translating the memory window
3031 *
3032 * Get the range of a memory window.  NULL may be given for any output
3033 * parameter if the value is not needed.  The base and size may be used for
3034 * mapping the memory window, to access the peer memory.  The alignment and
3035 * size may be used for translating the memory window, for the peer to access
3036 * memory on the local system.
3037 *
3038 * Return: Zero on success, otherwise an error number.
3039 */
3040int
3041ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
3042    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
3043    bus_addr_t *plimit)
3044{
3045	struct ntb_pci_bar_info *bar;
3046	bus_addr_t limit;
3047	size_t bar_b2b_off;
3048	enum ntb_bar bar_num;
3049
3050	if (mw_idx >= ntb_mw_count(ntb))
3051		return (EINVAL);
3052	mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
3053
3054	bar_num = ntb_mw_to_bar(ntb, mw_idx);
3055	bar = &ntb->bar_info[bar_num];
3056	bar_b2b_off = 0;
3057	if (mw_idx == ntb->b2b_mw_idx) {
3058		KASSERT(ntb->b2b_off != 0,
3059		    ("user shouldn't get non-shared b2b mw"));
3060		bar_b2b_off = ntb->b2b_off;
3061	}
3062
3063	if (bar_is_64bit(ntb, bar_num))
3064		limit = BUS_SPACE_MAXADDR;
3065	else
3066		limit = BUS_SPACE_MAXADDR_32BIT;
3067
3068	if (base != NULL)
3069		*base = bar->pbase + bar_b2b_off;
3070	if (vbase != NULL)
3071		*vbase = bar->vbase + bar_b2b_off;
3072	if (size != NULL)
3073		*size = bar->size - bar_b2b_off;
3074	if (align != NULL)
3075		*align = bar->size;
3076	if (align_size != NULL)
3077		*align_size = 1;
3078	if (plimit != NULL)
3079		*plimit = limit;
3080	return (0);
3081}
3082
3083/*
3084 * ntb_mw_set_trans() - set the translation of a memory window
3085 * @ntb:        NTB device context
3086 * @idx:        Memory window number
3087 * @addr:       The dma address local memory to expose to the peer
3088 * @size:       The size of the local memory to expose to the peer
3089 *
3090 * Set the translation of a memory window.  The peer may access local memory
3091 * through the window starting at the address, up to the size.  The address
3092 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
3093 * must be aligned to the size alignment specified by ntb_mw_get_range().  The
3094 * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
3095 * 32-bit BARs).
3096 *
3097 * Return: Zero on success, otherwise an error number.
3098 */
3099int
3100ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
3101    size_t size)
3102{
3103	struct ntb_pci_bar_info *bar;
3104	uint64_t base, limit, reg_val;
3105	size_t bar_size, mw_size;
3106	uint32_t base_reg, xlat_reg, limit_reg;
3107	enum ntb_bar bar_num;
3108
3109	if (idx >= ntb_mw_count(ntb))
3110		return (EINVAL);
3111	idx = ntb_user_mw_to_idx(ntb, idx);
3112
3113	bar_num = ntb_mw_to_bar(ntb, idx);
3114	bar = &ntb->bar_info[bar_num];
3115
3116	bar_size = bar->size;
3117	if (idx == ntb->b2b_mw_idx)
3118		mw_size = bar_size - ntb->b2b_off;
3119	else
3120		mw_size = bar_size;
3121
3122	/* Hardware requires that addr is aligned to bar size */
3123	if ((addr & (bar_size - 1)) != 0)
3124		return (EINVAL);
3125
3126	if (size > mw_size)
3127		return (EINVAL);
3128
3129	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
3130
3131	limit = 0;
3132	if (bar_is_64bit(ntb, bar_num)) {
3133		base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
3134
3135		if (limit_reg != 0 && size != mw_size)
3136			limit = base + size;
3137
3138		/* Set and verify translation address */
3139		ntb_reg_write(8, xlat_reg, addr);
3140		reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
3141		if (reg_val != addr) {
3142			ntb_reg_write(8, xlat_reg, 0);
3143			return (EIO);
3144		}
3145
3146		/* Set and verify the limit */
3147		ntb_reg_write(8, limit_reg, limit);
3148		reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
3149		if (reg_val != limit) {
3150			ntb_reg_write(8, limit_reg, base);
3151			ntb_reg_write(8, xlat_reg, 0);
3152			return (EIO);
3153		}
3154	} else {
3155		/* Configure 32-bit (split) BAR MW */
3156
3157		if ((addr & UINT32_MAX) != addr)
3158			return (ERANGE);
3159		if (((addr + size) & UINT32_MAX) != (addr + size))
3160			return (ERANGE);
3161
3162		base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
3163
3164		if (limit_reg != 0 && size != mw_size)
3165			limit = base + size;
3166
3167		/* Set and verify translation address */
3168		ntb_reg_write(4, xlat_reg, addr);
3169		reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
3170		if (reg_val != addr) {
3171			ntb_reg_write(4, xlat_reg, 0);
3172			return (EIO);
3173		}
3174
3175		/* Set and verify the limit */
3176		ntb_reg_write(4, limit_reg, limit);
3177		reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
3178		if (reg_val != limit) {
3179			ntb_reg_write(4, limit_reg, base);
3180			ntb_reg_write(4, xlat_reg, 0);
3181			return (EIO);
3182		}
3183	}
3184	return (0);
3185}
3186
/*
 * ntb_mw_clear_trans() - remove the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	User-visible memory window number
 *
 * Implemented as ntb_mw_set_trans() with a zero address and a zero size,
 * so the peer may no longer reach local memory through the window.
 *
 * Return: Whatever ntb_mw_set_trans() returns: zero on success, otherwise
 * an error number.
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int error;

	error = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (error);
}
3203
3204/*
3205 * ntb_mw_get_wc - Get the write-combine status of a memory window
3206 *
3207 * Returns:  Zero on success, setting *wc; otherwise an error number (e.g. if
3208 * idx is an invalid memory window).
3209 *
3210 * Mode is a VM_MEMATTR_* type.
3211 */
3212int
3213ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
3214{
3215	struct ntb_pci_bar_info *bar;
3216
3217	if (idx >= ntb_mw_count(ntb))
3218		return (EINVAL);
3219	idx = ntb_user_mw_to_idx(ntb, idx);
3220
3221	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
3222	*mode = bar->map_mode;
3223	return (0);
3224}
3225
3226/*
3227 * ntb_mw_set_wc - Set the write-combine status of a memory window
3228 *
3229 * If 'mode' matches the current status, this does nothing and succeeds.  Mode
3230 * is a VM_MEMATTR_* type.
3231 *
3232 * Returns:  Zero on success, setting the caching attribute on the virtual
3233 * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
3234 * memory window, or if changing the caching attribute fails).
3235 */
3236int
3237ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3238{
3239
3240	if (idx >= ntb_mw_count(ntb))
3241		return (EINVAL);
3242
3243	idx = ntb_user_mw_to_idx(ntb, idx);
3244	return (ntb_mw_set_wc_internal(ntb, idx, mode));
3245}
3246
3247static int
3248ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
3249{
3250	struct ntb_pci_bar_info *bar;
3251	int rc;
3252
3253	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
3254	if (bar->map_mode == mode)
3255		return (0);
3256
3257	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
3258	if (rc == 0)
3259		bar->map_mode = mode;
3260
3261	return (rc);
3262}
3263
3264/**
3265 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
3266 * @ntb: pointer to ntb_softc instance
3267 * @bit: doorbell bits to ring
3268 *
3269 * This function allows triggering of a doorbell on the secondary/external
3270 * side that will initiate an interrupt on the remote host
3271 */
3272void
3273ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
3274{
3275
3276	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
3277		struct ntb_pci_bar_info *lapic;
3278		unsigned i;
3279
3280		lapic = ntb->peer_lapic_bar;
3281
3282		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
3283			if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
3284				bus_space_write_4(lapic->pci_bus_tag,
3285				    lapic->pci_bus_handle,
3286				    ntb->peer_msix_data[i].nmd_ofs,
3287				    ntb->peer_msix_data[i].nmd_data);
3288		}
3289		return;
3290	}
3291
3292	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
3293		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
3294		return;
3295	}
3296
3297	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
3298}
3299
3300/*
3301 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
3302 * as well as the size of the register (via *sz_out).
3303 *
3304 * This function allows a caller using I/OAT DMA to chain the remote doorbell
3305 * ring to its memory window write.
3306 *
3307 * Note that writing the peer doorbell via a memory window will *not* generate
3308 * an interrupt on the remote host; that must be done seperately.
3309 */
3310bus_addr_t
3311ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
3312{
3313	struct ntb_pci_bar_info *bar;
3314	uint64_t regoff;
3315
3316	KASSERT(sz_out != NULL, ("must be non-NULL"));
3317
3318	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
3319		bar = &ntb->bar_info[NTB_CONFIG_BAR];
3320		regoff = ntb->peer_reg->db_bell;
3321	} else {
3322		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
3323		    ("invalid b2b idx"));
3324
3325		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
3326		regoff = XEON_PDOORBELL_OFFSET;
3327	}
3328	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
3329
3330	*sz_out = ntb->reg->db_size;
3331	/* HACK: Specific to current x86 bus implementation. */
3332	return ((uint64_t)bar->pci_bus_handle + regoff);
3333}
3334
3335/*
3336 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
3337 * @ntb:	NTB device context
3338 *
3339 * Hardware may support different number or arrangement of doorbell bits.
3340 *
3341 * Return: A mask of doorbell bits supported by the ntb.
3342 */
3343uint64_t
3344ntb_db_valid_mask(struct ntb_softc *ntb)
3345{
3346
3347	return (ntb->db_valid_mask);
3348}
3349
3350/*
3351 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
3352 * @ntb:	NTB device context
3353 * @vector:	Doorbell vector number
3354 *
3355 * Each interrupt vector may have a different number or arrangement of bits.
3356 *
3357 * Return: A mask of doorbell bits serviced by a vector.
3358 */
3359uint64_t
3360ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
3361{
3362
3363	if (vector > ntb->db_vec_count)
3364		return (0);
3365	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
3366}
3367
3368/**
3369 * ntb_link_is_up() - get the current ntb link state
3370 * @ntb:        NTB device context
3371 * @speed:      OUT - The link speed expressed as PCIe generation number
3372 * @width:      OUT - The link width expressed as the number of PCIe lanes
3373 *
3374 * RETURNS: true or false based on the hardware link state
3375 */
3376bool
3377ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
3378    enum ntb_width *width)
3379{
3380
3381	if (speed != NULL)
3382		*speed = ntb_link_sta_speed(ntb);
3383	if (width != NULL)
3384		*width = ntb_link_sta_width(ntb);
3385	return (link_is_up(ntb));
3386}
3387
3388static void
3389save_bar_parameters(struct ntb_pci_bar_info *bar)
3390{
3391
3392	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
3393	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
3394	bar->pbase = rman_get_start(bar->pci_resource);
3395	bar->size = rman_get_size(bar->pci_resource);
3396	bar->vbase = rman_get_virtual(bar->pci_resource);
3397}
3398
3399device_t
3400ntb_get_device(struct ntb_softc *ntb)
3401{
3402
3403	return (ntb->device);
3404}
3405
/*
 * Export HW-specific errata information: reports the result of
 * HAS_FEATURE() for the given feature flag(s).
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	/*
	 * NOTE(review): HAS_FEATURE() is given no softc here; presumably the
	 * macro picks up the local 'ntb' -- confirm against its definition.
	 */
	present = HAS_FEATURE(feature);
	return (present);
}
3413