/*-
 * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/resource.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>

#include "ntb_hw_intel.h"
#include "../ntb.h"

#define MAX_MSIX_INTERRUPTS	\
	MAX(MAX(XEON_DB_COUNT, ATOM_DB_COUNT), XEON_GEN3_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
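/* BAR addresses are aligned to at least 4KB; mask off the low 12 bits. */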
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0

/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8
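/*
 * Each 16-byte MSI-X table entry holds the message address (low/high
 * dwords), the message data, and a vector control word.
 */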

enum ntb_device_type {
	NTB_XEON_GEN1,
	NTB_XEON_GEN3,
	NTB_XEON_GEN4,
	NTB_ATOM
};

/* ntb_conn_type values are hardware numbers and cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};

enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};

enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};

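/*
 * Scratchpad indices used to exchange MSI-X data with the peer for the
 * SB01BASE_LOCKUP workaround: a version guard word, three message data
 * words, three table offsets, and a completion flag.
 */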
enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};

/* Device features and workarounds */
#define HAS_FEATURE(ntb, feature)	\
	(((ntb)->features & (feature)) != 0)

struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;
};

struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	vm_memattr_t		map_mode;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};

struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
	unsigned		masked;
};

struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;
	unsigned	mw_bar[NTB_MAX_BARS];
};

struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	db_clear;
	uint32_t	spad;
};

struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};

struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};

struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};

struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	bus_dma_tag_t		bar0_dma_tag;
	bus_dmamap_t		bar0_dma_map;

	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP */
	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP */

	int				last_ts;	/* ticks @ last irq */

	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};

#ifdef __i386__
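/*
 * i386 has no native 64-bit bus_space accessors, so synthesize them from two
 * 32-bit operations.  Note that these are not atomic with respect to the
 * device, which may observe or supply the two halves separately.
 */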
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif

#define intel_ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define intel_ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define intel_ntb_reg_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define intel_ntb_reg_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define intel_ntb_mw_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset)
#define intel_ntb_mw_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
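/*
 * These accessors expect a local "ntb" softc pointer to be in scope.  For
 * example, intel_ntb_reg_read(8, ntb->self_reg->db_bell) performs an 8-byte
 * read of the local doorbell register in the configuration BAR (BAR0).
 */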

static int intel_ntb_probe(device_t device);
static int intel_ntb_attach(device_t device);
static int intel_ntb_detach(device_t device);
static uint64_t intel_ntb_db_valid_mask(device_t dev);
static void intel_ntb_spad_clear(device_t dev);
static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width);
static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
    enum ntb_width width);
static int intel_ntb_link_disable(device_t dev);
static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);

static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int intel_ntb_init_isr(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb);
static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
static void intel_ntb_exchange_msix(void *);
static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon_gen4(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon_gen4_cfg(struct ntb_softc *ntb);
static int intel_ntb_detect_atom(struct ntb_softc *ntb);
static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen4_init_dev(struct ntb_softc *ntb);
static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static int xeon_gen3_setup_b2b_mw(struct ntb_softc *);
static int xeon_gen4_setup_b2b_mw(struct ntb_softc *);
static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr,
    size_t size);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void recover_atom_link(void *arg);
static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);

static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
#define intel_ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)

#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));

static inline vm_memattr_t
intel_ntb_pat_flags(void)
{

	switch (g_ntb_mw_pat) {
	case _NTB_PAT_WC:
		return (VM_MEMATTR_WRITE_COMBINING);
	case _NTB_PAT_WT:
		return (VM_MEMATTR_WRITE_THROUGH);
	case _NTB_PAT_WP:
		return (VM_MEMATTR_WRITE_PROTECTED);
	case _NTB_PAT_WB:
		return (VM_MEMATTR_WRITE_BACK);
	case _NTB_PAT_UCM:
		return (VM_MEMATTR_WEAK_UNCACHEABLE);
	case _NTB_PAT_UC:
		/* FALLTHROUGH */
	default:
		return (VM_MEMATTR_UNCACHEABLE);
	}
}

/*
 * Well, this obviously doesn't belong here, but it doesn't seem to exist
 * anywhere better yet.
 */
static inline const char *
intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
{

	switch (pat) {
	case VM_MEMATTR_WRITE_COMBINING:
		return ("WRITE_COMBINING");
	case VM_MEMATTR_WRITE_THROUGH:
		return ("WRITE_THROUGH");
	case VM_MEMATTR_WRITE_PROTECTED:
		return ("WRITE_PROTECTED");
	case VM_MEMATTR_WRITE_BACK:
		return ("WRITE_BACK");
	case VM_MEMATTR_WEAK_UNCACHEABLE:
		return ("UNCACHED");
	case VM_MEMATTR_UNCACHEABLE:
		return ("UNCACHEABLE");
	default:
		return ("UNKNOWN");
	}
}

static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware erratum.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with the SB01BASE_LOCKUP erratum.)");

static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with the SDOORBELL_LOCKUP "
    "erratum.)");

/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
#define NTB_BAR_ALIGN		(1 << 4)
#define NTB_LTR_BAD		(1 << 5)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)
#define NTB_ONE_MSIX		(1ull << 17)

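/*
 * Bit-name string for printf(9) %b: the leading \20 prints the value in
 * hex; each subsequent \<n> gives the 1-based bit number (in octal) of the
 * feature name that follows it.
 */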
#define NTB_FEATURES_STR \
    "\20\21SPLIT_BAR4\06LTR_BAD\05BAR_ALIGN" \
    "\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"

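/*
 * Device IDs below are encoded as (PCI device ID << 16) | PCI vendor ID,
 * matching what pci_get_devid() returns.  Vendor 0x8086 is Intel.
 */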
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x201C8086, "SKL Xeon E5 V5 Non-Transparent Bridge B2B",
		NTB_XEON_GEN3, 0 },

	{ 0x347e8086, "ICX/SPR Xeon Non-Transparent Bridge B2B",
	    NTB_XEON_GEN4, 0 },
};

static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};

static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};

static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static const struct ntb_reg xeon_gen3_reg = {
	.ntb_ctl = XEON_GEN3_REG_IMNTB_CTRL,
	.lnk_sta = XEON_GEN3_INT_LNK_STS_OFFSET,
	.db_size = sizeof(uint32_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg xeon_gen3_pri_reg = {
	.db_bell = XEON_GEN3_REG_EMDOORBELL,
	.db_mask = XEON_GEN3_REG_IMINT_DISABLE,
	.spad = XEON_GEN3_REG_IMSPAD,
};

static const struct ntb_alt_reg xeon_gen3_b2b_reg = {
	.db_bell = XEON_GEN3_REG_IMDOORBELL,
	.db_mask = XEON_GEN3_REG_EMINT_DISABLE,
	.spad = XEON_GEN3_REG_IMB2B_SSPAD,
};

static const struct ntb_xlat_reg xeon_gen3_sec_xlat = {
	.bar0_base = XEON_GEN3_EXT_REG_BAR0BASE,
	.bar2_base = XEON_GEN3_EXT_REG_BAR1BASE,
	.bar4_base = XEON_GEN3_EXT_REG_BAR2BASE,

	.bar2_limit = XEON_GEN3_REG_IMBAR1XLIMIT,
	.bar4_limit = XEON_GEN3_REG_IMBAR2XLIMIT,

	.bar2_xlat = XEON_GEN3_REG_IMBAR1XBASE,
	.bar4_xlat = XEON_GEN3_REG_IMBAR2XBASE,
};

static const struct ntb_reg xeon_gen4_reg = {
	.ntb_ctl = XEON_GEN4_REG_IMNTB_CTL,
	.lnk_sta = XEON_GEN4_REG_LINK_STATUS, /* mmio */
	.db_size = sizeof(uint32_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg xeon_gen4_pri_reg = {
	.db_clear = XEON_GEN4_REG_IMINT_STATUS,
	.db_mask = XEON_GEN4_REG_IMINT_DISABLE,
	.spad = XEON_GEN4_REG_IMSPAD,
};

static const struct ntb_alt_reg xeon_gen4_b2b_reg = {
	.db_bell = XEON_GEN4_REG_IMDOORBELL,
	.spad = XEON_GEN4_REG_EMSPAD,
};

static const struct ntb_xlat_reg xeon_gen4_sec_xlat = {
	.bar2_limit = XEON_GEN4_REG_IMBAR1XLIMIT,
	.bar2_xlat = XEON_GEN4_REG_IMBAR1XBASE,

	.bar4_limit = XEON_GEN4_REG_IMBAR2XLIMIT,
	.bar4_xlat = XEON_GEN4_REG_IMBAR2XBASE,
};

SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/*
 * OS <-> Driver interface structures
 */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/*
 * OS <-> Driver linkage functions
 */
static int
intel_ntb_probe(device_t device)
{
	struct ntb_hw_info *p;

	p = intel_ntb_get_device_info(pci_get_devid(device));
	if (p == NULL)
		return (ENXIO);

	device_set_desc(device, p->desc);
	return (0);
}

static int
intel_ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = device_get_softc(device);
	p = intel_ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	ntb->b2b_mw_idx = B2B_MW_DISABLED;
	ntb->msix_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	callout_init(&ntb->peer_msix_work, 1);
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);

	if (ntb->type == NTB_ATOM)
		error = intel_ntb_detect_atom(ntb);
	else if (ntb->type == NTB_XEON_GEN3)
		error = intel_ntb_detect_xeon_gen3(ntb);
	else if (ntb->type == NTB_XEON_GEN4)
		error = intel_ntb_detect_xeon_gen4(ntb);
	else
		error = intel_ntb_detect_xeon(ntb);
	if (error != 0)
		goto out;

	intel_ntb_detect_max_mw(ntb);

	pci_enable_busmaster(ntb->device);

	error = intel_ntb_map_pci_bars(ntb);
	if (error != 0)
		goto out;
	if (ntb->type == NTB_ATOM)
		error = intel_ntb_atom_init_dev(ntb);
	else if (ntb->type == NTB_XEON_GEN3)
		error = intel_ntb_xeon_gen3_init_dev(ntb);
	else if (ntb->type == NTB_XEON_GEN4)
		error = intel_ntb_xeon_gen4_init_dev(ntb);
	else
		error = intel_ntb_xeon_init_dev(ntb);
	if (error != 0)
		goto out;

	intel_ntb_spad_clear(device);

	intel_ntb_poll_link(ntb);

	intel_ntb_sysctl_init(ntb);

	/* Attach children to this controller */
	error = ntb_register_device(device);

out:
	if (error != 0)
		intel_ntb_detach(device);
	return (error);
}

static int
intel_ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = device_get_softc(device);

	/* Detach & delete all children */
	ntb_unregister_device(device);

	if (ntb->self_reg != NULL) {
		DB_MASK_LOCK(ntb);
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
		DB_MASK_UNLOCK(ntb);
	}
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	callout_drain(&ntb->peer_msix_work);
	pci_disable_busmaster(ntb->device);
	if (ntb->type == NTB_XEON_GEN1)
		intel_ntb_teardown_xeon(ntb);
	intel_ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	intel_ntb_unmap_pci_bar(ntb);

	return (0);
}

/*
 * Driver internal routines
 */
static inline enum ntb_bar
intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count,
	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));

	return (ntb->reg->mw_bar[mw]);
}

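/*
 * BAR0 (registers) and BAR2 are always 64-bit.  BAR4 is 64-bit too, unless
 * split-BAR mode carves it into two 32-bit BARs (BAR4 and BAR5).
 */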
static inline bool
bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
{
	/* XXX This assertion could be stronger. */
	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
}

static inline void
bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
    uint32_t *xlat, uint32_t *lmt)
{
	uint32_t basev, lmtv, xlatv;

	switch (bar) {
	case NTB_B2B_BAR_1:
		basev = ntb->xlat_reg->bar2_base;
		lmtv = ntb->xlat_reg->bar2_limit;
		xlatv = ntb->xlat_reg->bar2_xlat;
		break;
	case NTB_B2B_BAR_2:
		basev = ntb->xlat_reg->bar4_base;
		lmtv = ntb->xlat_reg->bar4_limit;
		xlatv = ntb->xlat_reg->bar4_xlat;
		break;
	case NTB_B2B_BAR_3:
		basev = ntb->xlat_reg->bar5_base;
		lmtv = ntb->xlat_reg->bar5_limit;
		xlatv = ntb->xlat_reg->bar5_xlat;
		break;
	default:
		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
		    ("bad bar"));
		basev = lmtv = xlatv = 0;
		break;
	}

	if (base != NULL)
		*base = basev;
	if (xlat != NULL)
		*xlat = xlatv;
	if (lmt != NULL)
		*lmt = lmtv;
}

static int
intel_ntb_map_pci_bars(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[NTB_CONFIG_BAR];
	bar->pci_resource_id = PCIR_BAR(0);
	rc = map_mmr_bar(ntb, bar);
	if (rc != 0)
		goto out;

	/*
	 * At least on Xeon v4, the NTB device leaks some remote-side BAR0
	 * writes (those meant to update scratchpad registers) through to the
	 * host.  It is not clear why this happens, but it may be related to
	 * the fact that BAR0 is 32KB on the link side while it is 64KB on
	 * the host side.  Without this hack the DMAR blocks those accesses
	 * as not allowed.
	 */
	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
		device_printf(ntb->device, "Unable to create BAR0 tag\n");
		return (ENOMEM);
	}
	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
		device_printf(ntb->device, "Unable to create BAR0 map\n");
		return (ENOMEM);
	}
	if (bus_dma_iommu_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
	    bar->pbase, bar->size, 0)) {
		device_printf(ntb->device, "Unable to load BAR0 map\n");
		return (ENOMEM);
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_1];
	bar->pci_resource_id = PCIR_BAR(2);
	rc = map_memory_window_bar(ntb, bar);
	if (rc != 0)
		goto out;
	if (ntb->type == NTB_XEON_GEN3) {
		bar->psz_off = XEON_GEN3_INT_REG_IMBAR1SZ;
		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR1SZ;
		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR1XBASE;
	} else if (ntb->type == NTB_XEON_GEN4) {
		bar->psz_off = XEON_GEN4_CFG_REG_IMBAR1SZ;
		bar->ssz_off = XEON_GEN4_CFG_REG_EMBAR1SZ;
		bar->pbarxlat_off = XEON_GEN4_REG_EXT_BAR1BASE;
	} else {
		bar->psz_off = XEON_PBAR23SZ_OFFSET;
		bar->ssz_off = XEON_SBAR23SZ_OFFSET;
		bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_2];
	bar->pci_resource_id = PCIR_BAR(4);
	rc = map_memory_window_bar(ntb, bar);
	if (rc != 0)
		goto out;
	if (ntb->type == NTB_XEON_GEN3) {
		bar->psz_off = XEON_GEN3_INT_REG_IMBAR2SZ;
		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR2SZ;
		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR2XBASE;
	} else if (ntb->type == NTB_XEON_GEN4) {
		bar->psz_off = XEON_GEN4_CFG_REG_IMBAR2SZ;
		bar->ssz_off = XEON_GEN4_CFG_REG_EMBAR2SZ;
		bar->pbarxlat_off = XEON_GEN4_REG_EXT_BAR2BASE;
	} else {
		bar->psz_off = XEON_PBAR4SZ_OFFSET;
		bar->ssz_off = XEON_SBAR4SZ_OFFSET;
		bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
	}

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		goto out;

	if (ntb->type == NTB_XEON_GEN3 ||
	    ntb->type == NTB_XEON_GEN4) {
		device_printf(ntb->device, "no split bar support\n");
		return (ENXIO);
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_3];
	bar->pci_resource_id = PCIR_BAR(5);
	rc = map_memory_window_bar(ntb, bar);
	bar->psz_off = XEON_PBAR5SZ_OFFSET;
	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;

out:
	if (rc != 0)
		device_printf(ntb->device,
		    "unable to allocate pci resource\n");
	return (rc);
}

static void
print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
    const char *kind)
{

	device_printf(ntb->device,
	    "Mapped BAR%d v:[%p-%p] p:[0x%jx-0x%jx] (0x%jx bytes) (%s)\n",
	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
	    (char *)bar->vbase + bar->size - 1,
	    (uintmax_t)bar->pbase, (uintmax_t)(bar->pbase + bar->size - 1),
	    (uintmax_t)bar->size, kind);
}

static int
map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);
	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mmr");
	return (0);
}

static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	vm_memattr_t mapmode;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		save_bar_parameters(bar);
	}

	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mw");

	/*
	 * Optionally, mark MW BARs as anything other than UC to improve
	 * performance.
	 */
	mapmode = intel_ntb_pat_flags();
	if (mapmode == bar->map_mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
	if (rc == 0) {
		bar->map_mode = mapmode;
		device_printf(ntb->device,
		    "Marked BAR%d v:[%p-%p] p:[0x%jx-0x%jx] as "
		    "%s.\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (uintmax_t)bar->pbase,
		    (uintmax_t)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode));
	} else
		device_printf(ntb->device,
		    "Unable to mark BAR%d v:[%p-%p] p:[0x%jx-0x%jx] as "
		    "%s: %d\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (uintmax_t)bar->pbase,
		    (uintmax_t)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode), rc);
		/* Proceed anyway */
	return (0);
}

static void
intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *bar;
	int i;

	if (ntb->bar0_dma_map != NULL) {
		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
	}
	if (ntb->bar0_dma_tag != NULL)
		bus_dma_tag_destroy(ntb->bar0_dma_tag);
	for (i = 0; i < NTB_MAX_BARS; i++) {
		bar = &ntb->bar_info[i];
		if (bar->pci_resource != NULL)
			bus_release_resource(ntb->device, SYS_RES_MEMORY,
			    bar->pci_resource_id, bar->pci_resource);
	}
}

static int
intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;
	int rc;

	for (i = 0; i < num_vectors; i++) {
		ntb->int_info[i].rid = i + 1;
		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
		if (ntb->int_info[i].res == NULL) {
			device_printf(ntb->device,
			    "bus_alloc_resource failed\n");
			return (ENOMEM);
		}
		ntb->int_info[i].tag = NULL;
		ntb->allocated_interrupts++;
		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
		if (rc != 0) {
			device_printf(ntb->device, "bus_setup_intr failed\n");
			return (ENXIO);
		}
	}
	return (0);
}

/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");

/*
 * Remap the desired number of MSI-X messages to available ithreads in a simple
 * round-robin fashion.
 */
static int
intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
{
	u_int *vectors;
	uint32_t i;
	int rc;

	if (ntb_prefer_intx != 0)
		return (ENXIO);

	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);

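	/*
	 * MSI-X vector IDs are 1-based, so e.g. desired = 4 messages over
	 * avail = 2 vectors maps the messages to vectors 1, 2, 1, 2.
	 */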
	for (i = 0; i < desired; i++)
		vectors[i] = (i % avail) + 1;

	rc = pci_remap_msix(dev, desired, vectors);
	free(vectors, M_NTB);
	return (rc);
}

static int
intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb)
{
	uint64_t i, reg;
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/* Mask all the interrupts, including hardware interrupt */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, ~0ULL);

	/* Clear Interrupt Status */
	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS, reg);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    XEON_GEN3_DB_MSIX_VECTOR_COUNT);

	rc = pci_alloc_msix(ntb->device, &num_vectors);
	if (rc != 0) {
		device_printf(ntb->device,
		    "Interrupt allocation failed %d\n", rc);
		return (rc);
	}
	if (desired_vectors != num_vectors) {
		device_printf(ntb->device, "Couldn't get %d vectors\n",
		    XEON_GEN3_DB_MSIX_VECTOR_COUNT);
		return (ENXIO);
	}
	/* 32 db + 1 hardware */
	if (num_vectors == XEON_GEN3_DB_MSIX_VECTOR_COUNT) {
		/* Program INTVECXX source register */
		for (i = 0; i < XEON_GEN3_DB_MSIX_VECTOR_COUNT; i++) {
			/* interrupt source i for vector i */
			intel_ntb_reg_write(1, XEON_GEN3_REG_IMINTVEC00 + i, i);
			if (i == (XEON_GEN3_DB_MSIX_VECTOR_COUNT - 1)) {
				intel_ntb_reg_write(1,
				    XEON_GEN3_REG_IMINTVEC00 + i,
				    XEON_GEN3_LINK_VECTOR_INDEX);
			}
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);

		/* enable all interrupts */
		intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, 0ULL);
	} else {
		device_printf(ntb->device,
		    "need to remap interrupts, giving up.\n");
		return (ENXIO);
	}

	return (rc);
}

static int
intel_ntb_xeon_gen4_init_isr(struct ntb_softc *ntb)
{
	uint64_t i, reg;
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/* Mask all the interrupts, including hardware interrupt */
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_DISABLE, ~0ULL);

	/* Clear Interrupt Status */
	reg = intel_ntb_reg_read(8, XEON_GEN4_REG_IMINT_STATUS);
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_STATUS, reg);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    XEON_GEN4_DB_MSIX_VECTOR_COUNT);

	rc = pci_alloc_msix(ntb->device, &num_vectors);
	if (rc != 0) {
		device_printf(ntb->device,
		    "Interrupt allocation failed %d\n", rc);
		return (rc);
	}
	if (desired_vectors != num_vectors) {
		device_printf(ntb->device, "Couldn't get %d vectors\n",
		    XEON_GEN4_DB_MSIX_VECTOR_COUNT);
		return (ENXIO);
	}
	if (num_vectors != XEON_GEN4_DB_MSIX_VECTOR_COUNT) {
		device_printf(ntb->device,
		    "Need to remap interrupts, giving up\n");
		return (ENXIO);
	}

	/*
	 * The MSI-X vectors and the interrupt status bits are not lined up
	 * on Gen3 (Skylake) and Gen4.  By default the link status bit is
	 * bit 32, yet at reset it is assigned to MSI-X vector 0, so the
	 * vectors come out as 1-32,0.  Reprogram them to 0-32 so that they
	 * line up with the status bits.
	 */
	for (i = 0; i < XEON_GEN4_DB_MSIX_VECTOR_COUNT; i++)
		intel_ntb_reg_write(1, XEON_GEN4_REG_INTVEC + i, i);

	intel_ntb_create_msix_vec(ntb, num_vectors);
	rc = intel_ntb_setup_msix(ntb, num_vectors);

	/* enable all interrupts */
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_DISABLE, 0ULL);

	return (rc);
}

static int
intel_ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.  (Except link events!)
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON_GEN1 && num_vectors < ntb->db_vec_count) {
		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround does not support MSI or INTX\n");
			return (EINVAL);
		}

		ntb->db_vec_count = 1;
		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
		rc = intel_ntb_setup_legacy_interrupt(ntb);
	} else {
		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround expects %d doorbell bits\n",
			    XEON_NONLINK_DB_MSIX_BITS);
			return (EINVAL);
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		intel_ntb_free_msix_vec(ntb);
	}

	return (rc);
}

static int
intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
	int rc;

	ntb->int_info[0].rid = 0;
	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
	if (ntb->int_info[0].res == NULL) {
		device_printf(ntb->device, "bus_alloc_resource failed\n");
		return (ENOMEM);
	}

	ntb->int_info[0].tag = NULL;
	ntb->allocated_interrupts = 1;

	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
	    ntb, &ntb->int_info[0].tag);
	if (rc != 0) {
		device_printf(ntb->device, "bus_setup_intr failed\n");
		return (ENXIO);
	}

	return (0);
}

static void
intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
{
	struct ntb_int_info *current_int;
	int i;

	for (i = 0; i < ntb->allocated_interrupts; i++) {
		current_int = &ntb->int_info[i];
		if (current_int->tag != NULL)
			bus_teardown_intr(ntb->device, current_int->res,
			    current_int->tag);

		if (current_int->res != NULL)
			bus_release_resource(ntb->device, SYS_RES_IRQ,
			    rman_get_rid(current_int->res), current_int->res);
	}

	intel_ntb_free_msix_vec(ntb);
	pci_release_msi(ntb->device);
}

static inline uint64_t
db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{

	switch (ntb->type) {
	case NTB_ATOM:
	case NTB_XEON_GEN3:
	case NTB_XEON_GEN4:
		return (intel_ntb_reg_read(8, regoff));
	case NTB_XEON_GEN1:
		return (intel_ntb_reg_read(2, regoff));
	}
	__assert_unreachable();
}

static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);
	db_iowrite_raw(ntb, regoff, val);
}

static inline void
db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	switch (ntb->type) {
	case NTB_ATOM:
	case NTB_XEON_GEN3:
	case NTB_XEON_GEN4:
		intel_ntb_reg_write(8, regoff, val);
		break;
	case NTB_XEON_GEN1:
		intel_ntb_reg_write(2, regoff, (uint16_t)val);
		break;
	}
}

static void
intel_ntb_db_set_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	DB_MASK_LOCK(ntb);
	ntb->db_mask |= bits;
	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}

static void
intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t ibits;
	int i;

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	ibits = ntb->fake_db & ntb->db_mask & bits;
	ntb->db_mask &= ~bits;
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/* Simulate fake interrupts if unmasked DB bits are set. */
		ntb->force_db |= ibits;
		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
				swi_sched(ntb->int_info[i].tag, 0);
		}
	} else {
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	}
	DB_MASK_UNLOCK(ntb);
}

static uint64_t
intel_ntb_db_read(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		return (ntb->fake_db);
	if (ntb->type == NTB_XEON_GEN3)
		return (intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
	else
		return (db_ioread(ntb, ntb->self_reg->db_bell));
}

static void
intel_ntb_db_clear(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		DB_MASK_LOCK(ntb);
		ntb->fake_db &= ~bits;
		DB_MASK_UNLOCK(ntb);
		return;
	}

	if (ntb->type == NTB_XEON_GEN3)
		intel_ntb_reg_write(4, XEON_GEN3_REG_IMINT_STATUS,
		    (uint32_t)bits);
	else
		db_iowrite(ntb, ntb->self_reg->db_bell, bits);
}

static inline uint64_t
intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
	uint64_t shift, mask;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/*
		 * Remap vectors in a custom way so that at least the first
		 * three doorbells do not generate stray events.  This breaks
		 * Linux compatibility (if one existed) when more than one DB
		 * is used (not by if_ntb).
		 */
		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
			return (1 << db_vector);
		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
			return (0x7ffc);
	}

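	/*
	 * Default mapping: each vector owns db_vec_shift consecutive doorbell
	 * bits, e.g. with db_vec_shift == 4, vector 1 covers doorbell bits
	 * 4-7 (mask 0x00f0).
	 */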
1547	shift = ntb->db_vec_shift;
1548	mask = (1ull << shift) - 1;
1549	return (mask << (shift * db_vector));
1550}
1551
1552static void
1553intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1554{
1555	uint64_t vec_mask;
1556
1557	ntb->last_ts = ticks;
1558	vec_mask = intel_ntb_vec_mask(ntb, vec);
1559
1560	if ((ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4) &&
1561	    vec == XEON_GEN3_LINK_VECTOR_INDEX)
1562		vec_mask |= ntb->db_link_mask;
1563	if ((vec_mask & ntb->db_link_mask) != 0) {
1564		if (intel_ntb_poll_link(ntb))
1565			ntb_link_event(ntb->device);
1566		if (ntb->type == NTB_XEON_GEN3)
1567			intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS,
1568			    intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
1569		if (ntb->type == NTB_XEON_GEN4)
1570			intel_ntb_reg_write(8, XEON_GEN4_REG_IMINT_STATUS,
1571			    intel_ntb_reg_read(8, XEON_GEN4_REG_IMINT_STATUS));
1572	}
1573
1574	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1575	    (vec_mask & ntb->db_link_mask) == 0) {
1576		DB_MASK_LOCK(ntb);
1577
1578		/*
1579		 * Do not report same DB events again if not cleared yet,
1580		 * unless the mask was just cleared for them and this
1581		 * interrupt handler call can be the consequence of it.
1582		 */
1583		vec_mask &= ~ntb->fake_db | ntb->force_db;
1584		ntb->force_db &= ~vec_mask;
1585
1586		/* Update our internal doorbell register. */
1587		ntb->fake_db |= vec_mask;
1588
1589		/* Do not report masked DB events. */
1590		vec_mask &= ~ntb->db_mask;
1591
1592		DB_MASK_UNLOCK(ntb);
1593	}
1594
1595	if ((vec_mask & ntb->db_valid_mask) != 0)
1596		ntb_db_event(ntb->device, vec);
1597}
1598
1599static void
1600ndev_vec_isr(void *arg)
1601{
1602	struct ntb_vec *nvec = arg;
1603
1604	intel_ntb_interrupt(nvec->ntb, nvec->num);
1605}
1606
1607static void
1608ndev_irq_isr(void *arg)
1609{
1610	/* If we couldn't set up MSI-X, we only have the one vector. */
1611	intel_ntb_interrupt(arg, 0);
1612}
1613
1614static int
1615intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1616{
1617	uint32_t i;
1618
1619	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1620	    M_ZERO | M_WAITOK);
1621	for (i = 0; i < num_vectors; i++) {
1622		ntb->msix_vec[i].num = i;
1623		ntb->msix_vec[i].ntb = ntb;
1624	}
1625
1626	return (0);
1627}
1628
1629static void
1630intel_ntb_free_msix_vec(struct ntb_softc *ntb)
1631{
1632
1633	if (ntb->msix_vec == NULL)
1634		return;
1635
1636	free(ntb->msix_vec, M_NTB);
1637	ntb->msix_vec = NULL;
1638}
1639
1640static void
1641intel_ntb_get_msix_info(struct ntb_softc *ntb)
1642{
1643	struct pci_devinfo *dinfo;
1644	struct pcicfg_msix *msix;
1645	uint32_t laddr, data, i, offset;
1646
1647	dinfo = device_get_ivars(ntb->device);
1648	msix = &dinfo->cfg.msix;
1649
1650	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
1651
1652	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
1653		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
1654
1655		laddr = bus_read_4(msix->msix_table_res, offset +
1656		    PCI_MSIX_ENTRY_LOWER_ADDR);
1657		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
1658
1659		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
1660		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
1661		     MSI_INTEL_ADDR_BASE));
1662		ntb->msix_data[i].nmd_ofs = laddr;
1663
1664		data = bus_read_4(msix->msix_table_res, offset +
1665		    PCI_MSIX_ENTRY_DATA);
1666		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
1667
1668		ntb->msix_data[i].nmd_data = data;
1669	}
1670}
1671
1672static struct ntb_hw_info *
1673intel_ntb_get_device_info(uint32_t device_id)
1674{
1675	struct ntb_hw_info *ep;
1676
1677	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
1678		if (ep->device_id == device_id)
1679			return (ep);
1680	}
1681	return (NULL);
1682}
1683
1684static void
1685intel_ntb_teardown_xeon(struct ntb_softc *ntb)
1686{
1687
1688	if (ntb->reg != NULL)
1689		intel_ntb_link_disable(ntb->device);
1690}
1691
1692static void
1693intel_ntb_detect_max_mw(struct ntb_softc *ntb)
1694{
1695
1696	switch (ntb->type) {
1697	case NTB_ATOM:
1698		ntb->mw_count = ATOM_MW_COUNT;
1699		break;
1700	case NTB_XEON_GEN1:
1701		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1702			ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1703		else
1704			ntb->mw_count = XEON_SNB_MW_COUNT;
1705		break;
1706	case NTB_XEON_GEN3:
1707	case NTB_XEON_GEN4:
1708		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
1709			ntb->mw_count = XEON_GEN3_SPLIT_MW_COUNT;
1710		else
1711			ntb->mw_count = XEON_GEN3_MW_COUNT;
1712		break;
1713	}
1714}
1715
1716static int
1717intel_ntb_detect_xeon(struct ntb_softc *ntb)
1718{
1719	uint8_t ppd, conn_type;
1720
1721	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1722	ntb->ppd = ppd;
1723
1724	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1725		ntb->dev_type = NTB_DEV_DSD;
1726	else
1727		ntb->dev_type = NTB_DEV_USD;
1728
1729	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1730		ntb->features |= NTB_SPLIT_BAR;
1731
1732	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
1733	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
1734		device_printf(ntb->device,
1735		    "Can not apply SB01BASE_LOCKUP workaround "
1736		    "with split BARs disabled!\n");
1737		device_printf(ntb->device,
1738		    "Expect system hangs under heavy NTB traffic!\n");
1739		ntb->features &= ~NTB_SB01BASE_LOCKUP;
1740	}

	/*
	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
	 * errata workaround; only do one at a time.
	 */
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		ntb->features &= ~NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_atom(struct ntb_softc *ntb)
{
	uint32_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
	ntb->ppd = ppd;

	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported NTB configuration\n");
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, XEON_GEN3_INT_REG_PPD, 1);
	ntb->ppd = ppd;

	/* check port definition */
	conn_type = XEON_GEN3_REG_PPD_PORT_DEF_F(ppd);
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    conn_type);
		return (ENXIO);
	}

	/* check cross link configuration status */
	if (XEON_GEN3_REG_PPD_CONF_STS_F(ppd)) {
		/* NTB Port is configured as DSD/USP */
		ntb->dev_type = NTB_DEV_DSD;
	} else {
		/* NTB Port is configured as USD/DSP */
		ntb->dev_type = NTB_DEV_USD;
	}

	if (XEON_GEN3_REG_PPD_ONE_MSIX_F(ppd)) {
		/*
		 * When set, this bit causes only a single MSI-X message to
		 * be generated if MSI-X is enabled.
		 */
		ntb->features |= NTB_ONE_MSIX;
	}

	if (XEON_GEN3_REG_PPD_BAR45_SPL_F(ppd)) {
		/* BARs 4 and 5 are presented as two 32b non-prefetchable BARs */
		ntb->features |= NTB_SPLIT_BAR;
	}

	device_printf(ntb->device, "conn type 0x%02x, dev type 0x%02x, "
	    "features 0x%02x\n", ntb->conn_type, ntb->dev_type, ntb->features);

	return (0);
}

static int
intel_ntb_is_ICX(struct ntb_softc *ntb)
{
	uint8_t revision;

	revision = pci_get_revid(ntb->device);
	if (ntb->type == NTB_XEON_GEN4 &&
	    revision >= PCI_DEV_REV_ICX_MIN &&
	    revision <= PCI_DEV_REV_ICX_MAX)
		return (1);

	return (0);
}

static int
intel_ntb_is_SPR(struct ntb_softc *ntb)
{
	uint8_t revision;

	revision = pci_get_revid(ntb->device);
	if (ntb->type == NTB_XEON_GEN4 &&
	    revision > PCI_DEV_REV_ICX_MAX)
		return (1);

	return (0);
}

static int
intel_ntb_detect_xeon_gen4(struct ntb_softc *ntb)
{
	if (intel_ntb_is_ICX(ntb)) {
		ntb->features |= NTB_BAR_ALIGN;
		ntb->features |= NTB_LTR_BAD;
	}
	return (0);
}

static int
intel_ntb_detect_xeon_gen4_cfg(struct ntb_softc *ntb)
{
	uint32_t ppd1;

	ppd1 = intel_ntb_reg_read(4, XEON_GEN4_REG_PPD1);
	ntb->ppd = ppd1;
	if (intel_ntb_is_ICX(ntb)) {
		if ((ppd1 & GEN4_PPD_TOPO_MASK) == GEN4_PPD_TOPO_B2B_USD) {
			/* NTB Port is configured as USD/DSP */
			ntb->conn_type = NTB_CONN_B2B;
			ntb->dev_type = NTB_DEV_USD;
		} else if ((ppd1 & GEN4_PPD_TOPO_MASK) == GEN4_PPD_TOPO_B2B_DSD) {
			/* NTB Port is configured as DSD/USP */
			ntb->conn_type = NTB_CONN_B2B;
			ntb->dev_type = NTB_DEV_DSD;
		} else {
			device_printf(ntb->device, "Unsupported connection type: %u\n",
			    (ppd1 & GEN4_PPD_CONN_MASK));
			return (ENXIO);
		}
	} else if (intel_ntb_is_SPR(ntb)) {
		if ((ppd1 & SPR_PPD_TOPO_MASK) == SPR_PPD_TOPO_B2B_USD) {
			/* NTB Port is configured as USD/DSP */
			ntb->conn_type = NTB_CONN_B2B;
			ntb->dev_type = NTB_DEV_USD;
		} else if ((ppd1 & SPR_PPD_TOPO_MASK) == SPR_PPD_TOPO_B2B_DSD) {
			/* NTB Port is configured as DSD/USP */
			ntb->conn_type = NTB_CONN_B2B;
			ntb->dev_type = NTB_DEV_DSD;
		} else {
			device_printf(ntb->device, "Unsupported connection type: %u\n",
			    (ppd1 & SPR_PPD_CONN_MASK));
			return (ENXIO);
		}
	}

	device_printf(ntb->device, "conn type 0x%02x, dev type 0x%02x, "
	    "features 0x%02x\n", ntb->conn_type, ntb->dev_type, ntb->features);

	return (0);
}

static int
intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		ntb->force_db = ntb->fake_db = 0;
		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
		    g_ntb_msix_idx, ntb->msix_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		/*
		 * There is a Xeon hardware errata related to writes to SDOORBELL or
		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
		 * which may hang the system.  To work around this, use a memory
		 * window to access the interrupt and scratch pad registers on the
		 * remote system.
		 */
		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
		    g_ntb_mw_idx, ntb->b2b_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
		 * mirrored to the remote system.  Shrink the number of bits by one,
		 * since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
		 * anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/*
	 * Mask all doorbell interrupts.
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	rc = intel_ntb_init_isr(ntb);
	return (rc);
}

static int
intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count = XEON_GEN3_SPAD_COUNT;
	ntb->db_count = XEON_GEN3_DB_COUNT;
	ntb->db_link_mask = XEON_GEN3_DB_LINK_BIT;
	ntb->db_vec_count = XEON_GEN3_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift = XEON_GEN3_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_gen3_reg;
	ntb->self_reg = &xeon_gen3_pri_reg;
	ntb->peer_reg = &xeon_gen3_b2b_reg;
	ntb->xlat_reg = &xeon_gen3_sec_xlat;

	ntb->db_valid_mask = (1ULL << ntb->db_count) - 1;

	xeon_gen3_setup_b2b_mw(ntb);

	/* Enable Bus Master and Memory Space on the External Side */
	intel_ntb_reg_write(2, XEON_GEN3_EXT_REG_PCI_CMD,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Setup Interrupt */
	rc = intel_ntb_xeon_gen3_init_isr(ntb);

	return (rc);
}

static int
intel_ntb_xeon_gen4_init_dev(struct ntb_softc *ntb)
{
	int rc;
	uint16_t lnkctl;

	ntb->spad_count = XEON_GEN4_SPAD_COUNT;
	ntb->db_count = XEON_GEN4_DB_COUNT;
	ntb->db_link_mask = XEON_GEN4_DB_LINK_BIT;
	ntb->db_vec_count = XEON_GEN4_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift = XEON_GEN4_DB_MSIX_VECTOR_SHIFT;

	if (intel_ntb_detect_xeon_gen4_cfg(ntb) != 0)
		return (ENXIO);

	ntb->reg = &xeon_gen4_reg;
	ntb->self_reg = &xeon_gen4_pri_reg;
	ntb->peer_reg = &xeon_gen4_b2b_reg;
	ntb->xlat_reg = &xeon_gen4_sec_xlat;

	ntb->db_valid_mask = (1ULL << ntb->db_count) - 1;
	xeon_gen4_setup_b2b_mw(ntb);

	/* init link setup */
	lnkctl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
	lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE;
	intel_ntb_reg_write(2, XEON_GEN4_REG_LINK_CTRL, lnkctl);

	/* Setup Interrupt */
	rc = intel_ntb_xeon_gen4_init_isr(ntb);
	return (rc);
}

static int
intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
	int error;

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = ATOM_SPAD_COUNT;
	ntb->db_count		 = ATOM_DB_COUNT;
	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &atom_reg;
	ntb->self_reg = &atom_pri_reg;
	ntb->peer_reg = &atom_b2b_reg;
	ntb->xlat_reg = &atom_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_atom_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	error = intel_ntb_init_isr(ntb);
	if (error != 0)
		return (error);

	/* Initiate PCI-E link training */
	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);

	return (0);
}

/* XXX: Linux driver doesn't seem to do any of this for Atom. */
static void
configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{

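	/* Note: the USD and DSD branches currently program identical values. */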
	if (ntb->dev_type == NTB_DEV_USD) {
		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
	} else {
		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
	}
}

/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
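/*
 * Illustrative usage: as a read-only tunable (CTLFLAG_RDTUN), this is
 * normally set from loader.conf on both hosts, e.g. hw.ntb.b2b_mw_share=1;
 * any non-zero value enables sharing.
 */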

static void
xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
    enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	uint8_t bar_sz;

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
		return;

	bar = &ntb->bar_info[idx];
	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
	if (idx == regbar) {
		if (ntb->b2b_off != 0)
			bar_sz--;
		else
			bar_sz = 0;
	}
	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
	(void)bar_sz;
}

static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	uint64_t reg_val;
	uint32_t base_reg, lmt_reg;

	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
	if (idx == regbar) {
		if (ntb->b2b_off)
			bar_addr += ntb->b2b_off;
		else
			bar_addr = 0;
	}

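	/*
	 * Each write below is followed by a read-back (value discarded),
	 * presumably to flush the posted MMIO write before continuing.
	 */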
	if (!bar_is_64bit(ntb, idx)) {
		intel_ntb_reg_write(4, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(4, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, lmt_reg);
		(void)reg_val;
	} else {
		intel_ntb_reg_write(8, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(8, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, lmt_reg);
		(void)reg_val;
	}
}

static void
xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
{
	struct ntb_pci_bar_info *bar;

	bar = &ntb->bar_info[idx];
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
	} else {
		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
	}
	(void)base_addr;
}

static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = bar_size >> 1;
		} else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		uint32_t xlat_reg, lmt_reg;
		enum ntb_bar bar_num;

		/*
		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
		 * workaround
		 */
		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
		if (bar_is_64bit(ntb, bar_num)) {
			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
			intel_ntb_reg_write(8, lmt_reg, 0);
		} else {
			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
			intel_ntb_reg_write(4, lmt_reg, 0);
		}

		ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
	}
	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}

static int
xeon_gen3_setup_b2b_mw(struct ntb_softc *ntb)
{
	uint64_t reg;
	uint32_t embarsz, imbarsz;

	/* IMBAR1SZ should be equal to EMBAR1SZ */
	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR1SZ, 1);
	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR1SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR1SZ (%u) should be equal to EMBAR1SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* IMBAR2SZ should be equal to EMBAR2SZ */
	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR2SZ, 1);
	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR2SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR2SZ (%u) should be equal to EMBAR2SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* Client will provide the incoming IMBAR1/2XBASE, zero it for now */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XBASE, 0);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XBASE, 0);

	/*
	 * If the value in IMBAR1XLIMIT equals the value in IMBAR1XBASE,
	 * the local memory window exposure from EMBAR1 is disabled.
	 * This is needed to prevent malicious access.
	 */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XLIMIT, 0);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XLIMIT, 0);

	/* Config outgoing translation limits (whole bar size windows) */
	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_EMBAR1XBASE);
	reg += ntb->bar_info[NTB_B2B_BAR_1].size;
	intel_ntb_reg_write(8, XEON_GEN3_REG_EMBAR1XLIMIT, reg);

	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_EMBAR2XBASE);
	reg += ntb->bar_info[NTB_B2B_BAR_2].size;
	intel_ntb_reg_write(8, XEON_GEN3_REG_EMBAR2XLIMIT, reg);

	return (0);
}

static int
xeon_gen4_setup_b2b_mw(struct ntb_softc *ntb)
{
	uint32_t embarsz, imbarsz;

	/* IMBAR23SZ should be equal to EMBAR23SZ */
	imbarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_IMBAR1SZ, 1);
	embarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_EMBAR1SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR23SZ (%u) should be equal to EMBAR23SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}
	/* IMBAR45SZ should be equal to EMBAR45SZ */
	imbarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_IMBAR2SZ, 1);
	embarsz = pci_read_config(ntb->device, XEON_GEN4_CFG_REG_EMBAR2SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR45SZ (%u) should be equal to EMBAR45SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* Client will provide the incoming IMBARXBASE, zero it for now */
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR1XBASE, 0);
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR2XBASE, 0);

	/*
	 * If the value in IMBARXLIMIT equals the value in IMBARXBASE,
	 * the local memory window exposure from EMBAR is disabled.
	 * This is needed to prevent malicious access.
	 */
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR1XLIMIT, 0);
	intel_ntb_reg_write(8, XEON_GEN4_REG_IMBAR2XLIMIT, 0);

	/* EMBARXLIMIT & EMBARXBASE are gone for gen4, noop here */

	return (0);
}

static inline bool
_xeon_link_is_up(struct ntb_softc *ntb)
{

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);
	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
}

static inline bool
link_is_up(struct ntb_softc *ntb)
{

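	/*
	 * Under the SB01BASE_LOCKUP workaround, the link is reported up
	 * only after the MSI-X exchange with the peer has completed.
	 */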
	if (ntb->type == NTB_XEON_GEN1 ||
	    ntb->type == NTB_XEON_GEN3 ||
	    ntb->type == NTB_XEON_GEN4)
		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
}

static inline bool
atom_link_is_err(struct ntb_softc *ntb)
{
	uint32_t status;

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));

	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
		return (true);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
}

/* Atom does not have link status interrupt, poll on that platform */
static void
atom_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	sbintime_t timo, poll_ts;

	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (intel_ntb_poll_link(ntb))
		ntb_link_event(ntb->device);

	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
		return;
	}

out:
	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
}

static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}

static int
intel_ntb_port_number(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
}

static int
intel_ntb_peer_port_count(device_t dev)
{

	return (1);
}

static int
intel_ntb_peer_port_number(device_t dev, int pidx)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (pidx != 0)
		return (-EINVAL);

	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
}

static int
intel_ntb_peer_port_idx(device_t dev, int port)
{
	int peer_port;

	peer_port = intel_ntb_peer_port_number(dev, 0);
	if (peer_port == -EINVAL || port != peer_port)
		return (-EINVAL);

	return (0);
}

static int
intel_ntb4_link_enable(device_t dev, enum ntb_speed speed __unused,
    enum ntb_width width __unused)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl, ppd0, ltr;
	uint16_t lnkctl;

	if (!HAS_FEATURE(ntb, NTB_LTR_BAD)) {
		/* Setup active snoop LTR values */
		ltr = NTB_LTR_ACTIVE_REQMNT | NTB_LTR_ACTIVE_VAL | NTB_LTR_ACTIVE_LATSCALE;
		/* Setup active non-snoop values */
		ltr = (ltr << NTB_LTR_NS_SHIFT) | ltr;
		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_ACTIVE, ltr);

		/* Setup idle snoop LTR values */
		ltr = NTB_LTR_IDLE_VAL | NTB_LTR_IDLE_LATSCALE | NTB_LTR_IDLE_REQMNT;
		/* Setup idle non-snoop values */
		ltr = (ltr << NTB_LTR_NS_SHIFT) | ltr;
		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_IDLE, ltr);

		/* setup PCIe LTR to active */
		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_SWSEL, NTB_LTR_SWSEL_ACTIVE);
	}

	cntl = NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP;
	cntl |= NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);

	lnkctl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
	lnkctl &= ~GEN4_LINK_CTRL_LINK_DISABLE;
	intel_ntb_reg_write(2, XEON_GEN4_REG_LINK_CTRL, lnkctl);

	/* start link training in PPD0 */
	ppd0 = intel_ntb_reg_read(4, XEON_GEN4_REG_PPD0);
	ppd0 |= GEN4_PPD_LINKTRN;
	intel_ntb_reg_write(4, XEON_GEN4_REG_PPD0, ppd0);

	/* make sure link training has started */
	ppd0 = intel_ntb_reg_read(4, XEON_GEN4_REG_PPD0);
	if (!(ppd0 & GEN4_PPD_LINKTRN))
		intel_ntb_printf(2, "Link is not training\n");

	return (0);
}

static int
intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
    enum ntb_width width __unused)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->type == NTB_XEON_GEN4)
		return (intel_ntb4_link_enable(dev, speed, width));

	if (ntb->type == NTB_ATOM) {
		pci_write_config(ntb->device, NTB_PPD_OFFSET,
		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
		return (0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static int
intel_ntb4_link_disable(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;
	uint16_t lnkctl;

	/* clear the snoop bits */
	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP);
	cntl &= ~(NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP);
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);

	lnkctl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
	lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE;
	intel_ntb_reg_write(2, XEON_GEN4_REG_LINK_CTRL, lnkctl);

	/* set LTR to idle */
	if (!HAS_FEATURE(ntb, NTB_LTR_BAD))
		intel_ntb_reg_write(4, XEON_GEN4_REG_EXT_LTR_SWSEL, NTB_LTR_SWSEL_IDLE);

	return (0);
}

static int
intel_ntb_link_disable(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->type == NTB_XEON_GEN4)
		return (intel_ntb4_link_disable(dev));

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static bool
intel_ntb_link_enabled(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	if (ntb->type == NTB_ATOM) {
		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);

	if (ntb->type == NTB_XEON_GEN4) {
		cntl = intel_ntb_reg_read(2, XEON_GEN4_REG_LINK_CTRL);
		return ((cntl & GEN4_LINK_CTRL_LINK_DISABLE) == 0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
}

static void
recover_atom_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	atom_perform_link_restart(ntb);

	/*
	 * There is a potential race between the 2 NTB devices recovering at
	 * the same time.  If the times are the same, the link will not recover
	 * and the driver will be stuck in this loop forever.  Add a random
	 * interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);

	if (atom_link_is_err(ntb))
		goto retry;

	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
		goto out;

	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
	    ntb);
}

/*
 * Polls the HW link status register(s); returns true if something has changed.
 */
static bool
intel_ntb_atom_poll_link(struct ntb_softc *ntb)
{
	uint32_t ntb_cntl;

	ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	if (ntb_cntl == ntb->ntb_ctl)
		return (false);

	ntb->ntb_ctl = ntb_cntl;
	ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	return (true);
}

static bool
intel_ntb_xeon_gen1_poll_link(struct ntb_softc *ntb)
{
	uint16_t reg_val;

	if (ntb->type == NTB_XEON_GEN1)
		db_iowrite_raw(ntb, ntb->self_reg->db_bell,
		    ntb->db_link_mask);

	reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (reg_val == ntb->lnk_sta)
		return (false);

	ntb->lnk_sta = reg_val;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		if (_xeon_link_is_up(ntb)) {
			if (!ntb->peer_msix_good) {
				callout_reset(&ntb->peer_msix_work, 0,
				    intel_ntb_exchange_msix, ntb);
				return (false);
			}
		} else {
			ntb->peer_msix_good = false;
			ntb->peer_msix_done = false;
		}
	}
	return (true);
}

static bool
intel_ntb_xeon_gen4_poll_link(struct ntb_softc *ntb)
{
	uint16_t reg_val;

	/*
	 * We need to write the DLLSCS bit in SLOTSTS before we can clear
	 * the hardware link interrupt on ICX NTB.
	 */
	intel_ntb_reg_write(2, XEON_GEN4_REG_SLOTSTS, GEN4_SLOTSTS_DLLSCS);
	db_iowrite_raw(ntb, ntb->self_reg->db_clear, ntb->db_link_mask);

	reg_val = intel_ntb_reg_read(2, ntb->reg->lnk_sta);
	if (reg_val == ntb->lnk_sta)
		return (false);

	ntb->lnk_sta = reg_val;
	return (true);
}

static bool
intel_ntb_poll_link(struct ntb_softc *ntb)
{
	bool val;

	switch (ntb->type) {
	case NTB_ATOM:
		val = intel_ntb_atom_poll_link(ntb);
		break;
	case NTB_XEON_GEN4:
		val = intel_ntb_xeon_gen4_poll_link(ntb);
		break;
	default:
		val = intel_ntb_xeon_gen1_poll_link(ntb);
		break;
	}
	return (val);
}

static inline enum ntb_speed
intel_ntb_link_sta_speed(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_SPEED_NONE);
	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
}

static inline enum ntb_width
intel_ntb_link_sta_width(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_WIDTH_NONE);
	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
}

SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Driver state, statistics, and HW registers");

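/*
 * The top bits of a register sysctl's arg2 encode the access size
 * (bits 30-31), a doorbell-read flag (bit 29), and a PCI-config flag
 * (bit 28); the low 28 bits hold the register offset.  These are decoded
 * in sysctl_handle_register().
 */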
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)

static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_link_status, "IU",
	    "Link status (1=active, 0=inactive)");
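	/*
	 * Illustrative usage: the link can be brought up or down
	 * administratively via this sysctl, e.g.
	 * `sysctl dev.<device>.<unit>.admin_up=1` (the exact device name
	 * depends on how the driver attaches).
	 */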
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_link_admin, "IU",
	    "Set/get interface status (1=UP, 0=DOWN)");

	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, ntb, 0,
	    sysctl_handle_features, "A", "Features/errata of this NTB device");

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | 0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}

static int
sysctl_handle_features(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 256, req);

	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned old, new;
	int error;

	old = intel_ntb_link_enabled(ntb->device);

	error = SYSCTL_OUT(req, &old, sizeof(old));
	if (error != 0 || req->newptr == NULL)
		return (error);

	error = SYSCTL_IN(req, &new, sizeof(new));
	if (error != 0)
		return (error);

	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
	    (new != 0) ? "en" : "dis");

	if (new != 0)
		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	else
		error = intel_ntb_link_disable(ntb->device);
	return (error);
}

static int
sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	enum ntb_speed speed;
	enum ntb_width width;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 32, req);

	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
		    (unsigned)speed, (unsigned)width);
	else
		sbuf_printf(&sb, "down");

	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned res;
	int error;

	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);

	error = SYSCTL_OUT(req, &res, sizeof(res));
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_register(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb;
	const void *outp;
	uintptr_t sz;
	uint64_t umv;
	char be[sizeof(umv)];
	size_t outsz;
	uint32_t reg;
	bool db, pci;
	int error;

	ntb = arg1;
	reg = arg2 & ~NTB_REGFLAGS_MASK;
	sz = arg2 & NTB_REGSZ_MASK;
	db = (arg2 & NTB_DB_READ) != 0;
	pci = (arg2 & NTB_PCI_REG) != 0;

	KASSERT(!(db && pci), ("bogus"));

	if (db) {
		KASSERT(sz == NTB_REG_64, ("bogus"));
		umv = db_ioread(ntb, reg);
		outsz = sizeof(uint64_t);
	} else {
		switch (sz) {
		case NTB_REG_64:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 8);
			else
				umv = intel_ntb_reg_read(8, reg);
			outsz = sizeof(uint64_t);
			break;
		case NTB_REG_32:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 4);
			else
				umv = intel_ntb_reg_read(4, reg);
			outsz = sizeof(uint32_t);
			break;
		case NTB_REG_16:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 2);
			else
				umv = intel_ntb_reg_read(2, reg);
			outsz = sizeof(uint16_t);
			break;
		case NTB_REG_8:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 1);
			else
				umv = intel_ntb_reg_read(1, reg);
			outsz = sizeof(uint8_t);
			break;
		default:
			panic("bogus");
			break;
		}
	}

	/* Encode big-endian so that sysctl -x is legible. */
	be64enc(be, umv);
	outp = ((char *)be) + sizeof(umv) - outsz;

	error = SYSCTL_OUT(req, outp, outsz);
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

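/*
 * Map a consumer-visible memory window index to the hardware window
 * index, skipping any windows claimed internally for the B2B register
 * access and MSI-X errata workarounds.
 */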
static unsigned
intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{

	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) ||
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) &&
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	return (uidx);
}

#ifndef EARLY_AP_STARTUP
static int msix_ready;

static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif

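/*
 * Scratchpad handshake for the SB01BASE_LOCKUP workaround: publish the
 * local MSI-X address/data pairs and a guard value to the peer, read the
 * peer's values back, then acknowledge with NTB_MSIX_RECEIVED before
 * declaring the peer's MSI-X information good and raising a link event.
 */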
static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/* Block MSI-X negotiation until SMP has started and IRQs settled. */
	if (!msix_ready)
		goto reschedule;
#endif

	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD,
	    NTB_MSIX_VER_GUARD);

	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE,
	    NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give the peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}

/*
 * Public API to the rest of the OS
 */

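/*
 * The methods below back the generic ntb(4) KPI declared in ../ntb.h.
 * A minimal consumer sketch (illustrative only; "dma_addr" and "magic"
 * are placeholders and all error handling is elided):
 *
 *	vm_paddr_t base;
 *	caddr_t vbase;
 *	size_t size, align, align_size;
 *	bus_addr_t plimit;
 *
 *	ntb_mw_get_range(dev, 0, &base, &vbase, &size, &align,
 *	    &align_size, &plimit);
 *	ntb_mw_set_trans(dev, 0, dma_addr, size);
 *	ntb_peer_spad_write(dev, 0, magic);
 *	ntb_peer_db_set(dev, 1ull << 0);
 */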
static uint8_t
intel_ntb_spad_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->spad_count);
}

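/*
 * Report only the windows usable by consumers: any window stolen for B2B
 * register access or for the MSI-X workaround is hidden from the count.
 */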
static uint8_t
intel_ntb_mw_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint8_t res;

	res = ntb->mw_count;
	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
		res--;
	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
		res--;
	return (res);
}

static int
intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);

	return (0);
}

/*
 * Zeros the local scratchpad.
 */
static void
intel_ntb_spad_clear(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	unsigned i;

	for (i = 0; i < ntb->spad_count; i++)
		intel_ntb_spad_write(dev, i, 0);
}

static int
intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
	else
		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);

	return (0);
}

static int
intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
	else
		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);

	return (0);
}

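/*
 * Describe one user memory window.  The reported alignment is the full BAR
 * size because the hardware can only translate addresses that are naturally
 * aligned to the window, which intel_ntb_mw_set_trans() enforces.
 */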
static int
intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
    bus_addr_t *plimit)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	bus_addr_t limit;
	size_t bar_b2b_off;
	enum ntb_bar bar_num;

	if (mw_idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);

	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
	bar = &ntb->bar_info[bar_num];
	bar_b2b_off = 0;
	if (mw_idx == ntb->b2b_mw_idx) {
		KASSERT(ntb->b2b_off != 0,
		    ("user shouldn't get non-shared b2b mw"));
		bar_b2b_off = ntb->b2b_off;
	}

	if (bar_is_64bit(ntb, bar_num))
		limit = BUS_SPACE_MAXADDR;
	else
		limit = BUS_SPACE_MAXADDR_32BIT;

	if (base != NULL)
		*base = bar->pbase + bar_b2b_off;
	if (vbase != NULL)
		*vbase = bar->vbase + bar_b2b_off;
	if (size != NULL)
		*size = bar->size - bar_b2b_off;
	if (align != NULL)
		*align = bar->size;
	if (align_size != NULL)
		*align_size = 1;
	if (plimit != NULL)
		*plimit = limit;
	return (0);
}

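/*
 * Program the translation of an inbound memory window.  Every translation
 * and limit write is read back and compared, so a value the hardware did
 * not accept is unwound and reported as EIO instead of being left
 * half-programmed.  Per the alignment check below, e.g. a 64MB BAR only
 * accepts 64MB-aligned translation addresses.
 */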
static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr be aligned to the BAR size. */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		if (ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4)
			base = addr;
		else
			base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;
		else
			limit = base + mw_size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */
		if (ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4)
			return (EIO);

		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}

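/*
 * Clearing a window is just programming a zero translation of zero size;
 * the same verify-and-unwind path as a normal set_trans applies.
 */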
static int
intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{

	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}

static int
intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	*mode = bar->map_mode;
	return (0);
}

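/*
 * Change the caching attribute of a window's kernel mapping through
 * pmap_change_attr().  Despite the "wc" name, any attribute supported by
 * pmap is accepted, e.g. VM_MEMATTR_WRITE_COMBINING for bursty writes.
 */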
static int
intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);

	idx = intel_ntb_user_mw_to_idx(ntb, idx);
	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}

static int
intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx,
    vm_memattr_t mode)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	if (bar->map_mode == mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
	if (rc == 0)
		bar->map_mode = mode;

	return (rc);
}

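/*
 * Ring doorbells on the peer.  Depending on errata this takes one of three
 * paths: writing the peer's MSI-X table directly (NTB_SB01BASE_LOCKUP),
 * going through the B2B memory window instead of the register BAR
 * (NTB_SDOORBELL_LOCKUP), or hitting the doorbell register itself, which
 * on gen3/gen4 parts is laid out as one register per doorbell bit.
 */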
static void
intel_ntb_peer_db_set(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t db;

	if ((bits & ~ntb->db_valid_mask) != 0) {
		device_printf(ntb->device, "Invalid doorbell bits %#jx\n",
		    (uintmax_t)bits);
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		struct ntb_pci_bar_info *lapic;
		unsigned i;

		lapic = ntb->peer_lapic_bar;

		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((bits & intel_ntb_db_vector_mask(dev, i)) != 0)
				bus_space_write_4(lapic->pci_bus_tag,
				    lapic->pci_bus_handle,
				    ntb->peer_msix_data[i].nmd_ofs,
				    ntb->peer_msix_data[i].nmd_data);
		}
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bits);
		return;
	}

	if (ntb->type == NTB_XEON_GEN3 || ntb->type == NTB_XEON_GEN4) {
		while (bits != 0) {
			db = ffsll(bits);

			intel_ntb_reg_write(1,
			    ntb->peer_reg->db_bell + (db - 1) * 4, 0x1);

			bits = bits & (bits - 1);
		}
	} else {
		db_iowrite(ntb, ntb->peer_reg->db_bell, bits);
	}
}

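/*
 * Export the bus address of the peer doorbell register, presumably so that
 * an engine other than the CPU (e.g. DMA) can ring the peer directly.
 */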
static int
intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t regoff;

	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));

	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		bar = &ntb->bar_info[NTB_CONFIG_BAR];
		regoff = ntb->peer_reg->db_bell;
	} else {
		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
		    ("invalid b2b idx"));

		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
		regoff = XEON_PDOORBELL_OFFSET;
	}
	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO,
	    ("doorbell must be memory-mapped"));

	/* HACK: Specific to current x86 bus implementation. */
	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
	*db_size = ntb->reg->db_size;
	return (0);
}

static uint64_t
intel_ntb_db_valid_mask(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_valid_mask);
}

static int
intel_ntb_db_vector_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_vec_count);
}

static uint64_t
intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (vector >= ntb->db_vec_count)
		return (0);
	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}

static bool
intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (speed != NULL)
		*speed = intel_ntb_link_sta_speed(ntb);
	if (width != NULL)
		*width = intel_ntb_link_sta_width(ntb);
	return (link_is_up(ntb));
}

static void
save_bar_parameters(struct ntb_pci_bar_info *bar)
{

	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
	bar->pbase = rman_get_start(bar->pci_resource);
	bar->size = rman_get_size(bar->pci_resource);
	bar->vbase = rman_get_virtual(bar->pci_resource);
}

static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface */
	DEVMETHOD(bus_child_location,	ntb_child_location),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
	/* NTB interface */
	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
	DEVMETHOD(ntb_peer_port_idx,	intel_ntb_peer_port_idx),
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};

static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
    nitems(pci_ids));