/*-
 * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/resource.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "ntb_hw_intel.h"
#include "../ntb.h"

#define MAX_MSIX_INTERRUPTS	\
	MAX(MAX(XEON_DB_COUNT, ATOM_DB_COUNT), XEON_GEN3_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
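/* Keep only the 4KB-aligned high bits of a BAR address. */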
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0

/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8
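/*
 * Each 16-byte MSI-X table entry holds the message address low/high words,
 * the message data, and a vector control word (the last is not needed here).
 */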

enum ntb_device_type {
	NTB_XEON_GEN1,
	NTB_XEON_GEN3,
	NTB_ATOM
};

/* ntb_conn_type are hardware numbers, cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};

enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};

enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};

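/*
 * Scratchpad indices used to hand the local MSI-X table contents to the
 * peer for the SB01BASE_LOCKUP errata workaround (see
 * intel_ntb_exchange_msix()).
 */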
enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};

/* Device features and workarounds */
#define HAS_FEATURE(ntb, feature)	\
	(((ntb)->features & (feature)) != 0)

struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;
};

struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	vm_memattr_t		map_mode;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};

struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
	unsigned		masked;
};

struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;
	unsigned	mw_bar[NTB_MAX_BARS];
};

struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};

struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};

struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};

struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};

struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	bus_dma_tag_t		bar0_dma_tag;
	bus_dmamap_t		bar0_dma_map;

	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/

	int				last_ts;	/* ticks @ last irq */

	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};

#ifdef __i386__
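/*
 * i386 lacks 64-bit bus_space accessors, so emulate them with two 32-bit
 * operations.  Note that the combined access is not atomic.
 */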
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif

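/*
 * Register access helpers: SIZE is the access width in bytes (1, 2, 4 or 8)
 * and expands to the matching bus_space_{read,write}_N call.  The *_reg_*
 * variants target BAR0 (the NTB register BAR); the *_mw_* variants target
 * the B2B memory window used to reach the peer's registers.
 */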
#define intel_ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define intel_ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define intel_ntb_reg_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define intel_ntb_reg_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define intel_ntb_mw_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset)
#define intel_ntb_mw_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)

static int intel_ntb_probe(device_t device);
static int intel_ntb_attach(device_t device);
static int intel_ntb_detach(device_t device);
static uint64_t intel_ntb_db_valid_mask(device_t dev);
static void intel_ntb_spad_clear(device_t dev);
static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width);
static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
    enum ntb_width width);
static int intel_ntb_link_disable(device_t dev);
static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);

static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int intel_ntb_init_isr(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb);
static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
static void intel_ntb_exchange_msix(void *);
static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb);
static int intel_ntb_detect_atom(struct ntb_softc *ntb);
static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
static int intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb);
static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static int xeon_gen3_setup_b2b_mw(struct ntb_softc *);
static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr,
    size_t size);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void recover_atom_link(void *arg);
static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);

static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
#define intel_ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)

#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));

static inline vm_memattr_t
intel_ntb_pat_flags(void)
{

	switch (g_ntb_mw_pat) {
	case _NTB_PAT_WC:
		return (VM_MEMATTR_WRITE_COMBINING);
	case _NTB_PAT_WT:
		return (VM_MEMATTR_WRITE_THROUGH);
	case _NTB_PAT_WP:
		return (VM_MEMATTR_WRITE_PROTECTED);
	case _NTB_PAT_WB:
		return (VM_MEMATTR_WRITE_BACK);
	case _NTB_PAT_UCM:
		return (VM_MEMATTR_WEAK_UNCACHEABLE);
	case _NTB_PAT_UC:
		/* FALLTHROUGH */
	default:
		return (VM_MEMATTR_UNCACHEABLE);
	}
}

/*
 * Well, this obviously doesn't belong here, but it doesn't seem to exist
 * anywhere better yet.
 */
static inline const char *
intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
{

	switch (pat) {
	case VM_MEMATTR_WRITE_COMBINING:
		return ("WRITE_COMBINING");
	case VM_MEMATTR_WRITE_THROUGH:
		return ("WRITE_THROUGH");
	case VM_MEMATTR_WRITE_PROTECTED:
		return ("WRITE_PROTECTED");
	case VM_MEMATTR_WRITE_BACK:
		return ("WRITE_BACK");
	case VM_MEMATTR_WEAK_UNCACHEABLE:
		return ("UNCACHED");
	case VM_MEMATTR_UNCACHEABLE:
		return ("UNCACHEABLE");
	default:
		return ("UNKNOWN");
	}
}

static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware erratum.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");

static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");

/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)
#define NTB_ONE_MSIX		(1ull << 17)

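/*
 * printf(9) %b bit-name string for the feature flags: the leading \20
 * selects hexadecimal output, and each entry is the 1-based bit number
 * followed by the flag's name.
 */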
#define NTB_FEATURES_STR \
    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"

static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B",
		NTB_XEON_GEN1, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x201C8086, "SKL Xeon E5 V5 Non-Transparent Bridge B2B",
		NTB_XEON_GEN3, 0 },
};

static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};

static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};

static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static const struct ntb_reg xeon_gen3_reg = {
	.ntb_ctl = XEON_GEN3_REG_IMNTB_CTRL,
	.lnk_sta = XEON_GEN3_INT_LNK_STS_OFFSET,
	.db_size = sizeof(uint32_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg xeon_gen3_pri_reg = {
	.db_bell = XEON_GEN3_REG_EMDOORBELL,
	.db_mask = XEON_GEN3_REG_IMINT_DISABLE,
	.spad = XEON_GEN3_REG_IMSPAD,
};

static const struct ntb_alt_reg xeon_gen3_b2b_reg = {
	.db_bell = XEON_GEN3_REG_IMDOORBELL,
	.db_mask = XEON_GEN3_REG_EMINT_DISABLE,
	.spad = XEON_GEN3_REG_IMB2B_SSPAD,
};

static const struct ntb_xlat_reg xeon_gen3_sec_xlat = {
	.bar0_base = XEON_GEN3_EXT_REG_BAR0BASE,
	.bar2_base = XEON_GEN3_EXT_REG_BAR1BASE,
	.bar4_base = XEON_GEN3_EXT_REG_BAR2BASE,

	.bar2_limit = XEON_GEN3_REG_IMBAR1XLIMIT,
	.bar4_limit = XEON_GEN3_REG_IMBAR2XLIMIT,

	.bar2_xlat = XEON_GEN3_REG_IMBAR1XBASE,
	.bar4_xlat = XEON_GEN3_REG_IMBAR2XBASE,
};

SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/*
 * OS <-> Driver interface structures
 */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/*
 * OS <-> Driver linkage functions
 */
static int
intel_ntb_probe(device_t device)
{
	struct ntb_hw_info *p;

	p = intel_ntb_get_device_info(pci_get_devid(device));
	if (p == NULL)
		return (ENXIO);

	device_set_desc(device, p->desc);
	return (0);
}

static int
intel_ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = device_get_softc(device);
	p = intel_ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	ntb->b2b_mw_idx = B2B_MW_DISABLED;
	ntb->msix_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	callout_init(&ntb->peer_msix_work, 1);
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);

	if (ntb->type == NTB_ATOM)
		error = intel_ntb_detect_atom(ntb);
	else if (ntb->type == NTB_XEON_GEN3)
		error = intel_ntb_detect_xeon_gen3(ntb);
	else
		error = intel_ntb_detect_xeon(ntb);
	if (error != 0)
		goto out;

	intel_ntb_detect_max_mw(ntb);

	pci_enable_busmaster(ntb->device);

	error = intel_ntb_map_pci_bars(ntb);
	if (error != 0)
		goto out;
	if (ntb->type == NTB_ATOM)
		error = intel_ntb_atom_init_dev(ntb);
	else if (ntb->type == NTB_XEON_GEN3)
		error = intel_ntb_xeon_gen3_init_dev(ntb);
	else
		error = intel_ntb_xeon_init_dev(ntb);
	if (error != 0)
		goto out;

	intel_ntb_spad_clear(device);

	intel_ntb_poll_link(ntb);

	intel_ntb_sysctl_init(ntb);

	/* Attach children to this controller */
	error = ntb_register_device(device);

out:
	if (error != 0)
		intel_ntb_detach(device);
	return (error);
}

static int
intel_ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = device_get_softc(device);

	/* Detach & delete all children */
	ntb_unregister_device(device);

	if (ntb->self_reg != NULL) {
		DB_MASK_LOCK(ntb);
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
		DB_MASK_UNLOCK(ntb);
	}
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	callout_drain(&ntb->peer_msix_work);
	pci_disable_busmaster(ntb->device);
	if (ntb->type == NTB_XEON_GEN1)
		intel_ntb_teardown_xeon(ntb);
	intel_ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	intel_ntb_unmap_pci_bar(ntb);

	return (0);
}

/*
 * Driver internal routines
 */
static inline enum ntb_bar
intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count,
	    ("%s: mw:%u >= count:%u", __func__, mw, (unsigned)ntb->mw_count));
	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));

	return (ntb->reg->mw_bar[mw]);
}

static inline bool
bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
{
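	/*
	 * BAR0 (config) and BAR2 (first MW) are always 64-bit; BAR4 is
	 * 64-bit unless it has been split into 32-bit BAR4/BAR5.
	 */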
	/* XXX This assertion could be stronger. */
	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
}

static inline void
bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
    uint32_t *xlat, uint32_t *lmt)
{
	uint32_t basev, lmtv, xlatv;

	switch (bar) {
	case NTB_B2B_BAR_1:
		basev = ntb->xlat_reg->bar2_base;
		lmtv = ntb->xlat_reg->bar2_limit;
		xlatv = ntb->xlat_reg->bar2_xlat;
		break;
	case NTB_B2B_BAR_2:
		basev = ntb->xlat_reg->bar4_base;
		lmtv = ntb->xlat_reg->bar4_limit;
		xlatv = ntb->xlat_reg->bar4_xlat;
		break;
	case NTB_B2B_BAR_3:
		basev = ntb->xlat_reg->bar5_base;
		lmtv = ntb->xlat_reg->bar5_limit;
		xlatv = ntb->xlat_reg->bar5_xlat;
		break;
	default:
		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
		    ("bad bar"));
		basev = lmtv = xlatv = 0;
		break;
	}

	if (base != NULL)
		*base = basev;
	if (xlat != NULL)
		*xlat = xlatv;
	if (lmt != NULL)
		*lmt = lmtv;
}

static int
intel_ntb_map_pci_bars(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[NTB_CONFIG_BAR];
	bar->pci_resource_id = PCIR_BAR(0);
	rc = map_mmr_bar(ntb, bar);
	if (rc != 0)
		goto out;

	/*
	 * At least on Xeon v4, the NTB device leaks to the host some remote
	 * side BAR0 writes that are supposed to update scratchpad registers.
	 * It is not clear why this happens, but it may be related to the fact
	 * that BAR0 is 32KB on the link side while it is 64KB on the host
	 * side.  Without this hack the DMAR blocks those accesses as not
	 * allowed.
	 */
	if (bus_dma_tag_create(bus_get_dma_tag(ntb->device), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    bar->size, 1, bar->size, 0, NULL, NULL, &ntb->bar0_dma_tag)) {
		device_printf(ntb->device, "Unable to create BAR0 tag\n");
		return (ENOMEM);
	}
	if (bus_dmamap_create(ntb->bar0_dma_tag, 0, &ntb->bar0_dma_map)) {
		device_printf(ntb->device, "Unable to create BAR0 map\n");
		return (ENOMEM);
	}
	if (bus_dma_dmar_load_ident(ntb->bar0_dma_tag, ntb->bar0_dma_map,
	    bar->pbase, bar->size, 0)) {
		device_printf(ntb->device, "Unable to load BAR0 map\n");
		return (ENOMEM);
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_1];
	bar->pci_resource_id = PCIR_BAR(2);
	rc = map_memory_window_bar(ntb, bar);
	if (rc != 0)
		goto out;
	if (ntb->type == NTB_XEON_GEN3) {
		bar->psz_off = XEON_GEN3_INT_REG_IMBAR1SZ;
		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR1SZ;
		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR1XBASE;
	} else {
		bar->psz_off = XEON_PBAR23SZ_OFFSET;
		bar->ssz_off = XEON_SBAR23SZ_OFFSET;
		bar->pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_2];
	bar->pci_resource_id = PCIR_BAR(4);
	rc = map_memory_window_bar(ntb, bar);
	if (rc != 0)
		goto out;
	if (ntb->type == NTB_XEON_GEN3) {
		bar->psz_off = XEON_GEN3_INT_REG_IMBAR2SZ;
		bar->ssz_off = XEON_GEN3_INT_REG_EMBAR2SZ;
		bar->pbarxlat_off = XEON_GEN3_REG_EMBAR2XBASE;
	} else {
		bar->psz_off = XEON_PBAR4SZ_OFFSET;
		bar->ssz_off = XEON_SBAR4SZ_OFFSET;
		bar->pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
	}

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		goto out;

	if (ntb->type == NTB_XEON_GEN3) {
		device_printf(ntb->device, "no split bar support\n");
		return (ENXIO);
	}

	bar = &ntb->bar_info[NTB_B2B_BAR_3];
	bar->pci_resource_id = PCIR_BAR(5);
	rc = map_memory_window_bar(ntb, bar);
	bar->psz_off = XEON_PBAR5SZ_OFFSET;
	bar->ssz_off = XEON_SBAR5SZ_OFFSET;
	bar->pbarxlat_off = XEON_PBAR5XLAT_OFFSET;

out:
	if (rc != 0)
		device_printf(ntb->device,
		    "unable to allocate pci resource\n");
	return (rc);
}

static void
print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
    const char *kind)
{

	device_printf(ntb->device,
	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
	    (char *)bar->vbase + bar->size - 1,
	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
	    (uintmax_t)bar->size, kind);
}

static int
map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);
	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mmr");
	return (0);
}

static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	vm_memattr_t mapmode;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		save_bar_parameters(bar);
	}

	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mw");

	/*
	 * Optionally, mark MW BARs as anything other than UC to improve
	 * performance.
	 */
	mapmode = intel_ntb_pat_flags();
	if (mapmode == bar->map_mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
	if (rc == 0) {
		bar->map_mode = mapmode;
		device_printf(ntb->device,
		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
		    "%s.\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode));
	} else {
		device_printf(ntb->device,
		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
		    "%s: %d\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode), rc);
	}
	/* Proceed anyway. */
	return (0);
}

static void
intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *bar;
	int i;

	if (ntb->bar0_dma_map != NULL) {
		bus_dmamap_unload(ntb->bar0_dma_tag, ntb->bar0_dma_map);
		bus_dmamap_destroy(ntb->bar0_dma_tag, ntb->bar0_dma_map);
	}
	if (ntb->bar0_dma_tag != NULL)
		bus_dma_tag_destroy(ntb->bar0_dma_tag);
	for (i = 0; i < NTB_MAX_BARS; i++) {
		bar = &ntb->bar_info[i];
		if (bar->pci_resource != NULL)
			bus_release_resource(ntb->device, SYS_RES_MEMORY,
			    bar->pci_resource_id, bar->pci_resource);
	}
}

static int
intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;
	int rc;

	for (i = 0; i < num_vectors; i++) {
		ntb->int_info[i].rid = i + 1;
		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
		if (ntb->int_info[i].res == NULL) {
			device_printf(ntb->device,
			    "bus_alloc_resource failed\n");
			return (ENOMEM);
		}
		ntb->int_info[i].tag = NULL;
		ntb->allocated_interrupts++;
		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
		if (rc != 0) {
			device_printf(ntb->device, "bus_setup_intr failed\n");
			return (ENXIO);
		}
	}
	return (0);
}

/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");

/*
 * Remap the desired number of MSI-X messages to available ithreads in a simple
 * round-robin fashion.
 */
static int
intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
{
	u_int *vectors;
	uint32_t i;
	int rc;

	if (ntb_prefer_intx != 0)
		return (ENXIO);

	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);

	for (i = 0; i < desired; i++)
		vectors[i] = (i % avail) + 1;

	rc = pci_remap_msix(dev, desired, vectors);
	free(vectors, M_NTB);
	return (rc);
}

static int
intel_ntb_xeon_gen3_init_isr(struct ntb_softc *ntb)
{
	uint64_t i, reg;
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/* Mask all the interrupts, including hardware interrupt */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, ~0ULL);

	/* Clear Interrupt Status */
	reg = intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS, reg);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    XEON_GEN3_DB_MSIX_VECTOR_COUNT);

	rc = pci_alloc_msix(ntb->device, &num_vectors);
	if (rc != 0) {
		device_printf(ntb->device,
		    "Interrupt allocation failed %d\n", rc);
		return (rc);
	}
	if (desired_vectors != num_vectors) {
		device_printf(ntb->device, "Couldn't get %d vectors\n",
		    XEON_GEN3_DB_MSIX_VECTOR_COUNT);
		return (ENXIO);
	}
	/* 32 db + 1 hardware */
	if (num_vectors == XEON_GEN3_DB_MSIX_VECTOR_COUNT) {
		/* Program INTVECXX source register */
		for (i = 0; i < XEON_GEN3_DB_MSIX_VECTOR_COUNT; i++) {
			/* interrupt source i for vector i */
			intel_ntb_reg_write(1, XEON_GEN3_REG_IMINTVEC00 + i, i);
			if (i == (XEON_GEN3_DB_MSIX_VECTOR_COUNT - 1)) {
				intel_ntb_reg_write(1,
				    XEON_GEN3_REG_IMINTVEC00 + i,
				    XEON_GEN3_LINK_VECTOR_INDEX);
			}
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);

		/* enable all interrupts */
		intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_DISABLE, 0ULL);
	} else {
		device_printf(ntb->device, "need to remap interrupts, giving up.\n");
		return (ENXIO);
	}

	return (0);
}

static int
intel_ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.  (Except link events!)
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

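		/*
		 * If forced into remap mode and the full complement of
		 * vectors was granted anyway, surrender one so the remap
		 * path below is exercised.
		 */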
		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON_GEN1 && num_vectors < ntb->db_vec_count) {
		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround does not support MSI or INTX\n");
			return (EINVAL);
		}

		ntb->db_vec_count = 1;
		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
		rc = intel_ntb_setup_legacy_interrupt(ntb);
	} else {
		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround expects %d doorbell bits\n",
			    XEON_NONLINK_DB_MSIX_BITS);
			return (EINVAL);
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		intel_ntb_free_msix_vec(ntb);
	}

	return (rc);
}

static int
intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
	int rc;

	ntb->int_info[0].rid = 0;
	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
	if (ntb->int_info[0].res == NULL) {
		device_printf(ntb->device, "bus_alloc_resource failed\n");
		return (ENOMEM);
	}

	ntb->int_info[0].tag = NULL;
	ntb->allocated_interrupts = 1;

	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
	    ntb, &ntb->int_info[0].tag);
	if (rc != 0) {
		device_printf(ntb->device, "bus_setup_intr failed\n");
		return (ENXIO);
	}

	return (0);
}

static void
intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
{
	struct ntb_int_info *current_int;
	int i;

	for (i = 0; i < ntb->allocated_interrupts; i++) {
		current_int = &ntb->int_info[i];
		if (current_int->tag != NULL)
			bus_teardown_intr(ntb->device, current_int->res,
			    current_int->tag);

		if (current_int->res != NULL)
			bus_release_resource(ntb->device, SYS_RES_IRQ,
			    rman_get_rid(current_int->res), current_int->res);
	}

	intel_ntb_free_msix_vec(ntb);
	pci_release_msi(ntb->device);
}

static inline uint64_t
db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{

	switch (ntb->type) {
	case NTB_ATOM:
	case NTB_XEON_GEN3:
		return (intel_ntb_reg_read(8, regoff));
	case NTB_XEON_GEN1:
		return (intel_ntb_reg_read(2, regoff));
	}
	__assert_unreachable();
}

static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);
	db_iowrite_raw(ntb, regoff, val);
}

static inline void
db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	switch (ntb->type) {
	case NTB_ATOM:
	case NTB_XEON_GEN3:
		intel_ntb_reg_write(8, regoff, val);
		break;
	case NTB_XEON_GEN1:
		intel_ntb_reg_write(2, regoff, (uint16_t)val);
		break;
	}
}

static void
intel_ntb_db_set_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	DB_MASK_LOCK(ntb);
	ntb->db_mask |= bits;
	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}

static void
intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t ibits;
	int i;

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	ibits = ntb->fake_db & ntb->db_mask & bits;
	ntb->db_mask &= ~bits;
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/* Simulate fake interrupts if unmasked DB bits are set. */
		ntb->force_db |= ibits;
		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
				swi_sched(ntb->int_info[i].tag, 0);
		}
	} else {
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	}
	DB_MASK_UNLOCK(ntb);
}

static uint64_t
intel_ntb_db_read(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		return (ntb->fake_db);
	if (ntb->type == NTB_XEON_GEN3)
		return (intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
	else
		return (db_ioread(ntb, ntb->self_reg->db_bell));
}

static void
intel_ntb_db_clear(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		DB_MASK_LOCK(ntb);
		ntb->fake_db &= ~bits;
		DB_MASK_UNLOCK(ntb);
		return;
	}

	if (ntb->type == NTB_XEON_GEN3)
		intel_ntb_reg_write(4, XEON_GEN3_REG_IMINT_STATUS,
		    (uint32_t)bits);
	else
		db_iowrite(ntb, ntb->self_reg->db_bell, bits);
}

static inline uint64_t
intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
	uint64_t shift, mask;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/*
		 * Remap vectors in a custom way so that at least the first
		 * three doorbells do not generate stray events.  This breaks
		 * Linux compatibility (if it ever existed) when more than
		 * one DB is used (not the case for if_ntb).
		 */
		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
			return (1 << db_vector);
		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
			return (0x7ffc);
	}

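	/*
	 * Default mapping: each vector owns a contiguous group of
	 * db_vec_shift doorbell bits.
	 */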
	shift = ntb->db_vec_shift;
	mask = (1ull << shift) - 1;
	return (mask << (shift * db_vector));
}

static void
intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
	uint64_t vec_mask;

	ntb->last_ts = ticks;
	vec_mask = intel_ntb_vec_mask(ntb, vec);

	if (ntb->type == NTB_XEON_GEN3 && vec == XEON_GEN3_LINK_VECTOR_INDEX)
		vec_mask |= ntb->db_link_mask;
	if ((vec_mask & ntb->db_link_mask) != 0) {
		if (intel_ntb_poll_link(ntb))
			ntb_link_event(ntb->device);
		if (ntb->type == NTB_XEON_GEN3)
			intel_ntb_reg_write(8, XEON_GEN3_REG_IMINT_STATUS,
			    intel_ntb_reg_read(8, XEON_GEN3_REG_IMINT_STATUS));
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    (vec_mask & ntb->db_link_mask) == 0) {
		DB_MASK_LOCK(ntb);

		/*
		 * Do not report same DB events again if not cleared yet,
		 * unless the mask was just cleared for them and this
		 * interrupt handler call can be the consequence of it.
		 */
		vec_mask &= ~ntb->fake_db | ntb->force_db;
		ntb->force_db &= ~vec_mask;

		/* Update our internal doorbell register. */
		ntb->fake_db |= vec_mask;

		/* Do not report masked DB events. */
		vec_mask &= ~ntb->db_mask;

		DB_MASK_UNLOCK(ntb);
	}

	if ((vec_mask & ntb->db_valid_mask) != 0)
		ntb_db_event(ntb->device, vec);
}

static void
ndev_vec_isr(void *arg)
{
	struct ntb_vec *nvec = arg;

	intel_ntb_interrupt(nvec->ntb, nvec->num);
}

static void
ndev_irq_isr(void *arg)
{
	/* If we couldn't set up MSI-X, we only have the one vector. */
	intel_ntb_interrupt(arg, 0);
}

static int
intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;

	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
	    M_ZERO | M_WAITOK);
	for (i = 0; i < num_vectors; i++) {
		ntb->msix_vec[i].num = i;
		ntb->msix_vec[i].ntb = ntb;
	}

	return (0);
}

static void
intel_ntb_free_msix_vec(struct ntb_softc *ntb)
{

	if (ntb->msix_vec == NULL)
		return;

	free(ntb->msix_vec, M_NTB);
	ntb->msix_vec = NULL;
}

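/*
 * Record the local MSI-X table contents (address/data pair per vector) so
 * they can be handed to the peer, which raises our interrupts with plain
 * memory writes (SB01BASE_LOCKUP errata workaround).
 */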
static void
intel_ntb_get_msix_info(struct ntb_softc *ntb)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_msix *msix;
	uint32_t laddr, data, i, offset;

	dinfo = device_get_ivars(ntb->device);
	msix = &dinfo->cfg.msix;

	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;

		laddr = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_LOWER_ADDR);
		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);

		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
		     MSI_INTEL_ADDR_BASE));
		ntb->msix_data[i].nmd_ofs = laddr;

		data = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_DATA);
		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);

		ntb->msix_data[i].nmd_data = data;
	}
}

static struct ntb_hw_info *
intel_ntb_get_device_info(uint32_t device_id)
{
	struct ntb_hw_info *ep;

	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
		if (ep->device_id == device_id)
			return (ep);
	}
	return (NULL);
}

static void
intel_ntb_teardown_xeon(struct ntb_softc *ntb)
{

	if (ntb->reg != NULL)
		intel_ntb_link_disable(ntb->device);
}

static void
intel_ntb_detect_max_mw(struct ntb_softc *ntb)
{

	switch (ntb->type) {
	case NTB_ATOM:
		ntb->mw_count = ATOM_MW_COUNT;
		break;
	case NTB_XEON_GEN1:
		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
			ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
		else
			ntb->mw_count = XEON_SNB_MW_COUNT;
		break;
	case NTB_XEON_GEN3:
		if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
			ntb->mw_count = XEON_GEN3_SPLIT_MW_COUNT;
		else
			ntb->mw_count = XEON_GEN3_MW_COUNT;
		break;
	}
}

static int
intel_ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		device_printf(ntb->device,
		    "Cannot apply SB01BASE_LOCKUP workaround "
		    "with split BARs disabled!\n");
		device_printf(ntb->device,
		    "Expect system hangs under heavy NTB traffic!\n");
		ntb->features &= ~NTB_SB01BASE_LOCKUP;
	}

	/*
	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
	 * errata workaround; only do one at a time.
	 */
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		ntb->features &= ~NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_atom(struct ntb_softc *ntb)
{
	uint32_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
	ntb->ppd = ppd;

	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported NTB configuration\n");
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_xeon_gen3(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, XEON_GEN3_INT_REG_PPD, 1);
	ntb->ppd = ppd;

	/* check port definition */
	conn_type = XEON_GEN3_REG_PPD_PORT_DEF_F(ppd);
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    conn_type);
		return (ENXIO);
	}

	/* check cross link configuration status */
	if (XEON_GEN3_REG_PPD_CONF_STS_F(ppd)) {
		/* NTB Port is configured as DSD/USP */
		ntb->dev_type = NTB_DEV_DSD;
	} else {
		/* NTB Port is configured as USD/DSP */
		ntb->dev_type = NTB_DEV_USD;
	}

	if (XEON_GEN3_REG_PPD_ONE_MSIX_F(ppd)) {
		/*
		 * This bit, when set, causes only a single MSI-X message to
		 * be generated if MSI-X is enabled.
		 */
		ntb->features |= NTB_ONE_MSIX;
	}

	if (XEON_GEN3_REG_PPD_BAR45_SPL_F(ppd)) {
		/* BARs 4 and 5 are presented as two 32b non-prefetchable BARs */
		ntb->features |= NTB_SPLIT_BAR;
	}

	device_printf(ntb->device, "conn type 0x%02x, dev type 0x%02x, "
	    "features 0x%02x\n", ntb->conn_type, ntb->dev_type, ntb->features);

	return (0);
}

static int
intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		ntb->force_db = ntb->fake_db = 0;
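		/*
		 * Map the tunable to an MW index; the modulo arithmetic
		 * lets negative values count back from the last window.
		 */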
		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
		    g_ntb_msix_idx, ntb->msix_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		/*
		 * There is a Xeon hardware erratum related to writes to
		 * SDOORBELL or B2BDOORBELL in conjunction with inbound access
		 * to NTB MMIO space, which may hang the system.  To work
		 * around this, use a memory window to access the interrupt
		 * and scratch pad registers on the remote system.
		 */
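		/* Negative g_ntb_mw_idx values also index from the last MW. */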
1759		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1760		    ntb->mw_count;
1761		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1762		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1763		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
1764		    VM_MEMATTR_UNCACHEABLE);
1765		KASSERT(rc == 0, ("shouldn't fail"));
1766	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
		/*
		 * There is a hardware erratum on bit 14 of the b2bdoorbell
		 * register: writes to it are not mirrored to the remote
		 * system.  Shrink the number of bits by one, since bit 14 is
		 * the last bit.
		 *
		 * In REGS_THRU_MW errata mode the b2bdoorbell register is not
		 * used anyway, nor is it used for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Mask all doorbell interrupts. */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	rc = intel_ntb_init_isr(ntb);
	return (rc);
}

static int
intel_ntb_xeon_gen3_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count = XEON_GEN3_SPAD_COUNT;
	ntb->db_count = XEON_GEN3_DB_COUNT;
	ntb->db_link_mask = XEON_GEN3_DB_LINK_BIT;
	ntb->db_vec_count = XEON_GEN3_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift = XEON_GEN3_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_gen3_reg;
	ntb->self_reg = &xeon_gen3_pri_reg;
	ntb->peer_reg = &xeon_gen3_b2b_reg;
	ntb->xlat_reg = &xeon_gen3_sec_xlat;

	ntb->db_valid_mask = (1ULL << ntb->db_count) - 1;

	xeon_gen3_setup_b2b_mw(ntb);

	/* Enable Bus Master and Memory Space on the External Side */
	intel_ntb_reg_write(2, XEON_GEN3_EXT_REG_PCI_CMD,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Setup Interrupt */
	rc = intel_ntb_xeon_gen3_init_isr(ntb);

	return (rc);
}

static int
intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
	int error;

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = ATOM_SPAD_COUNT;
	ntb->db_count		 = ATOM_DB_COUNT;
	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &atom_reg;
	ntb->self_reg = &atom_pri_reg;
	ntb->peer_reg = &atom_b2b_reg;
	ntb->xlat_reg = &atom_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_atom_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	error = intel_ntb_init_isr(ntb);
	if (error != 0)
		return (error);

	/* Initiate PCI-E link training */
	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);

	return (0);
}

/* XXX: Linux driver doesn't seem to do any of this for Atom. */
static void
configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{

	/* Both the USD and DSD cases currently program the same values. */
	intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64);
	intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64);
	intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
	intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
}

/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
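
/*
 * Illustrative usage (an assumption, not taken from this file): because the
 * knob is a tunable (CTLFLAG_RDTUN), it must be set before the driver
 * attaches, e.g. in /boot/loader.conf on both hosts:
 *
 *	hw.ntb.b2b_mw_share=1
 *
 * With sharing enabled, xeon_setup_b2b_mw() below sets b2b_off to half the
 * BAR size, and half of that MW remains visible to consumers through
 * intel_ntb_mw_get_range().
 */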

static void
xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
    enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	uint8_t bar_sz;

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
		return;

	bar = &ntb->bar_info[idx];
	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
	if (idx == regbar) {
		if (ntb->b2b_off != 0)
			bar_sz--;
		else
			bar_sz = 0;
	}
	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
	(void)bar_sz;
}

static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	uint64_t reg_val;
	uint32_t base_reg, lmt_reg;

	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
	if (idx == regbar) {
		if (ntb->b2b_off)
			bar_addr += ntb->b2b_off;
		else
			bar_addr = 0;
	}

	if (!bar_is_64bit(ntb, idx)) {
		intel_ntb_reg_write(4, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(4, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(4, lmt_reg);
		(void)reg_val;
	} else {
		intel_ntb_reg_write(8, base_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, base_reg);
		(void)reg_val;

		intel_ntb_reg_write(8, lmt_reg, bar_addr);
		reg_val = intel_ntb_reg_read(8, lmt_reg);
		(void)reg_val;
	}
}

static void
xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
{
	struct ntb_pci_bar_info *bar;

	bar = &ntb->bar_info[idx];
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
	} else {
		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
	}
	(void)base_addr;
}

static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		uint32_t xlat_reg, lmt_reg;
		enum ntb_bar bar_num;

		/*
		 * As a workaround, point the chosen MSI-X MW BAR xlat at the
		 * remote LAPIC.
		 */
		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
		if (bar_is_64bit(ntb, bar_num)) {
			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
			intel_ntb_reg_write(8, lmt_reg, 0);
		} else {
			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
			intel_ntb_reg_write(4, lmt_reg, 0);
		}

		ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
	}
	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}

static int
xeon_gen3_setup_b2b_mw(struct ntb_softc *ntb)
{
	uint64_t reg;
	uint32_t embarsz, imbarsz;

	/* IMBAR1SZ should be equal to EMBAR1SZ */
	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR1SZ, 1);
	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR1SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR1SZ (%u) should be equal to EMBAR1SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* IMBAR2SZ should be equal to EMBAR2SZ */
	embarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_EMBAR2SZ, 1);
	imbarsz = pci_read_config(ntb->device, XEON_GEN3_INT_REG_IMBAR2SZ, 1);
	if (embarsz != imbarsz) {
		device_printf(ntb->device,
		    "IMBAR2SZ (%u) should be equal to EMBAR2SZ (%u)\n",
		    imbarsz, embarsz);
		return (EIO);
	}

	/* Client will provide the incoming IMBAR1/2XBASE, zero it for now */
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XBASE, 0);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XBASE, 0);

	/*
	 * If the value in EMBAR1LIMIT is set equal to the value in EMBAR1,
	 * the memory window for EMBAR1 is disabled.
	 * Note: this is needed to prevent malicious access.
	 */
	reg = pci_read_config(ntb->device, XEON_GEN3_EXT_REG_BAR1BASE, 8);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XLIMIT, reg);

	reg = pci_read_config(ntb->device, XEON_GEN3_EXT_REG_BAR2BASE, 8);
	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR2XLIMIT, reg);

	return (0);
}
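
/*
 * Sketch of how a client later opens one of these windows (the register
 * pair used here is an assumption based on the IMBARnXBASE comment above;
 * in practice this happens through intel_ntb_mw_set_trans()):
 *
 *	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XBASE, dma_addr);
 *	intel_ntb_reg_write(8, XEON_GEN3_REG_IMBAR1XLIMIT, embar_base + size);
 *
 * where embar_base is the EMBAR1 base read back above.  Until then, the
 * limit == base setting keeps the window closed.
 */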

static inline bool
_xeon_link_is_up(struct ntb_softc *ntb)
{

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);
	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
}

static inline bool
link_is_up(struct ntb_softc *ntb)
{

	if (ntb->type == NTB_XEON_GEN1 || ntb->type == NTB_XEON_GEN3)
		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
}

static inline bool
atom_link_is_err(struct ntb_softc *ntb)
{
	uint32_t status;

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));

	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
		return (true);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
}

/* Atom has no link status interrupt; poll the link on that platform. */
static void
atom_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	sbintime_t timo, poll_ts;

	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
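	/* A negative difference means the poll deadline is still in the future. */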
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (intel_ntb_poll_link(ntb))
		ntb_link_event(ntb->device);

	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
		return;
	}

out:
	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
}

static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

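	/* Clear IBIST error overflow, write to clear */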
	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}
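
/*
 * Several of the writes above use the "write-1-to-clear" (W1C) idiom: read
 * the status register, mask down to the sticky error bits of interest, and
 * write that value back to clear exactly those bits.  Generic form of the
 * pattern (sketch only, not compiled):
 *
 *	status = intel_ntb_reg_read(4, sts_reg);
 *	intel_ntb_reg_write(4, sts_reg, status & error_mask);
 */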

static int
intel_ntb_port_number(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->dev_type == NTB_DEV_USD ? 0 : 1);
}

static int
intel_ntb_peer_port_count(device_t dev)
{

	return (1);
}

static int
intel_ntb_peer_port_number(device_t dev, int pidx)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (pidx != 0)
		return (-EINVAL);

	return (ntb->dev_type == NTB_DEV_USD ? 1 : 0);
}

static int
intel_ntb_peer_port_idx(device_t dev, int port)
{
	int peer_port;

	peer_port = intel_ntb_peer_port_number(dev, 0);
	if (peer_port == -EINVAL || port != peer_port)
		return (-EINVAL);

	return (0);
}

static int
intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
    enum ntb_width width __unused)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->type == NTB_ATOM) {
		pci_write_config(ntb->device, NTB_PPD_OFFSET,
		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
		return (0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static int
intel_ntb_link_disable(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static bool
intel_ntb_link_enabled(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	if (ntb->type == NTB_ATOM) {
		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
}

static void
recover_atom_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	atom_perform_link_restart(ntb);

	/*
	 * There is a potential race between the 2 NTB devices recovering at
	 * the same time.  If the times are the same, the link will not recover
	 * and the driver will be stuck in this loop forever.  Add a random
	 * interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);

	if (atom_link_is_err(ntb))
		goto retry;

	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
		goto out;

	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
	    ntb);
}

/*
 * Polls the HW link status register(s); returns true if something has changed.
 */
static bool
intel_ntb_poll_link(struct ntb_softc *ntb)
{
	uint32_t ntb_cntl;
	uint16_t reg_val;

	if (ntb->type == NTB_ATOM) {
		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
		if (ntb_cntl == ntb->ntb_ctl)
			return (false);

		ntb->ntb_ctl = ntb_cntl;
		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	} else {
		if (ntb->type == NTB_XEON_GEN1)
			db_iowrite_raw(ntb, ntb->self_reg->db_bell,
			    ntb->db_link_mask);

		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
		if (reg_val == ntb->lnk_sta)
			return (false);

		ntb->lnk_sta = reg_val;

		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			if (_xeon_link_is_up(ntb)) {
				if (!ntb->peer_msix_good) {
					callout_reset(&ntb->peer_msix_work, 0,
					    intel_ntb_exchange_msix, ntb);
					return (false);
				}
			} else {
				ntb->peer_msix_good = false;
				ntb->peer_msix_done = false;
			}
		}
	}
	return (true);
}

static inline enum ntb_speed
intel_ntb_link_sta_speed(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_SPEED_NONE);
	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
}

static inline enum ntb_width
intel_ntb_link_sta_width(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_WIDTH_NONE);
	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
}

SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
    "Driver state, statistics, and HW registers");

#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
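
/*
 * The sysctl arg2 word packs a register description: bits 31-30 select the
 * access size, bit 29 requests a doorbell-safe read, bit 28 selects PCI
 * config space, and the low bits carry the register offset.  For example
 * (sketch), a 32-bit PCI config register at offset 0x1a0 would be described
 * as:
 *
 *	arg2 = NTB_REG_32 | NTB_PCI_REG | 0x1a0;
 *
 * and is decoded in sysctl_handle_register() with arg2 & ~NTB_REGFLAGS_MASK.
 */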

static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
	    "IU", "Link status (1=active, 0=inactive)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
	    "IU", "Set/get interface status (1=UP, 0=DOWN)");

	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
	    "Features/errata of this NTB device");

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01l",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETL,
	    sysctl_handle_register, "IU", "Outgoing XLAT0L register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat01u",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | XEON_B2B_XLAT_OFFSETU,
	    sysctl_handle_register, "IU", "Outgoing XLAT0U register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}
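
/*
 * Example (assuming device unit 0 and the standard dev.<driver>.<unit>
 * sysctl layout): the nodes created above can be inspected and driven from
 * userland with sysctl(8), e.g.:
 *
 *	sysctl dev.ntb_hw.0.link_status
 *	sysctl dev.ntb_hw.0.admin_up=1
 *	sysctl -x dev.ntb_hw.0.debug_info.registers.ntbcntl
 */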

static int
sysctl_handle_features(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 256, req);

	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned old, new;
	int error;

	old = intel_ntb_link_enabled(ntb->device);

	error = SYSCTL_OUT(req, &old, sizeof(old));
	if (error != 0 || req->newptr == NULL)
		return (error);

	error = SYSCTL_IN(req, &new, sizeof(new));
	if (error != 0)
		return (error);

	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
	    (new != 0) ? "en" : "dis");

	if (new != 0)
		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO,
		    NTB_WIDTH_AUTO);
	else
		error = intel_ntb_link_disable(ntb->device);
	return (error);
}

static int
sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	enum ntb_speed speed;
	enum ntb_width width;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 32, req);

	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
		    (unsigned)speed, (unsigned)width);
	else
		sbuf_printf(&sb, "down");

	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned res;
	int error;

	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);

	error = SYSCTL_OUT(req, &res, sizeof(res));
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_register(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb;
	const void *outp;
	uintptr_t sz;
	uint64_t umv;
	char be[sizeof(umv)];
	size_t outsz;
	uint32_t reg;
	bool db, pci;
	int error;

	ntb = arg1;
	reg = arg2 & ~NTB_REGFLAGS_MASK;
	sz = arg2 & NTB_REGSZ_MASK;
	db = (arg2 & NTB_DB_READ) != 0;
	pci = (arg2 & NTB_PCI_REG) != 0;

	KASSERT(!(db && pci), ("bogus"));

	if (db) {
		KASSERT(sz == NTB_REG_64, ("bogus"));
		umv = db_ioread(ntb, reg);
		outsz = sizeof(uint64_t);
	} else {
		switch (sz) {
		case NTB_REG_64:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 8);
			else
				umv = intel_ntb_reg_read(8, reg);
			outsz = sizeof(uint64_t);
			break;
		case NTB_REG_32:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 4);
			else
				umv = intel_ntb_reg_read(4, reg);
			outsz = sizeof(uint32_t);
			break;
		case NTB_REG_16:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 2);
			else
				umv = intel_ntb_reg_read(2, reg);
			outsz = sizeof(uint16_t);
			break;
		case NTB_REG_8:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 1);
			else
				umv = intel_ntb_reg_read(1, reg);
			outsz = sizeof(uint8_t);
			break;
		default:
			panic("bogus");
			break;
		}
	}

	/* Encode bigendian so that sysctl -x is legible. */
	be64enc(be, umv);
	outp = ((char *)be) + sizeof(umv) - outsz;

	error = SYSCTL_OUT(req, outp, outsz);
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static unsigned
intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{

	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) ||
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) &&
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	return (uidx);
}
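
/*
 * Worked example for intel_ntb_user_mw_to_idx() above: with mw_count = 3,
 * the B2B registers owning all of MW 1 (b2b_mw_idx = 1, b2b_off = 0) and
 * the MSI-X MW unused, user index 0 maps to internal index 0, while user
 * index 1 skips the reserved MW and maps to internal index 2, matching the
 * reduced count reported by intel_ntb_mw_count().
 */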

#ifndef EARLY_AP_STARTUP
static int msix_ready;

static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif

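/*
 * Exchange local MSI-X address/data with the peer through the scratchpad
 * registers so that each side can ring the other's MSI-X vectors directly
 * (SB01BASE_LOCKUP workaround).  The handshake, driven from a callout on
 * both hosts, is roughly: publish our MSI-X data plus a version guard, wait
 * until the peer's guard appears, read back the peer's data, then exchange
 * NTB_MSIX_DONE acknowledgements before declaring the link usable.
 */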
static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/* Block MSIX negotiation until SMP started and IRQ reshuffled. */
	if (!msix_ready)
		goto reschedule;
#endif

	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD,
	    NTB_MSIX_VER_GUARD);

	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE,
	    NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}

/*
 * Public API to the rest of the OS
 */

static uint8_t
intel_ntb_spad_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->spad_count);
}

static uint8_t
intel_ntb_mw_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint8_t res;

	res = ntb->mw_count;
	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
		res--;
	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
		res--;
	return (res);
}

static int
intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);

	return (0);
}

/*
 * Zeros the local scratchpad.
 */
static void
intel_ntb_spad_clear(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	unsigned i;

	for (i = 0; i < ntb->spad_count; i++)
		intel_ntb_spad_write(dev, i, 0);
}

static int
intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
	else
		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);

	return (0);
}

static int
intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
	else
		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
    bus_addr_t *plimit)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	bus_addr_t limit;
	size_t bar_b2b_off;
	enum ntb_bar bar_num;

	if (mw_idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);

	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
	bar = &ntb->bar_info[bar_num];
	bar_b2b_off = 0;
	if (mw_idx == ntb->b2b_mw_idx) {
		KASSERT(ntb->b2b_off != 0,
		    ("user shouldn't get non-shared b2b mw"));
		bar_b2b_off = ntb->b2b_off;
	}

	if (bar_is_64bit(ntb, bar_num))
		limit = BUS_SPACE_MAXADDR;
	else
		limit = BUS_SPACE_MAXADDR_32BIT;

	if (base != NULL)
		*base = bar->pbase + bar_b2b_off;
	if (vbase != NULL)
		*vbase = bar->vbase + bar_b2b_off;
	if (size != NULL)
		*size = bar->size - bar_b2b_off;
	if (align != NULL)
		*align = bar->size;
	if (align_size != NULL)
		*align_size = 1;
	if (plimit != NULL)
		*plimit = limit;
	return (0);
}

static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/*
	 * The hardware requires that addr be aligned to the BAR size, e.g.
	 * a 64MB BAR accepts only 64MB-aligned translation addresses.
	 */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;
		else
			limit = base + mw_size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		if (ntb->type == NTB_XEON_GEN3) {
			limit = base + size;

			/* set EMBAR1/2XLIMIT */
			if (!idx)
				intel_ntb_reg_write(8,
				    XEON_GEN3_REG_EMBAR1XLIMIT, limit);
			else
				intel_ntb_reg_write(8,
				    XEON_GEN3_REG_EMBAR2XLIMIT, limit);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */
		if (ntb->type == NTB_XEON_GEN3)
			return (EIO);

		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}
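
/*
 * Typical consumer flow through the NTB KPI (a sketch, assuming a client
 * device "dev" attached below ntb_hw and a DMA-able buffer at "busaddr"):
 * query the window geometry, then point the window at local memory so that
 * the peer's writes land in that buffer.
 *
 *	vm_paddr_t base;
 *	caddr_t vbase;
 *	size_t size, align, align_size;
 *	bus_addr_t plimit;
 *	int error;
 *
 *	ntb_mw_get_range(dev, 0, &base, &vbase, &size, &align, &align_size,
 *	    &plimit);
 *	error = ntb_mw_set_trans(dev, 0, busaddr, size);
 *
 * busaddr must be BAR-size aligned (see the check above) and must fall
 * below plimit for 32-bit windows.
 */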

static int
intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{

	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}

static int
intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	*mode = bar->map_mode;
	return (0);
}

static int
intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);

	idx = intel_ntb_user_mw_to_idx(ntb, idx);
	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}

static int
intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx,
    vm_memattr_t mode)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	if (bar->map_mode == mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
	if (rc == 0)
		bar->map_mode = mode;

	return (rc);
}
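
/*
 * A transport that streams data through a window will usually want
 * write-combining on the mapping, e.g. (sketch, via the NTB KPI):
 *
 *	error = ntb_mw_set_wc(dev, 0, VM_MEMATTR_WRITE_COMBINING);
 */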

static void
intel_ntb_peer_db_set(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t db;

	if ((bits & ~ntb->db_valid_mask) != 0) {
		device_printf(ntb->device, "Invalid doorbell bits %#jx\n",
		    (uintmax_t)bits);
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		struct ntb_pci_bar_info *lapic;
		unsigned i;

		lapic = ntb->peer_lapic_bar;

		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((bits & intel_ntb_db_vector_mask(dev, i)) != 0)
				bus_space_write_4(lapic->pci_bus_tag,
				    lapic->pci_bus_handle,
				    ntb->peer_msix_data[i].nmd_ofs,
				    ntb->peer_msix_data[i].nmd_data);
		}
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bits);
		return;
	}

	if (ntb->type == NTB_XEON_GEN3) {
		while (bits != 0) {
			db = ffsll(bits);

			intel_ntb_reg_write(1,
			    ntb->peer_reg->db_bell + (db - 1) * 4, 0x1);

			bits = bits & (bits - 1);
		}
	} else {
		db_iowrite(ntb, ntb->peer_reg->db_bell, bits);
	}
}

static int
intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t regoff;

	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));

	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		bar = &ntb->bar_info[NTB_CONFIG_BAR];
		regoff = ntb->peer_reg->db_bell;
	} else {
		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
		    ("invalid b2b idx"));

		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
		regoff = XEON_PDOORBELL_OFFSET;
	}
	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));

	/* HACK: Specific to current x86 bus implementation. */
	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
	*db_size = ntb->reg->db_size;
	return (0);
}
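
/*
 * The address/size pair returned above lets a consumer have a DMA engine
 * ring the peer doorbell directly, by appending a write of the doorbell
 * bits to db_addr at the end of a transfer instead of taking a CPU trip
 * through intel_ntb_peer_db_set().
 */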

static uint64_t
intel_ntb_db_valid_mask(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_valid_mask);
}

static int
intel_ntb_db_vector_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_vec_count);
}

static uint64_t
intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (vector > ntb->db_vec_count)
		return (0);
	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}

static bool
intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (speed != NULL)
		*speed = intel_ntb_link_sta_speed(ntb);
	if (width != NULL)
		*width = intel_ntb_link_sta_width(ntb);
	return (link_is_up(ntb));
}

static void
save_bar_parameters(struct ntb_pci_bar_info *bar)
{

	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
	bar->pbase = rman_get_start(bar->pci_resource);
	bar->size = rman_get_size(bar->pci_resource);
	bar->vbase = rman_get_virtual(bar->pci_resource);
}

static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface */
	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
	/* NTB interface */
	DEVMETHOD(ntb_port_number,	intel_ntb_port_number),
	DEVMETHOD(ntb_peer_port_count,	intel_ntb_peer_port_count),
	DEVMETHOD(ntb_peer_port_number,	intel_ntb_peer_port_number),
	DEVMETHOD(ntb_peer_port_idx,	intel_ntb_peer_port_idx),
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};

static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids,
    nitems(pci_ids));