/*-
 * Copyright (c) 2016-2017 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCIe links, providing remote memory access.
 *
 * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/ntb/ntb_hw/ntb_hw_intel.c 355152 2019-11-28 00:41:42Z mav $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/resource.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "ntb_hw_intel.h"
#include "../ntb.h"

#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
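/*
 * Keeps only the high bits of an address: clears the low 12 bits, i.e.,
 * rounds down to 4 KiB granularity.
 */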
#define BAR_HIGH_MASK		(~((1ull << 12) - 1))

#define	NTB_MSIX_VER_GUARD	0xaabbccdd
#define	NTB_MSIX_RECEIVED	0xe0f0e0f0

/*
 * PCI constants could be somewhere more generic, but aren't defined/used in
 * pci.c.
 */
#define	PCI_MSIX_ENTRY_SIZE		16
#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
#define	PCI_MSIX_ENTRY_DATA		8

enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};

/* ntb_conn_type are hardware numbers, cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};

enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};

enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};

enum {
	NTB_MSIX_GUARD = 0,
	NTB_MSIX_DATA0,
	NTB_MSIX_DATA1,
	NTB_MSIX_DATA2,
	NTB_MSIX_OFS0,
	NTB_MSIX_OFS1,
	NTB_MSIX_OFS2,
	NTB_MSIX_DONE,
	NTB_MAX_MSIX_SPAD
};
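/*
 * A sketch of the scratchpad handshake implied by the layout above (see
 * intel_ntb_exchange_msix() for the authoritative sequence): each side
 * writes NTB_MSIX_VER_GUARD to the GUARD spad, publishes its MSI-X message
 * data and offsets in the DATAx/OFSx spads, and writes NTB_MSIX_RECEIVED to
 * DONE once it has read back the peer's values.
 */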

/* Device features and workarounds */
#define HAS_FEATURE(ntb, feature)	\
	(((ntb)->features & (feature)) != 0)

struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;
};

struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;
	caddr_t			vbase;
	vm_size_t		size;
	vm_memattr_t		map_mode;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};

struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
	unsigned		masked;
};

struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;
	unsigned	mw_bar[NTB_MAX_BARS];
};

struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};

struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};

struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};

struct ntb_msix_data {
	uint32_t	nmd_ofs;
	uint32_t	nmd_data;
};

struct ntb_softc {
	/* ntb.c context. Do not move! Must go first! */
	void			*ntb_store;

	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
	bool			peer_msix_good;
	bool			peer_msix_done;
	struct ntb_pci_bar_info	*peer_lapic_bar;
	struct callout		peer_msix_work;

	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	struct ntb_vec		*msix_vec;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;
	uint32_t			msix_xlat;
	uint8_t				msix_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	volatile uint32_t		ntb_ctl;
	volatile uint32_t		lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;
	uint64_t			fake_db;	/* NTB_SB01BASE_LOCKUP*/
	uint64_t			force_db;	/* NTB_SB01BASE_LOCKUP*/

	int				last_ts;	/* ticks @ last irq */

	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};

#ifdef __i386__
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif
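/*
 * Note that the i386 shim above emulates a 64-bit access with two 32-bit bus
 * accesses, so it is not atomic: a register that changes between the two
 * halves can be observed torn.  The driver tolerates this for the registers
 * it accesses this way.
 */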

#define intel_ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define intel_ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define intel_ntb_reg_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define intel_ntb_reg_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define intel_ntb_mw_read(SIZE, offset) \
	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset)
#define intel_ntb_mw_write(SIZE, offset, val) \
	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
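/*
 * Illustrative use of the accessors above: reading the local 16-bit Xeon
 * doorbell register through the config BAR looks like
 *
 *	uint16_t db = intel_ntb_reg_read(2, ntb->self_reg->db_bell);
 *
 * which expands to a bus_space_read_2() on NTB_CONFIG_BAR (cf. db_ioread()
 * below).  The mw_read/mw_write variants instead target the BAR backing the
 * B2B peer-register memory window.
 */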

static int intel_ntb_probe(device_t device);
static int intel_ntb_attach(device_t device);
static int intel_ntb_detach(device_t device);
static uint64_t intel_ntb_db_valid_mask(device_t dev);
static void intel_ntb_spad_clear(device_t dev);
static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width);
static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
    enum ntb_width width);
static int intel_ntb_link_disable(device_t dev);
static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);

static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
    vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
    const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int intel_ntb_init_isr(struct ntb_softc *ntb);
static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
static void intel_ntb_exchange_msix(void *);
static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
static int intel_ntb_detect_atom(struct ntb_softc *ntb);
static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
    enum ntb_bar regbar);
static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
    enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
static void recover_atom_link(void *arg);
static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);

static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
#define intel_ntb_printf(lvl, ...) do {				\
	if ((lvl) <= g_ntb_hw_debug_level) {			\
		device_printf(ntb->device, __VA_ARGS__);	\
	}							\
} while (0)

#define	_NTB_PAT_UC	0
#define	_NTB_PAT_WC	1
#define	_NTB_PAT_WT	4
#define	_NTB_PAT_WP	5
#define	_NTB_PAT_WB	6
#define	_NTB_PAT_UCM	7
static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
    "UC-: " __XSTRING(_NTB_PAT_UCM));
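/*
 * As a CTLFLAG_RDTUN knob, this is set as a boot-time tunable, e.g. in
 * /boot/loader.conf (the value 1 maps memory windows write-combining):
 *
 *	hw.ntb.default_mw_pat=1
 */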

static inline vm_memattr_t
intel_ntb_pat_flags(void)
{

	switch (g_ntb_mw_pat) {
	case _NTB_PAT_WC:
		return (VM_MEMATTR_WRITE_COMBINING);
	case _NTB_PAT_WT:
		return (VM_MEMATTR_WRITE_THROUGH);
	case _NTB_PAT_WP:
		return (VM_MEMATTR_WRITE_PROTECTED);
	case _NTB_PAT_WB:
		return (VM_MEMATTR_WRITE_BACK);
	case _NTB_PAT_UCM:
		return (VM_MEMATTR_WEAK_UNCACHEABLE);
	case _NTB_PAT_UC:
		/* FALLTHROUGH */
	default:
		return (VM_MEMATTR_UNCACHEABLE);
	}
}

/*
 * Well, this obviously doesn't belong here, but it doesn't seem to exist
 * anywhere better yet.
 */
static inline const char *
intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
{

	switch (pat) {
	case VM_MEMATTR_WRITE_COMBINING:
		return ("WRITE_COMBINING");
	case VM_MEMATTR_WRITE_THROUGH:
		return ("WRITE_THROUGH");
	case VM_MEMATTR_WRITE_PROTECTED:
		return ("WRITE_PROTECTED");
	case VM_MEMATTR_WRITE_BACK:
		return ("WRITE_BACK");
	case VM_MEMATTR_WEAK_UNCACHEABLE:
		return ("UNCACHED");
	case VM_MEMATTR_UNCACHEABLE:
		return ("UNCACHEABLE");
	default:
		return ("UNKNOWN");
	}
}

static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
    0, "Use this memory window to access the peer MSIX message complex on "
    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
    "Like b2b_mw_idx, negative values index from the last available memory "
    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");

static int g_ntb_mw_idx = -1;
SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
    0, "Use this memory window to access the peer NTB registers.  A "
    "non-negative value starts from the first MW index; a negative value "
    "starts from the last MW index.  The default is -1, i.e., the last "
    "available memory window.  Both sides of the NTB MUST set the same "
    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");

/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K		(1 << 0)
#define NTB_SDOORBELL_LOCKUP	(1 << 1)
#define NTB_SB01BASE_LOCKUP	(1 << 2)
#define NTB_B2BDOORBELL_BIT14	(1 << 3)
/* Software/configuration owns the top 16 bits. */
#define NTB_SPLIT_BAR		(1ull << 16)

#define NTB_FEATURES_STR \
    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"

static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x00000000, NULL, NTB_ATOM, 0 }
};

static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};

static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};

static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
};

SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

/*
 * OS <-> Driver interface structures
 */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/*
 * OS <-> Driver linkage functions
 */
static int
intel_ntb_probe(device_t device)
{
	struct ntb_hw_info *p;

	p = intel_ntb_get_device_info(pci_get_devid(device));
	if (p == NULL)
		return (ENXIO);

	device_set_desc(device, p->desc);
	return (0);
}

static int
intel_ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = device_get_softc(device);
	p = intel_ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	ntb->b2b_mw_idx = B2B_MW_DISABLED;
	ntb->msix_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	callout_init(&ntb->peer_msix_work, 1);
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);

	if (ntb->type == NTB_ATOM)
		error = intel_ntb_detect_atom(ntb);
	else
		error = intel_ntb_detect_xeon(ntb);
	if (error != 0)
		goto out;

	intel_ntb_detect_max_mw(ntb);

	pci_enable_busmaster(ntb->device);

	error = intel_ntb_map_pci_bars(ntb);
	if (error != 0)
		goto out;
	if (ntb->type == NTB_ATOM)
		error = intel_ntb_atom_init_dev(ntb);
	else
		error = intel_ntb_xeon_init_dev(ntb);
	if (error != 0)
		goto out;

	intel_ntb_spad_clear(device);

	intel_ntb_poll_link(ntb);

	intel_ntb_sysctl_init(ntb);

	/* Attach children to this controller */
	error = ntb_register_device(device);

out:
	if (error != 0)
		intel_ntb_detach(device);
	return (error);
}

static int
intel_ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = device_get_softc(device);

	/* Detach & delete all children */
	ntb_unregister_device(device);

	if (ntb->self_reg != NULL) {
		DB_MASK_LOCK(ntb);
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
		DB_MASK_UNLOCK(ntb);
	}
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	callout_drain(&ntb->peer_msix_work);
	pci_disable_busmaster(ntb->device);
	if (ntb->type == NTB_XEON)
		intel_ntb_teardown_xeon(ntb);
	intel_ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	intel_ntb_unmap_pci_bar(ntb);

	return (0);
}

/*
 * Driver internal routines
 */
static inline enum ntb_bar
intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count,
	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));

	return (ntb->reg->mw_bar[mw]);
}

static inline bool
bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
{
	/* XXX This assertion could be stronger. */
	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
}

static inline void
bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
    uint32_t *xlat, uint32_t *lmt)
{
	uint32_t basev, lmtv, xlatv;

	switch (bar) {
	case NTB_B2B_BAR_1:
		basev = ntb->xlat_reg->bar2_base;
		lmtv = ntb->xlat_reg->bar2_limit;
		xlatv = ntb->xlat_reg->bar2_xlat;
		break;
	case NTB_B2B_BAR_2:
		basev = ntb->xlat_reg->bar4_base;
		lmtv = ntb->xlat_reg->bar4_limit;
		xlatv = ntb->xlat_reg->bar4_xlat;
		break;
	case NTB_B2B_BAR_3:
		basev = ntb->xlat_reg->bar5_base;
		lmtv = ntb->xlat_reg->bar5_limit;
		xlatv = ntb->xlat_reg->bar5_xlat;
		break;
	default:
		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
		    ("bad bar"));
		basev = lmtv = xlatv = 0;
		break;
	}

	if (base != NULL)
		*base = basev;
	if (xlat != NULL)
		*xlat = xlatv;
	if (lmt != NULL)
		*lmt = lmtv;
}

static int
intel_ntb_map_pci_bars(struct ntb_softc *ntb)
{
	int rc;

	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
	if (rc != 0)
		goto out;

	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
	if (rc != 0)
		goto out;
	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;

	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
	if (rc != 0)
		goto out;
	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;

	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		goto out;

	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;

out:
	if (rc != 0)
		device_printf(ntb->device,
		    "unable to allocate pci resource\n");
	return (rc);
}

static void
print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
    const char *kind)
{

	device_printf(ntb->device,
	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
	    (char *)bar->vbase + bar->size - 1,
	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
	    (uintmax_t)bar->size, kind);
}

static int
map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);
	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mmr");
	return (0);
}

static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	vm_memattr_t mapmode;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		save_bar_parameters(bar);
	}

	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
	print_map_success(ntb, bar, "mw");

	/*
	 * Optionally, mark MW BARs as anything other than UC to improve
	 * performance.
	 */
	mapmode = intel_ntb_pat_flags();
	if (mapmode == bar->map_mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
	if (rc == 0) {
		bar->map_mode = mapmode;
		device_printf(ntb->device,
		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
		    "%s.\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode));
	} else
		device_printf(ntb->device,
		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
		    "%s: %d\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    intel_ntb_vm_memattr_to_str(mapmode), rc);
		/* Proceed anyway */
	return (0);
}

static void
intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
{
	struct ntb_pci_bar_info *current_bar;
	int i;

	for (i = 0; i < NTB_MAX_BARS; i++) {
		current_bar = &ntb->bar_info[i];
		if (current_bar->pci_resource != NULL)
			bus_release_resource(ntb->device, SYS_RES_MEMORY,
			    current_bar->pci_resource_id,
			    current_bar->pci_resource);
	}
}

static int
intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;
	int rc;

	for (i = 0; i < num_vectors; i++) {
		ntb->int_info[i].rid = i + 1;
		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
		if (ntb->int_info[i].res == NULL) {
			device_printf(ntb->device,
			    "bus_alloc_resource failed\n");
			return (ENOMEM);
		}
		ntb->int_info[i].tag = NULL;
		ntb->allocated_interrupts++;
		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
		if (rc != 0) {
			device_printf(ntb->device, "bus_setup_intr failed\n");
			return (ENXIO);
		}
	}
	return (0);
}

/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number is "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");

/*
 * Remap the desired number of MSI-X messages to available ithreads in a simple
 * round-robin fashion.
 */
static int
intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
{
	u_int *vectors;
	uint32_t i;
	int rc;

	if (ntb_prefer_intx != 0)
		return (ENXIO);

	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);

	for (i = 0; i < desired; i++)
		vectors[i] = (i % avail) + 1;

	rc = pci_remap_msix(dev, desired, vectors);
	free(vectors, M_NTB);
	return (rc);
}
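/*
 * Worked example: with desired = 15 messages and avail = 4 ithreads, the
 * table built above is { 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3 },
 * i.e., messages beyond the fourth simply wrap around the ithreads.
 */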

static int
intel_ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.  (Except link events!)
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround does not support MSI or INTX\n");
			return (EINVAL);
		}

		ntb->db_vec_count = 1;
		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
		rc = intel_ntb_setup_legacy_interrupt(ntb);
	} else {
		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			device_printf(ntb->device,
			    "Errata workaround expects %d doorbell bits\n",
			    XEON_NONLINK_DB_MSIX_BITS);
			return (EINVAL);
		}

		intel_ntb_create_msix_vec(ntb, num_vectors);
		rc = intel_ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		intel_ntb_free_msix_vec(ntb);
	}

	return (rc);
}

static int
intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
	int rc;

	ntb->int_info[0].rid = 0;
	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
	if (ntb->int_info[0].res == NULL) {
		device_printf(ntb->device, "bus_alloc_resource failed\n");
		return (ENOMEM);
	}

	ntb->int_info[0].tag = NULL;
	ntb->allocated_interrupts = 1;

	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
	    ntb, &ntb->int_info[0].tag);
	if (rc != 0) {
		device_printf(ntb->device, "bus_setup_intr failed\n");
		return (ENXIO);
	}

	return (0);
}

static void
intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
{
	struct ntb_int_info *current_int;
	int i;

	for (i = 0; i < ntb->allocated_interrupts; i++) {
		current_int = &ntb->int_info[i];
		if (current_int->tag != NULL)
			bus_teardown_intr(ntb->device, current_int->res,
			    current_int->tag);

		if (current_int->res != NULL)
			bus_release_resource(ntb->device, SYS_RES_IRQ,
			    rman_get_rid(current_int->res), current_int->res);
	}

	intel_ntb_free_msix_vec(ntb);
	pci_release_msi(ntb->device);
}

/*
 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
 * out to make code clearer.
 */
static inline uint64_t
db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{

	if (ntb->type == NTB_ATOM)
		return (intel_ntb_reg_read(8, regoff));

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));

	return (intel_ntb_reg_read(2, regoff));
}

static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);
	db_iowrite_raw(ntb, regoff, val);
}

static inline void
db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	if (ntb->type == NTB_ATOM) {
		intel_ntb_reg_write(8, regoff, val);
		return;
	}

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
	intel_ntb_reg_write(2, regoff, (uint16_t)val);
}

static void
intel_ntb_db_set_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	DB_MASK_LOCK(ntb);
	ntb->db_mask |= bits;
	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}

static void
intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint64_t ibits;
	int i;

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	ibits = ntb->fake_db & ntb->db_mask & bits;
	ntb->db_mask &= ~bits;
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/* Simulate fake interrupts if unmasked DB bits are set. */
		ntb->force_db |= ibits;
		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
				swi_sched(ntb->int_info[i].tag, 0);
		}
	} else {
		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	}
	DB_MASK_UNLOCK(ntb);
}

static uint64_t
intel_ntb_db_read(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		return (ntb->fake_db);

	return (db_ioread(ntb, ntb->self_reg->db_bell));
}

static void
intel_ntb_db_clear(device_t dev, uint64_t bits)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		DB_MASK_LOCK(ntb);
		ntb->fake_db &= ~bits;
		DB_MASK_UNLOCK(ntb);
		return;
	}

	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
}

static inline uint64_t
intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
	uint64_t shift, mask;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		/*
		 * Remap vectors in a custom way so that at least the first
		 * three doorbells do not generate stray events.  This breaks
		 * Linux compatibility (if one existed) when more than one DB
		 * is used (not by if_ntb).
		 */
		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
			return (1 << db_vector);
		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
			return (0x7ffc);
	}

	shift = ntb->db_vec_shift;
	mask = (1ull << shift) - 1;
	return (mask << (shift * db_vector));
}
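/*
 * For example, with db_vec_shift = 4, vector 0 covers doorbell bits 0-3
 * (mask 0x000f) and vector 1 covers bits 4-7 (mask 0x00f0).  Under the
 * SB01BASE_LOCKUP remapping above, the last MSI-X vector instead collects
 * all remaining doorbells (mask 0x7ffc).
 */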

static void
intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
	uint64_t vec_mask;

	ntb->last_ts = ticks;
	vec_mask = intel_ntb_vec_mask(ntb, vec);

	if ((vec_mask & ntb->db_link_mask) != 0) {
		if (intel_ntb_poll_link(ntb))
			ntb_link_event(ntb->device);
	}

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    (vec_mask & ntb->db_link_mask) == 0) {
		DB_MASK_LOCK(ntb);

		/*
		 * Do not report the same DB events again if they have not
		 * been cleared yet, unless the mask was just cleared for
		 * them and this interrupt may be a consequence of that.
		 */
		vec_mask &= ~ntb->fake_db | ntb->force_db;
		ntb->force_db &= ~vec_mask;

		/* Update our internal doorbell register. */
		ntb->fake_db |= vec_mask;

		/* Do not report masked DB events. */
		vec_mask &= ~ntb->db_mask;

		DB_MASK_UNLOCK(ntb);
	}

	if ((vec_mask & ntb->db_valid_mask) != 0)
		ntb_db_event(ntb->device, vec);
}

static void
ndev_vec_isr(void *arg)
{
	struct ntb_vec *nvec = arg;

	intel_ntb_interrupt(nvec->ntb, nvec->num);
}

static void
ndev_irq_isr(void *arg)
{
	/* If we couldn't set up MSI-X, we only have the one vector. */
	intel_ntb_interrupt(arg, 0);
}

static int
intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;

	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
	    M_ZERO | M_WAITOK);
	for (i = 0; i < num_vectors; i++) {
		ntb->msix_vec[i].num = i;
		ntb->msix_vec[i].ntb = ntb;
	}

	return (0);
}

static void
intel_ntb_free_msix_vec(struct ntb_softc *ntb)
{

	if (ntb->msix_vec == NULL)
		return;

	free(ntb->msix_vec, M_NTB);
	ntb->msix_vec = NULL;
}

static void
intel_ntb_get_msix_info(struct ntb_softc *ntb)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_msix *msix;
	uint32_t laddr, data, i, offset;

	dinfo = device_get_ivars(ntb->device);
	msix = &dinfo->cfg.msix;

	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;

		laddr = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_LOWER_ADDR);
		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);

		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
		     MSI_INTEL_ADDR_BASE));
		ntb->msix_data[i].nmd_ofs = laddr;

		data = bus_read_4(msix->msix_table_res, offset +
		    PCI_MSIX_ENTRY_DATA);
		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);

		ntb->msix_data[i].nmd_data = data;
	}
}

static struct ntb_hw_info *
intel_ntb_get_device_info(uint32_t device_id)
{
	struct ntb_hw_info *ep = pci_ids;

	while (ep->device_id) {
		if (ep->device_id == device_id)
			return (ep);
		++ep;
	}
	return (NULL);
}

static void
intel_ntb_teardown_xeon(struct ntb_softc *ntb)
{

	if (ntb->reg != NULL)
		intel_ntb_link_disable(ntb->device);
}

static void
intel_ntb_detect_max_mw(struct ntb_softc *ntb)
{

	if (ntb->type == NTB_ATOM) {
		ntb->mw_count = ATOM_MW_COUNT;
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
	else
		ntb->mw_count = XEON_SNB_MW_COUNT;
}

static int
intel_ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		device_printf(ntb->device,
		    "Cannot apply SB01BASE_LOCKUP workaround "
		    "with split BARs disabled!\n");
		device_printf(ntb->device,
		    "Expect system hangs under heavy NTB traffic!\n");
		ntb->features &= ~NTB_SB01BASE_LOCKUP;
	}

	/*
	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
	 * errata workaround; only do one at a time.
	 */
	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
		ntb->features &= ~NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_detect_atom(struct ntb_softc *ntb)
{
	uint32_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
	ntb->ppd = ppd;

	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported NTB configuration\n");
		return (ENXIO);
	}
	return (0);
}

static int
intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		ntb->force_db = ntb->fake_db = 0;
		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
		    g_ntb_msix_idx, ntb->msix_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		/*
		 * There is a Xeon hardware errata related to writes to
		 * SDOORBELL or B2BDOORBELL in conjunction with inbound access
		 * to NTB MMIO space, which may hang the system.  To work
		 * around this, use a memory window to access the interrupt
		 * and scratch pad registers on the remote system.
		 */
		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
		    ntb->mw_count;
		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
		    g_ntb_mw_idx, ntb->b2b_mw_idx);
		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
		    VM_MEMATTR_UNCACHEABLE);
		KASSERT(rc == 0, ("shouldn't fail"));
	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will
		 * not be mirrored to the remote system.  Shrink the number of
		 * bits by one, since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell
		 * register anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
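	/*
	 * E.g., the full 15 Xeon doorbells yield a valid mask of 0x7fff;
	 * the bit-14 errata path above (14 doorbells) yields 0x3fff.
	 */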

	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/*
	 * Mask all doorbell interrupts.
	 */
	DB_MASK_LOCK(ntb);
	ntb->db_mask = ntb->db_valid_mask;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);

	rc = intel_ntb_init_isr(ntb);
	return (rc);
}

static int
intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
	int error;

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = ATOM_SPAD_COUNT;
	ntb->db_count		 = ATOM_DB_COUNT;
	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &atom_reg;
	ntb->self_reg = &atom_pri_reg;
	ntb->peer_reg = &atom_b2b_reg;
	ntb->xlat_reg = &atom_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_atom_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	error = intel_ntb_init_isr(ntb);
	if (error != 0)
		return (error);

	/* Initiate PCI-E link training */
	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);

	return (0);
}

/* XXX: Linux driver doesn't seem to do any of this for Atom. */
static void
configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{

	if (ntb->dev_type == NTB_DEV_USD) {
		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
	} else {
		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
	}
}

/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");

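/*
 * The PBARxSZ/SBARxSZ configuration registers encode a BAR's size as
 * log2(bytes), so in the function below decrementing the value halves the
 * secondary-side window (the shared-B2B case) and writing 0 effectively
 * disables it.
 */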
1663static void
1664xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1665    enum ntb_bar regbar)
1666{
1667	struct ntb_pci_bar_info *bar;
1668	uint8_t bar_sz;
1669
1670	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1671		return;
1672
1673	bar = &ntb->bar_info[idx];
1674	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1675	if (idx == regbar) {
1676		if (ntb->b2b_off != 0)
1677			bar_sz--;
1678		else
1679			bar_sz = 0;
1680	}
1681	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1682	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1683	(void)bar_sz;
1684}
1685
1686static void
1687xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1688    enum ntb_bar idx, enum ntb_bar regbar)
1689{
1690	uint64_t reg_val;
1691	uint32_t base_reg, lmt_reg;
1692
1693	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1694	if (idx == regbar) {
1695		if (ntb->b2b_off)
1696			bar_addr += ntb->b2b_off;
1697		else
1698			bar_addr = 0;
1699	}
1700
1701	if (!bar_is_64bit(ntb, idx)) {
1702		intel_ntb_reg_write(4, base_reg, bar_addr);
1703		reg_val = intel_ntb_reg_read(4, base_reg);
1704		(void)reg_val;
1705
1706		intel_ntb_reg_write(4, lmt_reg, bar_addr);
1707		reg_val = intel_ntb_reg_read(4, lmt_reg);
1708		(void)reg_val;
1709	} else {
1710		intel_ntb_reg_write(8, base_reg, bar_addr);
1711		reg_val = intel_ntb_reg_read(8, base_reg);
1712		(void)reg_val;
1713
1714		intel_ntb_reg_write(8, lmt_reg, bar_addr);
1715		reg_val = intel_ntb_reg_read(8, lmt_reg);
1716		(void)reg_val;
1717	}
1718}
1719
1720static void
1721xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1722{
1723	struct ntb_pci_bar_info *bar;
1724
1725	bar = &ntb->bar_info[idx];
1726	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1727		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1728		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
1729	} else {
1730		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1731		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
1732	}
1733	(void)base_addr;
1734}
1735
1736static int
1737xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1738    const struct ntb_b2b_addr *peer_addr)
1739{
1740	struct ntb_pci_bar_info *b2b_bar;
1741	vm_size_t bar_size;
1742	uint64_t bar_addr;
1743	enum ntb_bar b2b_bar_num, i;
1744
1745	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1746		b2b_bar = NULL;
1747		b2b_bar_num = NTB_CONFIG_BAR;
1748		ntb->b2b_off = 0;
1749	} else {
1750		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1751		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1752		    ("invalid b2b mw bar"));
1753
1754		b2b_bar = &ntb->bar_info[b2b_bar_num];
1755		bar_size = b2b_bar->size;
1756
1757		if (ntb_b2b_mw_share != 0 &&
1758		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1759			ntb->b2b_off = bar_size >> 1;
1760		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1761			ntb->b2b_off = 0;
1762		} else {
1763			device_printf(ntb->device,
1764			    "B2B bar size is too small!\n");
1765			return (EIO);
1766		}
1767	}
1768
	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the B2B
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		uint32_t xlat_reg, lmt_reg;
		enum ntb_bar bar_num;

		/*
		 * Point the chosen MSI-X MW BAR translation at the remote
		 * LAPIC to work around the SB01BASE_LOCKUP errata.
		 */
		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
		if (bar_is_64bit(ntb, bar_num)) {
			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
			intel_ntb_reg_write(8, lmt_reg, 0);
		} else {
			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
			intel_ntb_reg_write(4, lmt_reg, 0);
		}

		ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
	}
	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);

	/* Zero outgoing translation limits (whole bar size windows) */
	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}

static inline bool
_xeon_link_is_up(struct ntb_softc *ntb)
{

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);
	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
}

static inline bool
link_is_up(struct ntb_softc *ntb)
{

	if (ntb->type == NTB_XEON)
		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
}

static inline bool
atom_link_is_err(struct ntb_softc *ntb)
{
	uint32_t status;

	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));

	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
		return (true);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
}

/* Atom has no link-status interrupt; poll the link on that platform. */
static void
atom_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	sbintime_t timo, poll_ts;

	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
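	/*
	 * Note: last_ts/poll_ts are in units of hz ticks despite the
	 * sbintime_t type; the signed subtraction below keeps the comparison
	 * valid across wraparound of the ticks counter.
	 */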
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (intel_ntb_poll_link(ntb))
		ntb_link_event(ntb->device);

	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
		return;
	}

out:
	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
}

static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}

static int
intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
    enum ntb_width width __unused)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->type == NTB_ATOM) {
		pci_write_config(ntb->device, NTB_PPD_OFFSET,
		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
		return (0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static int
intel_ntb_link_disable(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	intel_ntb_printf(2, "%s\n", __func__);

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(dev);
		return (0);
	}

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}

static bool
intel_ntb_link_enabled(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint32_t cntl;

	if (ntb->type == NTB_ATOM) {
		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
		return (true);

	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
}

static void
recover_atom_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	atom_perform_link_restart(ntb);

	/*
	 * There is a potential race if both NTB devices attempt recovery at
	 * the same time.  If the restart times coincide exactly, the link
	 * never recovers and the driver is stuck in this loop forever.  Add
	 * a random interval to the recovery time to break the symmetry.
	 */
	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);

	if (atom_link_is_err(ntb))
		goto retry;

	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
		goto out;

	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
	    ntb);
}

/*
 * Polls the HW link status register(s); returns true if something has changed.
 */
static bool
intel_ntb_poll_link(struct ntb_softc *ntb)
{
	uint32_t ntb_cntl;
	uint16_t reg_val;

	if (ntb->type == NTB_ATOM) {
		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
		if (ntb_cntl == ntb->ntb_ctl)
			return (false);

		ntb->ntb_ctl = ntb_cntl;
		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
	} else {
		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);

		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
		if (reg_val == ntb->lnk_sta)
			return (false);

		ntb->lnk_sta = reg_val;

		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
			if (_xeon_link_is_up(ntb)) {
				if (!ntb->peer_msix_good) {
					callout_reset(&ntb->peer_msix_work, 0,
					    intel_ntb_exchange_msix, ntb);
					return (false);
				}
			} else {
				ntb->peer_msix_good = false;
				ntb->peer_msix_done = false;
			}
		}
	}
	return (true);
}

static inline enum ntb_speed
intel_ntb_link_sta_speed(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_SPEED_NONE);
	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
}

static inline enum ntb_width
intel_ntb_link_sta_width(struct ntb_softc *ntb)
{

	if (!link_is_up(ntb))
		return (NTB_WIDTH_NONE);
	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
}

SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
    "Driver state, statistics, and HW registers");

#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
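/*
 * arg2 of sysctl_handle_register() encodes both the register offset (low
 * bits) and these access flags (high bits).  For example, the 16-bit
 * PCI-config DEVSTS register is registered below with
 * NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET.
 */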

static void
intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmptree;

	ctx = device_get_sysctl_ctx(ntb->device);
	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));

	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
	    sysctl_handle_link_status_human, "A",
	    "Link status (human readable)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
	    "IU", "Link status (1=active, 0=inactive)");
	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
	    "IU", "Set/get interface status (1=UP, 0=DOWN)");

	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
	tree_par = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
	    &ntb->ppd, 0, "Raw PPD register (cached)");

	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
		    &ntb->b2b_mw_idx, 0,
		    "Index of the MW used for B2B remote register access");
		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
		    CTLFLAG_RD, &ntb->b2b_off,
		    "If non-zero, offset of B2B register region in shared MW");
	}

	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
	    "Features/errata of this NTB device");

	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
	    "NTB CTL register (cached)");
	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
	    "LNK STA register (cached)");

	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
	    &ntb->mw_count, 0, "MW count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
	    &ntb->spad_count, 0, "Scratchpad count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
	    &ntb->db_count, 0, "Doorbell count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
	    &ntb->db_vec_count, 0, "Doorbell vector count");
	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
	    &ntb->db_vec_shift, 0, "Doorbell vector shift");

	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
	    &ntb->db_valid_mask, "Doorbell valid mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
	    &ntb->db_link_mask, "Doorbell link mask");
	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
	    &ntb->db_mask, "Doorbell mask (cached)");

	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
	regpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
	    "NTB Control register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    0x19c, sysctl_handle_register, "IU",
	    "NTB Link Capabilities");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
	    0x1a0, sysctl_handle_register, "IU",
	    "NTB Link Control register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
	    sysctl_handle_register, "QU", "Doorbell mask register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
	    sysctl_handle_register, "QU", "Doorbell register");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
	    sysctl_handle_register, "QU", "Incoming LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "IU", "Incoming LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
		    sysctl_handle_register, "IU", "Incoming LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
		    sysctl_handle_register, "QU", "Incoming LMT45 register");
	}

	if (ntb->type == NTB_ATOM)
		return;

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
	    CTLFLAG_RD, NULL, "Xeon HW statistics");
	statpar = SYSCTL_CHILDREN(tmptree);
	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
	    sysctl_handle_register, "SU", "Upstream Memory Miss");

	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
	    CTLFLAG_RD, NULL, "Xeon HW errors");
	errpar = SYSCTL_CHILDREN(tmptree);

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
	    sysctl_handle_register, "CU", "PPD");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
	    sysctl_handle_register, "SU", "DEVSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "LNKSTS");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
	    sysctl_handle_register, "SU", "SLNKSTS");

	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "UNCERRSTS");
	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
	    sysctl_handle_register, "IU", "CORERRSTS");

	if (ntb->conn_type != NTB_CONN_B2B)
		return;

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
	}

	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR4 base register");
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
		    sysctl_handle_register, "IU",
		    "Secondary BAR5 base register");
	} else {
		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
		    sysctl_handle_register, "QU",
		    "Secondary BAR45 base register");
	}
}

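/*
 * The tree built above hangs off the device's sysctl node, so (assuming
 * unit 0) a raw register can be inspected with, e.g.:
 *   sysctl -x dev.ntb_hw.0.debug_info.registers.ntbcntl
 */
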
static int
sysctl_handle_features(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 256, req);

	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned old, new;
	int error;

	old = intel_ntb_link_enabled(ntb->device);

	error = SYSCTL_OUT(req, &old, sizeof(old));
	if (error != 0 || req->newptr == NULL)
		return (error);

	error = SYSCTL_IN(req, &new, sizeof(new));
	if (error != 0)
		return (error);

	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
	    (new != 0) ? "en" : "dis");

	if (new != 0)
		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO,
		    NTB_WIDTH_AUTO);
	else
		error = intel_ntb_link_disable(ntb->device);
	return (error);
}

static int
sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	struct sbuf sb;
	enum ntb_speed speed;
	enum ntb_width width;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 32, req);

	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
		    (unsigned)speed, (unsigned)width);
	else
		sbuf_printf(&sb, "down");

	error = sbuf_finish(&sb);
	sbuf_delete(&sb);

	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb = arg1;
	unsigned res;
	int error;

	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);

	error = SYSCTL_OUT(req, &res, sizeof(res));
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static int
sysctl_handle_register(SYSCTL_HANDLER_ARGS)
{
	struct ntb_softc *ntb;
	const void *outp;
	uintptr_t sz;
	uint64_t umv;
	char be[sizeof(umv)];
	size_t outsz;
	uint32_t reg;
	bool db, pci;
	int error;

	ntb = arg1;
	reg = arg2 & ~NTB_REGFLAGS_MASK;
	sz = arg2 & NTB_REGSZ_MASK;
	db = (arg2 & NTB_DB_READ) != 0;
	pci = (arg2 & NTB_PCI_REG) != 0;

	KASSERT(!(db && pci), ("bogus"));

	if (db) {
		KASSERT(sz == NTB_REG_64, ("bogus"));
		umv = db_ioread(ntb, reg);
		outsz = sizeof(uint64_t);
	} else {
		switch (sz) {
		case NTB_REG_64:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 8);
			else
				umv = intel_ntb_reg_read(8, reg);
			outsz = sizeof(uint64_t);
			break;
		case NTB_REG_32:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 4);
			else
				umv = intel_ntb_reg_read(4, reg);
			outsz = sizeof(uint32_t);
			break;
		case NTB_REG_16:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 2);
			else
				umv = intel_ntb_reg_read(2, reg);
			outsz = sizeof(uint16_t);
			break;
		case NTB_REG_8:
			if (pci)
				umv = pci_read_config(ntb->device, reg, 1);
			else
				umv = intel_ntb_reg_read(1, reg);
			outsz = sizeof(uint8_t);
			break;
		default:
			panic("bogus");
			break;
		}
	}

	/* Encode bigendian so that sysctl -x is legible. */
	be64enc(be, umv);
	outp = ((char *)be) + sizeof(umv) - outsz;
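	/*
	 * e.g. a 16-bit register value 0x1234 is encoded as the 8-byte
	 * buffer 00 00 00 00 00 00 12 34, and outp points at the final two
	 * bytes.
	 */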

	error = SYSCTL_OUT(req, outp, outsz);
	if (error || !req->newptr)
		return (error);
	return (EINVAL);
}

static unsigned
intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{

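	/*
	 * Skip over memory windows consumed internally: the non-shared B2B
	 * MW and the MSI-X workaround MW are hidden from consumers.  E.g.,
	 * with a non-shared B2B MW at index 1 and the MSI-X MW at index 2,
	 * user index 1 is bumped twice and maps to hardware MW 3.
	 */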
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) ||
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
	    uidx >= ntb->b2b_mw_idx) &&
	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
		uidx++;
	return (uidx);
}

#ifndef EARLY_AP_STARTUP
static int msix_ready;

static void
intel_ntb_msix_ready(void *arg __unused)
{

	msix_ready = 1;
}
SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
    intel_ntb_msix_ready, NULL);
#endif

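/*
 * Scratchpad-based MSI-X handshake for the SB01BASE_LOCKUP workaround:
 * each side publishes its local MSI-X table entries through the peer's
 * scratchpads, posts NTB_MSIX_VER_GUARD, reads back the peer's entries,
 * and acknowledges with NTB_MSIX_RECEIVED before doorbells are redirected
 * at the remote LAPIC.
 */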
static void
intel_ntb_exchange_msix(void *ctx)
{
	struct ntb_softc *ntb;
	uint32_t val;
	unsigned i;

	ntb = ctx;

	if (ntb->peer_msix_good)
		goto msix_good;
	if (ntb->peer_msix_done)
		goto msix_done;

#ifndef EARLY_AP_STARTUP
	/*
	 * Block MSI-X negotiation until SMP has started and IRQs have been
	 * reshuffled.
	 */
	if (!msix_ready)
		goto reschedule;
#endif

	intel_ntb_get_msix_info(ntb);
	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
		    ntb->msix_data[i].nmd_data);
		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
	}
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);

	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
	if (val != NTB_MSIX_VER_GUARD)
		goto reschedule;

	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_data = val;
		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
		ntb->peer_msix_data[i].nmd_ofs = val;
	}

	ntb->peer_msix_done = true;

msix_done:
	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
	if (val != NTB_MSIX_RECEIVED)
		goto reschedule;

	intel_ntb_spad_clear(ntb->device);
	ntb->peer_msix_good = true;
	/* Give peer time to see our NTB_MSIX_RECEIVED. */
	goto reschedule;

msix_good:
	intel_ntb_poll_link(ntb);
	ntb_link_event(ntb->device);
	return;

reschedule:
	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
	if (_xeon_link_is_up(ntb)) {
		callout_reset(&ntb->peer_msix_work,
		    hz * (ntb->peer_msix_good ? 2 : 1) / 10,
		    intel_ntb_exchange_msix, ntb);
	} else
		intel_ntb_spad_clear(ntb->device);
}

/*
 * Public API to the rest of the OS
 */

static uint8_t
intel_ntb_spad_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->spad_count);
}

static uint8_t
intel_ntb_mw_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	uint8_t res;

	res = ntb->mw_count;
	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
		res--;
	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
		res--;
	return (res);
}

static int
intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);

	return (0);
}

/*
 * Zeros the local scratchpad.
 */
static void
intel_ntb_spad_clear(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	unsigned i;

	for (i = 0; i < ntb->spad_count; i++)
		intel_ntb_spad_write(dev, i, 0);
}

static int
intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
	else
		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);

	return (0);
}

static int
intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
	else
		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
    bus_addr_t *plimit)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	bus_addr_t limit;
	size_t bar_b2b_off;
	enum ntb_bar bar_num;

	if (mw_idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);

	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
	bar = &ntb->bar_info[bar_num];
	bar_b2b_off = 0;
	if (mw_idx == ntb->b2b_mw_idx) {
		KASSERT(ntb->b2b_off != 0,
		    ("user shouldn't get non-shared b2b mw"));
		bar_b2b_off = ntb->b2b_off;
	}

	if (bar_is_64bit(ntb, bar_num))
		limit = BUS_SPACE_MAXADDR;
	else
		limit = BUS_SPACE_MAXADDR_32BIT;

	if (base != NULL)
		*base = bar->pbase + bar_b2b_off;
	if (vbase != NULL)
		*vbase = bar->vbase + bar_b2b_off;
	if (size != NULL)
		*size = bar->size - bar_b2b_off;
	if (align != NULL)
		*align = bar->size;
	if (align_size != NULL)
		*align_size = 1;
	if (plimit != NULL)
		*plimit = limit;
	return (0);
}

static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* The hardware requires that addr be aligned to the BAR size. */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

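	/*
	 * A limit of zero leaves the whole BAR-sized window open; a partial
	 * mapping (size < mw_size) caps incoming accesses at base + size.
	 */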
	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */

		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}

static int
intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{

	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}

static int
intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	*mode = bar->map_mode;
	return (0);
}

static int
intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);

	idx = intel_ntb_user_mw_to_idx(ntb, idx);
	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}

static int
intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	if (bar->map_mode == mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
	if (rc == 0)
		bar->map_mode = mode;

	return (rc);
}

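/*
 * Ring the peer's doorbell.  Three paths, most-constrained first: write
 * the remote LAPIC directly through the MSI-X workaround window
 * (SB01BASE_LOCKUP), write the mirrored doorbell register through the B2B
 * memory window (SDOORBELL_LOCKUP), or write the peer doorbell register
 * directly.
 */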
static void
intel_ntb_peer_db_set(device_t dev, uint64_t bit)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		struct ntb_pci_bar_info *lapic;
		unsigned i;

		lapic = ntb->peer_lapic_bar;

		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
				bus_space_write_4(lapic->pci_bus_tag,
				    lapic->pci_bus_handle,
				    ntb->peer_msix_data[i].nmd_ofs,
				    ntb->peer_msix_data[i].nmd_data);
		}
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
		return;
	}

	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
}

static int
intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t regoff;

	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));

	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		bar = &ntb->bar_info[NTB_CONFIG_BAR];
		regoff = ntb->peer_reg->db_bell;
	} else {
		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
		    ("invalid b2b idx"));

		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
		regoff = XEON_PDOORBELL_OFFSET;
	}
	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));

	/* HACK: Specific to current x86 bus implementation. */
	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
	*db_size = ntb->reg->db_size;
	return (0);
}

static uint64_t
intel_ntb_db_valid_mask(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_valid_mask);
}

static int
intel_ntb_db_vector_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_vec_count);
}

static uint64_t
intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (vector > ntb->db_vec_count)
		return (0);
	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}

static bool
intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (speed != NULL)
		*speed = intel_ntb_link_sta_speed(ntb);
	if (width != NULL)
		*width = intel_ntb_link_sta_width(ntb);
	return (link_is_up(ntb));
}

static void
save_bar_parameters(struct ntb_pci_bar_info *bar)
{

	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
	bar->pbase = rman_get_start(bar->pci_resource);
	bar->size = rman_get_size(bar->pci_resource);
	bar->vbase = rman_get_virtual(bar->pci_resource);
}

static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface */
	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	DEVMETHOD(bus_get_dma_tag,	ntb_get_dma_tag),
	/* NTB interface */
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};

static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);